├── .gitattributes ├── .ipynb_checkpoints ├── bank churn model - modeling(GBDT)-checkpoint.ipynb ├── bank churn model - preprocessing-checkpoint.ipynb └── bank churn model - preview-checkpoint.ipynb ├── ExternalData.csv ├── bank churn model - modeling(GBDT).ipynb ├── bank churn model - preprocessing.ipynb ├── bank churn model - preview.ipynb ├── bankChurn.csv └── model_data.csv /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/bank churn model - modeling(GBDT)-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stderr", 10 | "output_type": "stream", 11 | "text": [ 12 | "D:\\Anaconda3\\lib\\site-packages\\sklearn\\cross_validation.py:41: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n", 13 | " \"This module will be removed in 0.20.\", DeprecationWarning)\n" 14 | ] 15 | } 16 | ], 17 | "source": [ 18 | "import pandas as pd\n", 19 | "from sklearn import ensemble, cross_validation, metrics\n", 20 | "from sklearn.ensemble import GradientBoostingClassifier\n", 21 | "from sklearn.cross_validation import train_test_split\n", 22 | "from sklearn.cross_validation import KFold\n", 23 | "from sklearn.model_selection import GridSearchCV" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 2, 29 | "metadata": {}, 30 | "outputs": [ 31 | { 32 | "data": { 33 | "text/html": [ 34 | "
\n", 35 | "\n", 48 | "\n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | "
LOCAL_CUR_SAV_SLOPELOCAL_CUR_MON_AVG_BALLOCAL_OVEONEYR_FF_MON_AVG_BALLOCAL_FIX_MON_AVG_BALLOCAL_FIX_MON_AVG_BAL_PROPLOCAL_BELONEYR_FF_SLOPELOCAL_BELONEYR_FF_MON_AVG_BALLOCAL_OVEONEYR_FF_SLOPELOCAL_SAV_SLOPELOCAL_SAV_CUR_ALL_BAL...kid6_10_nankid11_15_Ukid11_15_Ykid11_15_nankid16_17_Ukid16_17_Ykid16_17_nancar_buy_Newcar_buy_UNKNOWNcar_buy_nan
00.3333330.0000000.0090680.0011571.0000000.3444370.00.3332840.3331969.277653e-04...0.01.00.00.01.00.00.01.00.00.0
10.6685640.0026480.0100780.0012860.7355650.3444370.00.2931690.3457167.225051e-04...0.01.00.00.01.00.00.01.00.00.0
20.3335210.0000020.0000000.0000000.0000000.3444370.00.3444420.3445454.032385e-07...0.01.00.00.01.00.00.01.00.00.0
30.1496960.0038580.0000000.0000000.0000000.3444370.00.3444420.1546447.529284e-04...0.01.00.00.01.00.00.00.01.00.0
40.3334290.0000050.0000000.0000000.0000000.3444370.00.3444420.3444507.852062e-07...0.01.00.00.00.01.00.01.00.00.0
\n", 198 | "

5 rows × 178 columns

\n", 199 | "
" 200 | ], 201 | "text/plain": [ 202 | " LOCAL_CUR_SAV_SLOPE LOCAL_CUR_MON_AVG_BAL LOCAL_OVEONEYR_FF_MON_AVG_BAL \\\n", 203 | "0 0.333333 0.000000 0.009068 \n", 204 | "1 0.668564 0.002648 0.010078 \n", 205 | "2 0.333521 0.000002 0.000000 \n", 206 | "3 0.149696 0.003858 0.000000 \n", 207 | "4 0.333429 0.000005 0.000000 \n", 208 | "\n", 209 | " LOCAL_FIX_MON_AVG_BAL LOCAL_FIX_MON_AVG_BAL_PROP LOCAL_BELONEYR_FF_SLOPE \\\n", 210 | "0 0.001157 1.000000 0.344437 \n", 211 | "1 0.001286 0.735565 0.344437 \n", 212 | "2 0.000000 0.000000 0.344437 \n", 213 | "3 0.000000 0.000000 0.344437 \n", 214 | "4 0.000000 0.000000 0.344437 \n", 215 | "\n", 216 | " LOCAL_BELONEYR_FF_MON_AVG_BAL LOCAL_OVEONEYR_FF_SLOPE LOCAL_SAV_SLOPE \\\n", 217 | "0 0.0 0.333284 0.333196 \n", 218 | "1 0.0 0.293169 0.345716 \n", 219 | "2 0.0 0.344442 0.344545 \n", 220 | "3 0.0 0.344442 0.154644 \n", 221 | "4 0.0 0.344442 0.344450 \n", 222 | "\n", 223 | " LOCAL_SAV_CUR_ALL_BAL ... kid6_10_nan kid11_15_U kid11_15_Y \\\n", 224 | "0 9.277653e-04 ... 0.0 1.0 0.0 \n", 225 | "1 7.225051e-04 ... 0.0 1.0 0.0 \n", 226 | "2 4.032385e-07 ... 0.0 1.0 0.0 \n", 227 | "3 7.529284e-04 ... 0.0 1.0 0.0 \n", 228 | "4 7.852062e-07 ... 
0.0 1.0 0.0 \n", 229 | "\n", 230 | " kid11_15_nan kid16_17_U kid16_17_Y kid16_17_nan car_buy_New \\\n", 231 | "0 0.0 1.0 0.0 0.0 1.0 \n", 232 | "1 0.0 1.0 0.0 0.0 1.0 \n", 233 | "2 0.0 1.0 0.0 0.0 1.0 \n", 234 | "3 0.0 1.0 0.0 0.0 0.0 \n", 235 | "4 0.0 0.0 1.0 0.0 1.0 \n", 236 | "\n", 237 | " car_buy_UNKNOWN car_buy_nan \n", 238 | "0 0.0 0.0 \n", 239 | "1 0.0 0.0 \n", 240 | "2 0.0 0.0 \n", 241 | "3 1.0 0.0 \n", 242 | "4 0.0 0.0 \n", 243 | "\n", 244 | "[5 rows x 178 columns]" 245 | ] 246 | }, 247 | "execution_count": 2, 248 | "metadata": {}, 249 | "output_type": "execute_result" 250 | } 251 | ], 252 | "source": [ 253 | "model_data = pd.read_csv('./model_data.csv', encoding='utf-8')\n", 254 | "model_data.head()" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": 3, 260 | "metadata": { 261 | "collapsed": true 262 | }, 263 | "outputs": [], 264 | "source": [ 265 | "all_features = list(model_data.columns)\n", 266 | "all_features.remove('CHURN_CUST_IND')" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": 4, 272 | "metadata": { 273 | "collapsed": true 274 | }, 275 | "outputs": [], 276 | "source": [ 277 | "X_train, X_test, y_train, y_test = train_test_split(model_data[all_features], model_data['CHURN_CUST_IND'], test_size=0.3, random_state=1)" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": 5, 283 | "metadata": {}, 284 | "outputs": [ 285 | { 286 | "name": "stdout", 287 | "output_type": "stream", 288 | "text": [ 289 | "y_train_count: 0.0 10851\n", 290 | "1.0 1217\n", 291 | "Name: CHURN_CUST_IND, dtype: int64\n", 292 | "y_test_count: 0.0 4649\n", 293 | "1.0 524\n", 294 | "Name: CHURN_CUST_IND, dtype: int64\n" 295 | ] 296 | } 297 | ], 298 | "source": [ 299 | "print('y_train_count: ', y_train.value_counts())\n", 300 | "print('y_test_count: ', y_test.value_counts())" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": 6, 306 | "metadata": {}, 307 | "outputs": [ 308 | { 309 
| "name": "stdout", 310 | "output_type": "stream", 311 | "text": [ 312 | "Accuracy : 0.9115\n", 313 | "AUC Score (Testing): 0.855843\n" 314 | ] 315 | } 316 | ], 317 | "source": [ 318 | "# 使用GBDT模块的默认参数进行训练\n", 319 | "gbm0 = GradientBoostingClassifier(random_state=10)\n", 320 | "gbm0.fit(X_train,y_train)\n", 321 | "y_pred = gbm0.predict(X_test)\n", 322 | "y_predprob = gbm0.predict_proba(X_test)[:,1]\n", 323 | "print(\"Accuracy : %.4g\" % metrics.accuracy_score(y_test, y_pred))\n", 324 | "print (\"AUC Score (Testing): %f\" % metrics.roc_auc_score(y_test, y_predprob))" 325 | ] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": null, 330 | "metadata": { 331 | "collapsed": true 332 | }, 333 | "outputs": [], 334 | "source": [] 335 | } 336 | ], 337 | "metadata": { 338 | "kernelspec": { 339 | "display_name": "Python 3", 340 | "language": "python", 341 | "name": "python3" 342 | }, 343 | "language_info": { 344 | "codemirror_mode": { 345 | "name": "ipython", 346 | "version": 3 347 | }, 348 | "file_extension": ".py", 349 | "mimetype": "text/x-python", 350 | "name": "python", 351 | "nbconvert_exporter": "python", 352 | "pygments_lexer": "ipython3", 353 | "version": "3.6.3" 354 | } 355 | }, 356 | "nbformat": 4, 357 | "nbformat_minor": 2 358 | } 359 | -------------------------------------------------------------------------------- /bank churn model - modeling(GBDT).ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 47, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "from sklearn import ensemble, cross_validation, metrics\n", 11 | "from sklearn.ensemble import GradientBoostingClassifier\n", 12 | "from sklearn.cross_validation import train_test_split\n", 13 | "from sklearn.cross_validation import KFold\n", 14 | "from sklearn.model_selection import GridSearchCV\n", 15 | "from sklearn.metrics import roc_curve, 
auc\n", 16 | "import matplotlib.pyplot as plt" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 2, 22 | "metadata": {}, 23 | "outputs": [ 24 | { 25 | "data": { 26 | "text/html": [ 27 | "
\n", 28 | "\n", 41 | "\n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | "
LOCAL_CUR_SAV_SLOPELOCAL_CUR_MON_AVG_BALLOCAL_OVEONEYR_FF_MON_AVG_BALLOCAL_FIX_MON_AVG_BALLOCAL_FIX_MON_AVG_BAL_PROPLOCAL_BELONEYR_FF_SLOPELOCAL_BELONEYR_FF_MON_AVG_BALLOCAL_OVEONEYR_FF_SLOPELOCAL_SAV_SLOPELOCAL_SAV_CUR_ALL_BAL...kid6_10_nankid11_15_Ukid11_15_Ykid11_15_nankid16_17_Ukid16_17_Ykid16_17_nancar_buy_Newcar_buy_UNKNOWNcar_buy_nan
00.3333330.0000000.0090680.0011571.0000000.3444370.00.3332840.3331969.277653e-04...0.01.00.00.01.00.00.01.00.00.0
10.6685640.0026480.0100780.0012860.7355650.3444370.00.2931690.3457167.225051e-04...0.01.00.00.01.00.00.01.00.00.0
20.3335210.0000020.0000000.0000000.0000000.3444370.00.3444420.3445454.032385e-07...0.01.00.00.01.00.00.01.00.00.0
30.1496960.0038580.0000000.0000000.0000000.3444370.00.3444420.1546447.529284e-04...0.01.00.00.01.00.00.00.01.00.0
40.3334290.0000050.0000000.0000000.0000000.3444370.00.3444420.3444507.852062e-07...0.01.00.00.00.01.00.01.00.00.0
\n", 191 | "

5 rows × 178 columns

\n", 192 | "
" 193 | ], 194 | "text/plain": [ 195 | " LOCAL_CUR_SAV_SLOPE LOCAL_CUR_MON_AVG_BAL LOCAL_OVEONEYR_FF_MON_AVG_BAL \\\n", 196 | "0 0.333333 0.000000 0.009068 \n", 197 | "1 0.668564 0.002648 0.010078 \n", 198 | "2 0.333521 0.000002 0.000000 \n", 199 | "3 0.149696 0.003858 0.000000 \n", 200 | "4 0.333429 0.000005 0.000000 \n", 201 | "\n", 202 | " LOCAL_FIX_MON_AVG_BAL LOCAL_FIX_MON_AVG_BAL_PROP LOCAL_BELONEYR_FF_SLOPE \\\n", 203 | "0 0.001157 1.000000 0.344437 \n", 204 | "1 0.001286 0.735565 0.344437 \n", 205 | "2 0.000000 0.000000 0.344437 \n", 206 | "3 0.000000 0.000000 0.344437 \n", 207 | "4 0.000000 0.000000 0.344437 \n", 208 | "\n", 209 | " LOCAL_BELONEYR_FF_MON_AVG_BAL LOCAL_OVEONEYR_FF_SLOPE LOCAL_SAV_SLOPE \\\n", 210 | "0 0.0 0.333284 0.333196 \n", 211 | "1 0.0 0.293169 0.345716 \n", 212 | "2 0.0 0.344442 0.344545 \n", 213 | "3 0.0 0.344442 0.154644 \n", 214 | "4 0.0 0.344442 0.344450 \n", 215 | "\n", 216 | " LOCAL_SAV_CUR_ALL_BAL ... kid6_10_nan kid11_15_U kid11_15_Y \\\n", 217 | "0 9.277653e-04 ... 0.0 1.0 0.0 \n", 218 | "1 7.225051e-04 ... 0.0 1.0 0.0 \n", 219 | "2 4.032385e-07 ... 0.0 1.0 0.0 \n", 220 | "3 7.529284e-04 ... 0.0 1.0 0.0 \n", 221 | "4 7.852062e-07 ... 
0.0 1.0 0.0 \n", 222 | "\n", 223 | " kid11_15_nan kid16_17_U kid16_17_Y kid16_17_nan car_buy_New \\\n", 224 | "0 0.0 1.0 0.0 0.0 1.0 \n", 225 | "1 0.0 1.0 0.0 0.0 1.0 \n", 226 | "2 0.0 1.0 0.0 0.0 1.0 \n", 227 | "3 0.0 1.0 0.0 0.0 0.0 \n", 228 | "4 0.0 0.0 1.0 0.0 1.0 \n", 229 | "\n", 230 | " car_buy_UNKNOWN car_buy_nan \n", 231 | "0 0.0 0.0 \n", 232 | "1 0.0 0.0 \n", 233 | "2 0.0 0.0 \n", 234 | "3 1.0 0.0 \n", 235 | "4 0.0 0.0 \n", 236 | "\n", 237 | "[5 rows x 178 columns]" 238 | ] 239 | }, 240 | "execution_count": 2, 241 | "metadata": {}, 242 | "output_type": "execute_result" 243 | } 244 | ], 245 | "source": [ 246 | "model_data = pd.read_csv('./model_data.csv', encoding='utf-8')\n", 247 | "model_data.head()" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": 3, 253 | "metadata": { 254 | "collapsed": true 255 | }, 256 | "outputs": [], 257 | "source": [ 258 | "all_features = list(model_data.columns)\n", 259 | "all_features.remove('CHURN_CUST_IND')" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": 4, 265 | "metadata": { 266 | "collapsed": true 267 | }, 268 | "outputs": [], 269 | "source": [ 270 | "X_train, X_test, y_train, y_test = train_test_split(model_data[all_features], model_data['CHURN_CUST_IND'], test_size=0.3, random_state=1)" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": 5, 276 | "metadata": {}, 277 | "outputs": [ 278 | { 279 | "name": "stdout", 280 | "output_type": "stream", 281 | "text": [ 282 | "y_train_count: 0.0 10851\n", 283 | "1.0 1217\n", 284 | "Name: CHURN_CUST_IND, dtype: int64\n", 285 | "y_test_count: 0.0 4649\n", 286 | "1.0 524\n", 287 | "Name: CHURN_CUST_IND, dtype: int64\n" 288 | ] 289 | } 290 | ], 291 | "source": [ 292 | "print('y_train_count: ', y_train.value_counts())\n", 293 | "print('y_test_count: ', y_test.value_counts())" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": 6, 299 | "metadata": {}, 300 | "outputs": [ 301 | { 302 
| "name": "stdout", 303 | "output_type": "stream", 304 | "text": [ 305 | "Accuracy : 0.9115\n", 306 | "AUC Score (Testing): 0.855843\n" 307 | ] 308 | } 309 | ], 310 | "source": [ 311 | "# 使用GBDT模块的默认参数进行训练\n", 312 | "gbm0 = GradientBoostingClassifier(random_state=10)\n", 313 | "gbm0.fit(X_train,y_train)\n", 314 | "y_pred = gbm0.predict(X_test)\n", 315 | "y_predprob = gbm0.predict_proba(X_test)[:,1]\n", 316 | "print(\"Accuracy : %.4g\" % metrics.accuracy_score(y_test, y_pred))\n", 317 | "print (\"AUC Score (Testing): %f\" % metrics.roc_auc_score(y_test, y_predprob))" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": 12, 323 | "metadata": {}, 324 | "outputs": [ 325 | { 326 | "data": { 327 | "text/plain": [ 328 | "" 336 | ] 337 | }, 338 | "execution_count": 12, 339 | "metadata": {}, 340 | "output_type": "execute_result" 341 | } 342 | ], 343 | "source": [ 344 | "gbm0.get_params" 345 | ] 346 | }, 347 | { 348 | "cell_type": "code", 349 | "execution_count": 20, 350 | "metadata": {}, 351 | "outputs": [ 352 | { 353 | "data": { 354 | "text/plain": [ 355 | "GridSearchCV(cv=5, error_score='raise',\n", 356 | " estimator=GradientBoostingClassifier(criterion='friedman_mse', init=None,\n", 357 | " learning_rate=0.1, loss='deviance', max_depth=5,\n", 358 | " max_features=1.0, max_leaf_nodes=None,\n", 359 | " min_impurity_decrease=0.0, min_impurity_split=None,\n", 360 | " min_samples_leaf=20, min_samples_split=300,\n", 361 | " min_weight_fraction_leaf=0.0, n_estimators=100,\n", 362 | " presort='auto', random_state=10, subsample=0.8, verbose=0,\n", 363 | " warm_start=False),\n", 364 | " fit_params=None, iid=False, n_jobs=1,\n", 365 | " param_grid={'n_estimators': range(50, 201, 10)},\n", 366 | " pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',\n", 367 | " scoring='roc_auc', verbose=0)" 368 | ] 369 | }, 370 | "execution_count": 20, 371 | "metadata": {}, 372 | "output_type": "execute_result" 373 | } 374 | ], 375 | "source": [ 376 | "# 
对estimator进行参数调优\n", 377 | "param_test1 = {'n_estimators': range(50, 201, 10)}\n", 378 | "est1 = GridSearchCV(estimator=GradientBoostingClassifier(learning_rate=0.1, min_samples_split=300, min_samples_leaf=20, max_depth=5, \n", 379 | " max_features=1.0, subsample=0.8, random_state=10), \n", 380 | " param_grid = param_test1, scoring='roc_auc', iid=False, cv=5)\n", 381 | "est1.fit(X_train, y_train)" 382 | ] 383 | }, 384 | { 385 | "cell_type": "code", 386 | "execution_count": 21, 387 | "metadata": {}, 388 | "outputs": [ 389 | { 390 | "name": "stderr", 391 | "output_type": "stream", 392 | "text": [ 393 | "D:\\Anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_search.py:761: DeprecationWarning: The grid_scores_ attribute was deprecated in version 0.18 in favor of the more elaborate cv_results_ attribute. The grid_scores_ attribute will not be available from 0.20\n", 394 | " DeprecationWarning)\n" 395 | ] 396 | }, 397 | { 398 | "data": { 399 | "text/plain": [ 400 | "[mean: 0.85970, std: 0.01218, params: {'n_estimators': 50},\n", 401 | " mean: 0.85998, std: 0.01228, params: {'n_estimators': 60},\n", 402 | " mean: 0.86043, std: 0.01162, params: {'n_estimators': 70},\n", 403 | " mean: 0.86064, std: 0.01188, params: {'n_estimators': 80},\n", 404 | " mean: 0.86067, std: 0.01115, params: {'n_estimators': 90},\n", 405 | " mean: 0.86028, std: 0.01136, params: {'n_estimators': 100},\n", 406 | " mean: 0.85983, std: 0.01133, params: {'n_estimators': 110},\n", 407 | " mean: 0.85983, std: 0.01136, params: {'n_estimators': 120},\n", 408 | " mean: 0.85962, std: 0.01143, params: {'n_estimators': 130},\n", 409 | " mean: 0.85910, std: 0.01211, params: {'n_estimators': 140},\n", 410 | " mean: 0.85833, std: 0.01207, params: {'n_estimators': 150},\n", 411 | " mean: 0.85808, std: 0.01135, params: {'n_estimators': 160},\n", 412 | " mean: 0.85829, std: 0.01167, params: {'n_estimators': 170},\n", 413 | " mean: 0.85774, std: 0.01135, params: {'n_estimators': 180},\n", 414 | " mean: 
0.85699, std: 0.01170, params: {'n_estimators': 190},\n", 415 | " mean: 0.85577, std: 0.01126, params: {'n_estimators': 200}]" 416 | ] 417 | }, 418 | "execution_count": 21, 419 | "metadata": {}, 420 | "output_type": "execute_result" 421 | } 422 | ], 423 | "source": [ 424 | "est1.grid_scores_" 425 | ] 426 | }, 427 | { 428 | "cell_type": "code", 429 | "execution_count": 22, 430 | "metadata": {}, 431 | "outputs": [ 432 | { 433 | "data": { 434 | "text/plain": [ 435 | "{'n_estimators': 90}" 436 | ] 437 | }, 438 | "execution_count": 22, 439 | "metadata": {}, 440 | "output_type": "execute_result" 441 | } 442 | ], 443 | "source": [ 444 | "est1.best_params_" 445 | ] 446 | }, 447 | { 448 | "cell_type": "code", 449 | "execution_count": 23, 450 | "metadata": {}, 451 | "outputs": [ 452 | { 453 | "data": { 454 | "text/plain": [ 455 | "0.8606705972896014" 456 | ] 457 | }, 458 | "execution_count": 23, 459 | "metadata": {}, 460 | "output_type": "execute_result" 461 | } 462 | ], 463 | "source": [ 464 | "est1.best_score_" 465 | ] 466 | }, 467 | { 468 | "cell_type": "code", 469 | "execution_count": 25, 470 | "metadata": {}, 471 | "outputs": [ 472 | { 473 | "name": "stdout", 474 | "output_type": "stream", 475 | "text": [ 476 | "Accuracy : 0.9103\n", 477 | "AUC Score (Testing): 0.858071\n" 478 | ] 479 | } 480 | ], 481 | "source": [ 482 | "gbm1 = GradientBoostingClassifier(n_estimators=90, learning_rate=0.1, min_samples_split=300, min_samples_leaf=20, max_depth=8, \n", 483 | " max_features=1.0, subsample=0.8, random_state=10)\n", 484 | "gbm1.fit(X_train,y_train)\n", 485 | "y_pred = gbm1.predict(X_test)\n", 486 | "y_predprob = gbm1.predict_proba(X_test)[:,1]\n", 487 | "print(\"Accuracy : %.4g\" % metrics.accuracy_score(y_test, y_pred))\n", 488 | "print (\"AUC Score (Testing): %f\" % metrics.roc_auc_score(y_test, y_predprob))" 489 | ] 490 | }, 491 | { 492 | "cell_type": "code", 493 | "execution_count": 31, 494 | "metadata": {}, 495 | "outputs": [ 496 | { 497 | "data": { 498 | "text/plain": 
[ 499 | "GridSearchCV(cv=5, error_score='raise',\n", 500 | " estimator=GradientBoostingClassifier(criterion='friedman_mse', init=None,\n", 501 | " learning_rate=0.1, loss='deviance', max_depth=3,\n", 502 | " max_features=1.0, max_leaf_nodes=None,\n", 503 | " min_impurity_decrease=0.0, min_impurity_split=None,\n", 504 | " min_samples_leaf=20, min_samples_split=300,\n", 505 | " min_weight_fraction_leaf=0.0, n_estimators=90,\n", 506 | " presort='auto', random_state=10, subsample=0.8, verbose=0,\n", 507 | " warm_start=False),\n", 508 | " fit_params=None, iid=False, n_jobs=1,\n", 509 | " param_grid={'max_depth': range(5, 16, 2)}, pre_dispatch='2*n_jobs',\n", 510 | " refit=True, return_train_score='warn', scoring='roc_auc', verbose=0)" 511 | ] 512 | }, 513 | "execution_count": 31, 514 | "metadata": {}, 515 | "output_type": "execute_result" 516 | } 517 | ], 518 | "source": [ 519 | "# 对max_depth进行调优\n", 520 | "param_test2 = {'max_depth': range(5, 16, 2)}\n", 521 | "est2 = GridSearchCV(estimator=GradientBoostingClassifier(n_estimators=90, learning_rate=0.1, min_samples_split=300, min_samples_leaf=20,\n", 522 | " max_features=1.0, subsample=0.8, random_state=10),\n", 523 | " param_grid=param_test2, scoring='roc_auc', iid=False, cv=5)\n", 524 | "est2.fit(X_train, y_train)" 525 | ] 526 | }, 527 | { 528 | "cell_type": "code", 529 | "execution_count": 32, 530 | "metadata": {}, 531 | "outputs": [ 532 | { 533 | "name": "stderr", 534 | "output_type": "stream", 535 | "text": [ 536 | "D:\\Anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_search.py:761: DeprecationWarning: The grid_scores_ attribute was deprecated in version 0.18 in favor of the more elaborate cv_results_ attribute. 
The grid_scores_ attribute will not be available from 0.20\n", 537 | " DeprecationWarning)\n" 538 | ] 539 | }, 540 | { 541 | "data": { 542 | "text/plain": [ 543 | "[mean: 0.86067, std: 0.01115, params: {'max_depth': 5},\n", 544 | " mean: 0.85743, std: 0.00934, params: {'max_depth': 7},\n", 545 | " mean: 0.85636, std: 0.01054, params: {'max_depth': 9},\n", 546 | " mean: 0.85099, std: 0.00972, params: {'max_depth': 11},\n", 547 | " mean: 0.85169, std: 0.01273, params: {'max_depth': 13},\n", 548 | " mean: 0.84906, std: 0.01225, params: {'max_depth': 15}]" 549 | ] 550 | }, 551 | "execution_count": 32, 552 | "metadata": {}, 553 | "output_type": "execute_result" 554 | } 555 | ], 556 | "source": [ 557 | "est2.grid_scores_" 558 | ] 559 | }, 560 | { 561 | "cell_type": "code", 562 | "execution_count": 33, 563 | "metadata": {}, 564 | "outputs": [ 565 | { 566 | "data": { 567 | "text/plain": [ 568 | "{'max_depth': 5}" 569 | ] 570 | }, 571 | "execution_count": 33, 572 | "metadata": {}, 573 | "output_type": "execute_result" 574 | } 575 | ], 576 | "source": [ 577 | "est2.best_params_" 578 | ] 579 | }, 580 | { 581 | "cell_type": "code", 582 | "execution_count": 34, 583 | "metadata": {}, 584 | "outputs": [ 585 | { 586 | "data": { 587 | "text/plain": [ 588 | "0.8606705972896014" 589 | ] 590 | }, 591 | "execution_count": 34, 592 | "metadata": {}, 593 | "output_type": "execute_result" 594 | } 595 | ], 596 | "source": [ 597 | "est2.best_score_" 598 | ] 599 | }, 600 | { 601 | "cell_type": "code", 602 | "execution_count": 35, 603 | "metadata": {}, 604 | "outputs": [ 605 | { 606 | "name": "stdout", 607 | "output_type": "stream", 608 | "text": [ 609 | "Accuracy : 0.9126\n", 610 | "AUC Score (Testing): 0.859257\n" 611 | ] 612 | } 613 | ], 614 | "source": [ 615 | "gbm2 = GradientBoostingClassifier(n_estimators=90, learning_rate=0.1, min_samples_split=300, min_samples_leaf=20, max_depth=5, \n", 616 | " max_features=1.0, subsample=0.8, random_state=10)\n", 617 | "gbm2.fit(X_train,y_train)\n", 
618 | "y_pred = gbm2.predict(X_test)\n", 619 | "y_predprob = gbm2.predict_proba(X_test)[:,1]\n", 620 | "print(\"Accuracy : %.4g\" % metrics.accuracy_score(y_test, y_pred))\n", 621 | "print (\"AUC Score (Testing): %f\" % metrics.roc_auc_score(y_test, y_predprob))" 622 | ] 623 | }, 624 | { 625 | "cell_type": "code", 626 | "execution_count": 37, 627 | "metadata": {}, 628 | "outputs": [ 629 | { 630 | "data": { 631 | "text/plain": [ 632 | "GridSearchCV(cv=5, error_score='raise',\n", 633 | " estimator=GradientBoostingClassifier(criterion='friedman_mse', init=None,\n", 634 | " learning_rate=0.1, loss='deviance', max_depth=5,\n", 635 | " max_features=1.0, max_leaf_nodes=None,\n", 636 | " min_impurity_decrease=0.0, min_impurity_split=None,\n", 637 | " min_samples_leaf=20, min_samples_split=300,\n", 638 | " min_weight_fraction_leaf=0.0, n_estimators=90,\n", 639 | " presort='auto', random_state=10, subsample=0.8, verbose=0,\n", 640 | " warm_start=False),\n", 641 | " fit_params=None, iid=False, n_jobs=1,\n", 642 | " param_grid={'learning_rate': [0.01, 0.05, 0.1, 0.15]},\n", 643 | " pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',\n", 644 | " scoring='roc_auc', verbose=0)" 645 | ] 646 | }, 647 | "execution_count": 37, 648 | "metadata": {}, 649 | "output_type": "execute_result" 650 | } 651 | ], 652 | "source": [ 653 | "# 对learning_rate进行调优\n", 654 | "param_test3 = {'learning_rate': [0.01, 0.05, 0.1, 0.15]}\n", 655 | "est3 = GridSearchCV(estimator=GradientBoostingClassifier(n_estimators=90, max_depth=5, min_samples_split=300, min_samples_leaf=20,\n", 656 | " max_features=1.0, subsample=0.8, random_state=10),\n", 657 | " param_grid=param_test3, scoring='roc_auc', iid=False, cv=5)\n", 658 | "est3.fit(X_train, y_train)" 659 | ] 660 | }, 661 | { 662 | "cell_type": "code", 663 | "execution_count": 38, 664 | "metadata": {}, 665 | "outputs": [ 666 | { 667 | "name": "stderr", 668 | "output_type": "stream", 669 | "text": [ 670 | 
"D:\\Anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_search.py:761: DeprecationWarning: The grid_scores_ attribute was deprecated in version 0.18 in favor of the more elaborate cv_results_ attribute. The grid_scores_ attribute will not be available from 0.20\n", 671 | " DeprecationWarning)\n" 672 | ] 673 | }, 674 | { 675 | "data": { 676 | "text/plain": [ 677 | "[mean: 0.85457, std: 0.01371, params: {'learning_rate': 0.01},\n", 678 | " mean: 0.86058, std: 0.01204, params: {'learning_rate': 0.05},\n", 679 | " mean: 0.86067, std: 0.01115, params: {'learning_rate': 0.1},\n", 680 | " mean: 0.85709, std: 0.01315, params: {'learning_rate': 0.15}]" 681 | ] 682 | }, 683 | "execution_count": 38, 684 | "metadata": {}, 685 | "output_type": "execute_result" 686 | } 687 | ], 688 | "source": [ 689 | "est3.grid_scores_" 690 | ] 691 | }, 692 | { 693 | "cell_type": "code", 694 | "execution_count": 39, 695 | "metadata": {}, 696 | "outputs": [ 697 | { 698 | "data": { 699 | "text/plain": [ 700 | "{'learning_rate': 0.1}" 701 | ] 702 | }, 703 | "execution_count": 39, 704 | "metadata": {}, 705 | "output_type": "execute_result" 706 | } 707 | ], 708 | "source": [ 709 | "est3.best_params_" 710 | ] 711 | }, 712 | { 713 | "cell_type": "code", 714 | "execution_count": 40, 715 | "metadata": {}, 716 | "outputs": [ 717 | { 718 | "data": { 719 | "text/plain": [ 720 | "0.8606705972896014" 721 | ] 722 | }, 723 | "execution_count": 40, 724 | "metadata": {}, 725 | "output_type": "execute_result" 726 | } 727 | ], 728 | "source": [ 729 | "est3.best_score_" 730 | ] 731 | }, 732 | { 733 | "cell_type": "code", 734 | "execution_count": 41, 735 | "metadata": {}, 736 | "outputs": [ 737 | { 738 | "data": { 739 | "text/plain": [ 740 | "GridSearchCV(cv=5, error_score='raise',\n", 741 | " estimator=GradientBoostingClassifier(criterion='friedman_mse', init=None,\n", 742 | " learning_rate=0.1, loss='deviance', max_depth=5,\n", 743 | " max_features=None, max_leaf_nodes=None,\n", 744 | " 
min_impurity_decrease=0.0, min_impurity_split=None,\n", 745 | " min_samples_leaf=20, min_samples_split=300,\n", 746 | " min_weight_fraction_leaf=0.0, n_estimators=90,\n", 747 | " presort='auto', random_state=10, subsample=0.8, verbose=0,\n", 748 | " warm_start=False),\n", 749 | " fit_params=None, iid=False, n_jobs=1,\n", 750 | " param_grid={'max_features': (0.3, 0.5, 0.8, 1.0)},\n", 751 | " pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',\n", 752 | " scoring='roc_auc', verbose=0)" 753 | ] 754 | }, 755 | "execution_count": 41, 756 | "metadata": {}, 757 | "output_type": "execute_result" 758 | } 759 | ], 760 | "source": [ 761 | "# 对max_features进行调优\n", 762 | "param_test4 = {'max_features': (0.3, 0.5, 0.8, 1.0)}\n", 763 | "est4 = GridSearchCV(estimator=GradientBoostingClassifier(n_estimators=90, learning_rate=0.1, min_samples_split=300, min_samples_leaf=20,\n", 764 | " max_depth=5, subsample=0.8, random_state=10),\n", 765 | " param_grid=param_test4, scoring='roc_auc', iid=False, cv=5)\n", 766 | "est4.fit(X_train, y_train)" 767 | ] 768 | }, 769 | { 770 | "cell_type": "code", 771 | "execution_count": 42, 772 | "metadata": {}, 773 | "outputs": [ 774 | { 775 | "name": "stderr", 776 | "output_type": "stream", 777 | "text": [ 778 | "D:\\Anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_search.py:761: DeprecationWarning: The grid_scores_ attribute was deprecated in version 0.18 in favor of the more elaborate cv_results_ attribute. 
The grid_scores_ attribute will not be available from 0.20\n", 779 | " DeprecationWarning)\n" 780 | ] 781 | }, 782 | { 783 | "data": { 784 | "text/plain": [ 785 | "[mean: 0.85937, std: 0.01201, params: {'max_features': 0.3},\n", 786 | " mean: 0.85815, std: 0.01321, params: {'max_features': 0.5},\n", 787 | " mean: 0.85989, std: 0.01139, params: {'max_features': 0.8},\n", 788 | " mean: 0.86067, std: 0.01115, params: {'max_features': 1.0}]" 789 | ] 790 | }, 791 | "execution_count": 42, 792 | "metadata": {}, 793 | "output_type": "execute_result" 794 | } 795 | ], 796 | "source": [ 797 | "est4.grid_scores_" 798 | ] 799 | }, 800 | { 801 | "cell_type": "code", 802 | "execution_count": 43, 803 | "metadata": {}, 804 | "outputs": [ 805 | { 806 | "data": { 807 | "text/plain": [ 808 | "{'max_features': 1.0}" 809 | ] 810 | }, 811 | "execution_count": 43, 812 | "metadata": {}, 813 | "output_type": "execute_result" 814 | } 815 | ], 816 | "source": [ 817 | "est4.best_params_" 818 | ] 819 | }, 820 | { 821 | "cell_type": "code", 822 | "execution_count": 44, 823 | "metadata": {}, 824 | "outputs": [ 825 | { 826 | "data": { 827 | "text/plain": [ 828 | "0.8606705972896014" 829 | ] 830 | }, 831 | "execution_count": 44, 832 | "metadata": {}, 833 | "output_type": "execute_result" 834 | } 835 | ], 836 | "source": [ 837 | "est4.best_score_" 838 | ] 839 | }, 840 | { 841 | "cell_type": "code", 842 | "execution_count": 51, 843 | "metadata": {}, 844 | "outputs": [ 845 | { 846 | "data": { 847 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYUAAAGDCAYAAADEegxVAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzt3XeYVOXZx/HvDYggzQI2QAHFShFd\nsTeKIgoao4INu0ZfNNaoaMSWSIy9S+wVIlHBCiogsaCAoAgKAjYQFZQuZVnu94/nsBmWLcPuzpwp\nv891zbVzysy5z8zs3POU8zzm7oiIiADUiDsAERHJHEoKIiJSTElBRESKKSmIiEgxJQURESmmpCAi\nIsWUFKRSzOwUMxsRdxxxMbOlZtYqhuO2MDM3s1rpPnYqmNkUMzu0Eo/L689fKikp5AAz+9bMlkdf\nVD+Z2ZNmVj+Vx3T359z98FQ8t5ntb2YjzWyJmS0ys1fNbLdUHCvJeEab2TmJ69y9vrvPStHxdjKz\nF81sfnT+n5vZZWZWMxXHq6woOe1Yledw993dfXQFx1kvEaby85fvlBRyRw93rw/sAXQArok5nkox\ns/2AEcBQYFugJfAZ8EEqfpln2i9uM9sB+Bj4AWjr7o2AE4ACoEE1Hyu2c8+0110SuLtuWX4DvgW6\nJCzfBryesLwxcDvwPfAz8DBQN2H7McAkYDEwE+gWrW8EPAbMBeYAtwA1o21nAO9H9x8Gbi8R01Dg\nsuj+tsB/gHnAN8DF5ZzLf4EHS1n/JvB0dP9QYDbQD5gfnf8pyZxvwmOvAn4CngE2A16L4lsQ3W8W\n7f83oAhYASwF7o/WO7BjdP9J4AHgdWAJ4Ut9h4R4DgemAYuAB4H3gHPKOP9nE9+7Ura3iI59enR+\n84FrE7Z3BD4CFkbv2/1A7YTtDvwf8DXwTbTuHkISWgxMAA5K2L9m9DrPjM5tAtAcGBM917LodekV\n7X804bO0EPgQaFfic3oV8DmwEqhFwmc3in18FMfPwJ3R+u+jYy2NbvuR8PmL9tkdeBv4LXpsv7j/\nL7P1FnsAulXDm7juP1YzYDJwT8L2u4FhwOaEX5uvArdG2zpGX1ZdCSXHpsAu0bZXgEeAesCWwCfA\n+dG24n9K4ODoS8Wi5c2A5YRkUCP6IrkeqA20AmYBR5RyHpsQvoAPK2XbmcDc6P6hwGrgTkICOCT6\ncto5ifNd+9h/RI+tC2wB/DE6fgPgReCVhGOPpsSXOOsnhd+i17IW8BwwKNrWOPqSOy7a9megsOTz\nJTzvT8CZ5bzXLaJj/yuKvT3hC3bXaPtewL7RsVoAXwKXlIj77ei1WZsoT41eg1rA5VEMdaJtVxI+\nTzsDFh1vi5KvQbS8J/ALsA8hmZxO+GxunPA5nURIKnUT1q397H4EnBbdrw/sW+KcayUc6wz+9/lr\nQEiAlwN1ouV94v6/zNZb7AHoVg1vYvjHWkr4JefAu8Cm0TYjfGEm/nLdj//9SnwEuKuU59wq+rJJ\nLFGcBIyK7if+Uxrh19zB0fK5wMjo/j7A9yWe+xrgiVKO2SyKf5dStnUDCqP7hxK+2OslbP838Nck\nzvdQYNXaL70yXs89gAUJy6OpOCk8mrCtO/BVdL8P8FHCNiMk0LKSQiFRSa2M7Wu/IJslrPsE6F3G\n/pcAL5eIu1MFn6cFQPvo/jTgmDL2K5kUHgJuLrHPNOCQhM/pWaV8dtcmhTHAjUDjMs65rKRwEjAx\nXf9vuX5TvV7uONbd3zGzQ4DnCb9QFwJNCL+AJ5jZ2n2N8EsOwq+2N0p5vu2BjYC5CY+rQfhCW4e7\nu5kNIvxzjgFOJlSDrH2ebc1sYcJDahKqiUpaAKwBtgG+KrFtG0JVSfG+7r4sYfk7QsmkovMFmOfu\nK4o3mm0C3EVIPJtFqxuYWU13LyolztL8lHD/d8IvXaKYil+z6
LWaXc7z/Eo410odz8x2IpSgCgiv\nQy1CSS3ROu+hmV0OnBPF6kBDwucHwudjZhLxQHivTzezixLW1Y6et9Rjl3A2cBPwlZl9A9zo7q8l\ncdwNiVEqoIbmHOPu7xF+ud4erZpPqMrZ3d03jW6NPDRKQ/gn3aGUp/qBUFJonPC4hu6+exmHfgE4\n3sy2J5QO/pPwPN8kPMem7t7A3buXEvsyQhXCCaU8/4mEEtBam5lZvYTl7YAfkzhfCF98iS4nVI/s\n4+4NCdVhEJJJaftviLmEElB4wpCpmpW9O+8QqrIq6yFCQm0dnUs//nceaxWfj5kdRKjnPxHYzN03\nJVQnrn1MWZ+P0vwA/K3Ee72Ju79Q2rFLcvev3f0kQlXlP4Ah0Xtc0eu/ITFKBZQUctPdQFcz28Pd\n1xDqn+8ysy0BzKypmR0R7fsYcKaZdTazGtG2Xdx9LqEX0B1m1jDatkNUElmPu08kNNQ+Cgx397Ul\ng0+AxWZ2lZnVNbOaZtbGzPYuI/arCb82LzazBma2mZndQqgCurHEvjeaWe3oi+1o4MUkzrc0DQiJ\nZKGZbQ70L7H9Z0JbSGW8DrQ1s2OjHjf/B2xdzv79gf3N7J9mtnUU/45m9qyZbZrE8RoQ2jCWmtku\nwAVJ7L+a8N7VMrPrCSWFtR4Fbjaz1ha0M7Mtom0lX5d/AX8ys32ifeuZ2VFmllSvKTM71cyaRO/h\n2s9PURTbGsp+D14DtjazS8xs4+hzs08yx5T1KSnkIHefBzxNqGOH8EtwBjDWzBYTfo3uHO37CaER\n9y7CL8T3CNUAEOrDawNTCVU7Qyi/auMFoAuh+mptLEVAD0I9/TeEX/KPEno2lRb7+8ARhIbZuYRq\noQ7Age7+dcKuP0Ux/Uho2P2Tu6+tcirzfMtwN6HRdj4wFnirxPZ7CKWgBWZ2bznPU9r5zCeUfG4j\nVA3tRuhhs7KM/WcSEmALYIqZLSKUusYT2owqcgWh+m4J4Ut6cAX7Dyf07JpOeK1XsG4Vz52E9poR\nhGTzGOG1ArgBeMrMFprZie4+ntCedD/hvZlBqPtPVjfCOS8lvOa93X2Fu/9O6AX2QXSsfRMf5O5L\nCB0lehA+F18Dh23AcSXB2t4iIlkjugL2WXcvrxomI5lZDUKX2FPcfVTc8YiUpJKCSIqZ2RFmtqmZ\nbcz/6vjHxhyWSKmUFERSbz9C75j5hCqOY919ebwhiZRO1UciIlJMJQURESmmpCAiIsWy7ormxo0b\ne4sWLeIOQ0Qkq0yYMGG+uzepaL+sSwotWrRg/PjxcYchIpJVzOy7ZPZT9ZGIiBRTUhARkWJKCiIi\nUkxJQUREiikpiIhIMSUFEREppqQgIiLFlBRERKSYkoKIiBRLWVIws8fN7Bcz+6KM7WZm95rZDDP7\n3Mz2TFUsIiKSnFSWFJ4kTK9XliOB1tHtPMKE4yIiEqOUJQV3HwP8Vs4uxwBPezAW2NTMypv/V0Qk\nf61Zk5bDxDkgXlPWnSB8drRubskdzew8QmmC7bbbLi3BiYikQvsbR7BoeeEGPeb8j4fQ/sfpXNzz\nL8z45zEpiiyIMylYKetKnQbO3QcCAwEKCgo0VZyIVIvKfEFXVaO6G/HtgKOSf8DNN8PoJ6F3b7r/\nvXvK4lorzqQwG2iesNwM+DGmWEQkxeL4Aq7IBn9Bp9vnn0P//nDaafDEE1CzZsoPGWdSGAb0NbNB\nwD7AIndfr+pIRLJXYiLI+C/gTNSuHYwZA/vtl5aEAClMCmb2AnAo0NjMZgP9gY0A3P1h4A2gOzAD\n+B04M1WxiEhqlVUKUCKoBHe46io47DA48kg48MC0Hj5lScHdT6pguwP/l6rji0h6tL9xBIC+/KvD\nmjXQty88FPXQP/LItIeQddNxikg8yisNfNb/8BgiyjFFRXD++fDYY/CXv8CAAbGEoaQgIuVamwxU\nFZRCRUVw5pnwzDPw17/Cj
TeCldZBM/WUFESkWGmlASWDNKhRAzbZJHQ/ve66WENRUhDJYyWTgBJA\nmq1aBb/8As2ahXaEmEoHiZQURPKQqoQywMqVcMIJ8NlnMGUK1K8fd0SAkoJIVqrqhWBKBjFbvhz+\n8AcYPhwefDBjEgIoKYhkBVXz5JBly6BnTxg1Ch59FM4+O+6I1qGkIJKhdDVwjurXD0aPhqeeCsNX\nZBglBZGYVFQFpESQo266CY44ArqnfnC7ylBSEEkzNfLmoQULwrUHt94KjRplbEIAJQWRaqcSgKxj\n/nzo2hWmTg29jQ44IO6IyqWkIFINVP8vpfrlF+jcGWbMgKFDMz4hgJKCSJWoKkjKNHduSAjffguv\nvRbuZwElBZFK0uigUq6FC8MFam+9BQcfHHc0SVNSEKmARgeVDfLbb7DZZrDrrvDVV7DRRnFHtEGU\nFEQSaEA4qZKZM6FTJzj33DCwXZYlBFBSEFEjsVSPadNCu8GKFXBU9n6GlBQk56mLqKTc1KkhIaxZ\nE4avaNs27ogqTUlBcpZ6BklaLFsGXbqE+6NHh7aELKakIDlFVUGSdvXqwX33hdLBTjvFHU2VKSlI\nTlCpQNLuk0/CtQjHHAN//GPc0VQbJQXJerpeQNLugw/gyCOhadMwjlEW9jIqi5KCZK3E0oGuF5C0\nGT0ajj46JIR33smphABKCpJl1GYgsXrnnTBBTsuW8O67sPXWcUdU7ZQUJGuomkhiN2IE7LhjSA5b\nbhl3NClRI+4ARJK1aHmhqokkHitWhL//+EdoT8jRhAAqKUiGK1ldJJJ2Q4bA5ZeHi9JatYIGDeKO\nKKWUFCQjqYupZITnn4c+fWCffaBx47ijSQslBckoSgaSMZ56Cs48Mwx7/dprUL9+3BGlhZKCZAw1\nJEvGGDYsJITOncOMaZtsEndEaaOkILHT9QaScTp1gquugv79oU6duKNJKyUFiY2qiiTjDBoULkyr\nXx9uvTXuaGKhLqkSi8SqIpUOJCMMGAAnnQR33x13JLFSSUHSpmT3UiUDyQjucPPNoaropJPg6qvj\njihWSgqScqomkozlHqbN/Pvf4fTT4bHHoGbNuKOKlZKCpNyi5YVKBpKZ5s+HJ54Icyo//DDUUI26\nkoKkVPsbR+hKZMk87uFvkyYwfnwY2E4JAVBSkBRRN1PJWGvWwIUXQt26cOedsO22cUeUUZQUJCVU\nZSQZqagoVBU98UTeNyiXReUlqXaqMpKMtHo1nHFGSAj9+4fGZbO4o8o4KilItVp7/YGqjCTjnHUW\nPPss/O1v0K9f3NFkLCUFqVaqNpKMddxx0L59GAZbyqSkINVG1UaScVasgI8+gsMOg2OPjTuarKCk\nIFWiq5QlYy1fHhLBqFEwfTq0aBF3RFlBSUEqRVcpS0Zbtgx69IDRo+HRR5UQNoCSgmwwzXsgGW3J\nEjjqqDCX8tNPw6mnxh1RVlFSkA2i3kWS8Z57Dj78MEyl2atX3NFkHSUFSYquUJascf75sP/+0K5d\n3JFkJSUFKZfaDiQrzJ8PJ58c5kLYbTclhCpQUpBy6boDyXg//xzmUp45E+bMCUlBKi2lw1yYWTcz\nm2ZmM8xsvYFGzGw7MxtlZhPN7HMz657KeGTD6LoDyXg//giHHgrffAOvvw5du8YdUdZLWVIws5rA\nA8CRwG7ASWZWMoVfB/zb3TsAvYEHUxWPbLhFywvVfiCZ68cf4ZBDYPZseOst6NQp7ohyQipLCh2B\nGe4+y91XAYOAY0rs40DD6H4j4McUxiMbQKUEyXibbgpt2sCIEXDQQXFHkzNS2abQFPghYXk2sE+J\nfW4ARpjZRUA9oEsK45ENoLYEyVizZsEWW0CjRvDyy3FHk3NSWVIobUxaL7F8EvCkuzcDugPPmNl6\nMZnZeWY23szGz5s3LwWhSiKVEiRjffVVKBWcdlrckeSsVCaF2UDzhOVmrF89dDbwbwB3/wi
oAzQu\n+UTuPtDdC9y9oEmTJikKV9ZSW4JkpC++CI3KRUVhLgRJiVQmhXFAazNraWa1CQ3Jw0rs8z3QGcDM\ndiUkBRUFYqRSgmSkzz4LI53WqBHGM2rTJu6IclbK2hTcfbWZ9QWGAzWBx919ipndBIx392HA5cC/\nzOxSQtXSGe5esopJ0kBXLEvGcg8zptWpAyNHQuvWcUeU0yzbvoMLCgp8/PjxcYeRUzSekWS8GTOg\nZk1o2TLuSLKWmU1w94KK9tMczaI2BMlM778fZklzhx13VEJIEyWFPKc2BMlIo0dDt27w2muwYEHc\n0eQVJYU8pmojyUhvvw3du8P228N778Hmm8cdUV5RUshTSgiSkd54I8yYttNOobSw9dZxR5R3NEpq\nntIVy5Kx9toLXn1VJYSYqKSQh9SOIBnnh2hEnO7dQwOzEkJslBTyjKqNJOM89xzssEMY6RTAShsh\nR9JFSSGPKCFIxnnyyTCO0YEHhpvETkkhTyghSMYZOBDOPBO6dAldT+vXjzsiQUkhLyghSMYZNw7O\nPz+0IQwbBptsEndEElFSyHFKCJKR9t47tCW89FIY00gyhpJCDlNCkIxz110waVK4f/LJsPHG8cYj\n61FSyGEa00gyhjvceCNcdhk8+mjc0Ug5dPGaiKSWO1x7Ldx6axgC+5574o5IyqGkkIMS50YQiZU7\nXHEF3HknnHcePPRQmChHMpaSQo5Z246gISwkI6xeDV9+CX37wr336sK0LKCkkGM0ppFkhDVrYOlS\naNgQXnkFNtpICSFLqBwnItWrqAjOPjvMqbx8OdSurYSQRVRSyBFqR5CMsHo1nH46PP883HCDrkHI\nQkoKOULVRhK7wkI45RR48UX4+9/hmmvijkgqQUlBRKrHZZeFhHDHHeG+ZCUlhRyg+REkI1xxBXTo\nAGedFXckUgVqaM4BunJZYvP772HoijVrwpzKSghZT0khy6mUILFZuhSOOgouvxw++ijuaKSaqPoo\ni2nAO4nN4sVh2OuPPoJnn4UDDog7IqkmSgpZTD2OJBYLF0K3bjBhAgwaBCecEHdEUo2UFLKUqo0k\nNlOmhKErhgyBY46JOxqpZkoKWUjVRhKLVavC1ckHHADffgubbRZ3RJICamjOMkoIEouffoK99oLH\nHw/LSgg5SyWFLKKEILGYMwc6dQp/W7WKOxpJMSWFLKGEILH4/vuQEH75Bd56Cw48MO6IJMWUFLKE\nehpJ2i1eDIccAgsWwIgRsO++cUckaaCkICKla9gQLr4YDjoICgrijkbSREkhC6j7qaTVV1+Fq5UL\nCuDSS+OORtJMSSELqOpI0uaLL6BzZ9h883C/Zs24I5I0U5dUEQkmTYJDD4VatcIUmkoIeUlJQURg\n/PjQy2iTTeC992DnneOOSGKi6qMMp/YESYv77oNGjWDUKGjRIu5oJEZKChlM1yZIyrmDGQwcCL/9\nBttsE3dEEjNVH2UwTZ4jKTVyJOy/P8yfDxtvrIQgQJJJwcxqm9mOqQ5GRNJk+PAwQc6SJVBUFHc0\nkkEqTApmdhQwGXg7Wt7DzF5OdWD5Tm0JkjKvvQY9e4bG5FGjYKut4o5IMkgyJYWbgH2AhQDuPglQ\nqSHFVHUkKfHmm3DccdCuXag+atIk7ogkwySTFArdfWGJdZ6KYEQkxdq3hxNPhHfeCReoiZSQTFL4\n0sxOBGqYWUszuxsYm+K48pqqjqTavfcerF4N224b5lRu1CjuiCRDJZMU+gJ7AWuAl4AVwJ9TGVS+\nU9WRVKvHH4fDDoO77oo7EskCySSFI9z9KnfvEN2uBo5MdWAiUg0efhjOPhu6doX/+7+4o5EskExS\nuK6UdddWdyASqOpIqs2998IFF4Sup0OHhiEsRCpQ5hXNZnYE0A1oamZ3JmxqSKhKkhTQiKhSLX78\nEfr1gz/8AQYNgtq1445IskR5w1z8AnxBaEOYkrB+CXB
1KoMSkSradlv48EPYdVfYSCVPSV6ZScHd\nJwITzew5d1+RxpjyUvsbR7BoeaGqjqTy3OGGG2DLLUP7Qbt2cUckWSiZNoWmZjbIzD43s+lrb8k8\nuZl1M7NpZjbDzEotXZjZiWY21cymmNnzGxR9jlg78N23A45SryOpHHe45hq46SaYODEsi1RCMqOk\nPgncAtxO6HV0Jkm0KZhZTeABoCswGxhnZsPcfWrCPq2Ba4AD3H2BmW25wWeQA9SOIFXiDpddBnff\nDX/6EzzwQBj5VKQSkikpbOLuwwHcfaa7XwcclsTjOgIz3H2Wu68CBgHHlNjnXOABd18QPf8vyYee\nG9TbSKrEHS66KCSEiy+GBx+EGhr8WCovmU/PSjMzYKaZ/cnMegDJ/KJvCvyQsDw7WpdoJ2AnM/vA\nzMaaWbfSnsjMzjOz8WY2ft68eUkcOjtovgSpMjPYYQe48sqQGFRCkCpKpvroUqA+cDHwN6ARcFYS\njyvt01myorMW0Bo4FGgG/NfM2pQca8ndBwIDAQoKCnKmslTVRlJpRUUwfXroXXTppXFHIzmkwpKC\nu3/s7kvc/Xt3P83dewLfJfHcs4HmCcvNgB9L2Weouxe6+zfANEKSEJGyrF4Np50G++wDc+bEHY3k\nmHKTgpntbWbHmlnjaHl3M3ua5AbEGwe0jgbRqw30BoaV2OcVovaJ6Bg7AbM28ByyktoSpFIKC6F3\nb3jhBbj2WmhaskZWpGrKTApmdivwHHAK8JaZXQuMAj4jfHmXy91XEwbTGw58Cfzb3aeY2U1m1jPa\nbTjwq5lNjZ77Snf/tSonlC006J1ssJUr4fjj4T//gTvvhKuuijsiyUHltSkcA7R39+Vmtjmh6qe9\nu09L9snd/Q3gjRLrrk+478Bl0S0v6CI1qbT77oNhw+D++zW4naRMeUlhhbsvB3D338zsqw1JCFI6\nNS5Lpf35z9C2LRxxRNyRSA4rr02hlZm9FN1eBlokLL+UrgBF8trSpXDOOfDTT2EMIyUESbHySgp/\nLLF8fyoDEZESFi+G7t1h7Fg45hjo0SPuiCQPlDcg3rvpDCQfqMeRJG3BAujWDT79FAYPVkKQtEnm\n4jWpJmpPkKT8+iscfjhMnhx6GvXsWfFjRKqJkoJIpikqCn+HDoUjNfOtpFfSScHMNnb3lakMJpep\n6kgqNG8ebLppmA9h3DgNbCexqPBTZ2YdzWwy8HW03N7M7kt5ZDlGF6tJuebMgQMPhPPOC8tKCBKT\nZD559wJHA78CuPtnJDd0tkRUSpByffcdHHwwzJ0bup+KxCiZ6qMa7v6drTskb1GK4sk5Gh5byjVr\nFhx2GCxaBG+/HQa5E4lRMknhBzPrCHg0m9pFQFLTceY7JQQpV1FR6Fm0dCmMHAl77hl3RCJJJYUL\nCFVI2wE/A+9E66QC6oIq5apZE/71L6hXD9q1izsaESC5pLDa3XunPJIco3YEKdPkyTBmTBjUbr/9\n4o5GZB3JJIVxZjYNGAy85O5LUhxTTlApQUo1cSJ07Qp16sCpp0KjRnFHJLKOZGZe2wG4BdgLmGxm\nr5iZSg7lUClBSvXJJ9CpU6gueu89JQTJSEl1hnb3D939YmBPYDFh8h0pg65JkPV8+CF06QKbbRaq\njnbYIe6IREqVzMVr9c3sFDN7FfgEmAfsn/LIspRKCVKqKVNgm21CQth++7ijESlTMm0KXwCvAre5\n+39THE/WU1uCrGPJEmjQAM49N7Qh1K0bd0Qi5Uqm+qiVu1+khCCygd56C1q2hI8+CstKCJIFyiwp\nmNkd7n458B8z85Lb3f24lEaWhVR1JMVefRWOPx523x122inuaESSVl710eDor2ZcS5KqjgQIcyD0\n7g0dOsDw4aFxWSRLlDfz2ifR3V3dfZ3EYGZ9Ac3MlkClBAFCVVGvXmEMozfeULdTyTrJtCmcVcq6\ns6s7kGynbqgCQMe
OcPPNoT1BCUGyUHltCr2A3kBLM3spYVMDYGGqAxPJKi+8AAcdBM2awTXXxB2N\nSKWV16bwCWEOhWbAAwnrlwATUxlUtlHVUZ576CG48EK44AJ48MG4oxGpkvLaFL4BviGMiirlUANz\nHrvnHrjkEujRA+66K+5oRKqszDYFM3sv+rvAzH5LuC0ws9/SF6JIhvrnP0NCOO44GDIENt447ohE\nqqy86qO1U242TkcgIlllxQp49tnQ9fTpp2EjVR9Kbiiv+mhNdLc58KO7rzKzA4F2wLOEgfFE8ot7\nmDGtTh0YPToMYVErmdFiRLJDMl1SXyFMxbkD8DSwK/B8SqMSyUTucPXV8Ic/QGFhuChNCUFyTDJJ\nYY27FwLHAXe7+0VA09SGJZJh3OHSS+G226B58zCVpkgOSiYprDazE4DTgNeidapAjag7ah5YsyZM\nnXnPPfDnP8MDD0CNpKYiEck6yV7RfBhh6OxZZtYSeCG1YWUPXcmcB668MlyL8Je/hG6nZnFHJJIy\nFVaIuvsXZnYxsKOZ7QLMcPe/pT40kQxx8smw+ebQr58SguS8CpOCmR0EPAPMAQzY2sxOc/cPUh2c\nSGwKC8Pw18cdB3vtFW4ieSCZ6qO7gO7ufoC77w8cBdyT2rBEYrRqVbj+4I9/hI8/jjsakbRKpj9d\nbXefunbB3b80s9opjClrqJE5B61cCSecEEoJd98dhsAWySPJJIVPzewRQhUSwCloQDxAYx7lnOXL\nwzUIw4eHge0uuCDuiETSLpmk8CfgYuAvhDaFMcB9qQxKJBajR8M778Cjj8LZmjJE8lO5ScHM2gI7\nAC+7+23pCSk7qOooh7iHXkVHHglffQU77hh3RCKxKW+U1H6EIS5OAd42s9JmYMtbuj4hRyxaBF26\nhBICKCFI3iuvpHAK0M7dl5lZE+AN4PH0hJW52t84gkXLC1VKyAULFsARR8DEieGKZREpNymsdPdl\nAO4+z8zy/rr+9jeOAFDjci6YPx+6doWpU+Gll8IkOSJSblJolTA3swE7JM7V7O7HpTSyDKTeRjli\n4UI47DCYMQOGDoVu3eKOSCRjlJcU/lhi+f5UBpLp1LCcQxo2hEMOCdchdO4cdzQiGaW8SXbeTWcg\nmU6lhBwwezasXg0tWsD9ef0bR6RMmiEkCSol5IBvv4VOnaBRI5gwQUNfi5RBSSEJKiVkuZkzQ0JY\nvBgGD1ZCEClH0v8dZrZxKgMRSYlp0+Dgg2HZMhg5EvbeO+6IRDJahUnBzDqa2WTg62i5vZlpmAvJ\nDldeGdoRRo2CDh3ijkYk4yVTUrgXOBr4FcDdPyPMxFYhM+tmZtPMbIaZXV3OfsebmZtZQTLPm05q\nT8hyTz0FY8ZA27ZxRyKSFZJnbDF9AAAeFElEQVRJCjXc/bsS64oqepCZ1QQeAI4EdgNOMrPdStmv\nAWHAvYwcuF7DWWShTz+Fk06CFStgs81g553jjkgkaySTFH4ws46Am1lNM7sEmJ7E4zoSpu6c5e6r\ngEHAMaXsdzNwG7Ai2aBFyvTxx6FR+aOPYN68uKMRyTrJJIULgMuA7YCfgX2jdRVpCvyQsDw7WlfM\nzDoAzd39tfKeyMzOM7PxZjZ+nv7RpSwffBCGrthiC3jvPWjePO6IRLJOhV1S3f0XoHclnru0Gc69\neGMYS+ku4IwkYhgIDAQoKCjwCnavNmpPyCJjxkD37tC0aehl1LRpxY8RkfVUmBTM7F8kfJmv5e7n\nVfDQ2UDiT7VmwI8Jyw2ANsBoMwPYGhhmZj3dfXxFcaWDrk/IIpttBnvtFa5D2HrruKMRyVrJXLz2\nTsL9OsAfWLdaqCzjgNZm1hKYQyhtnLx2o7svAhqvXTaz0cAVmZIQJEtMmwY77RR6F40eHSbLEZFK\nq7BNwd0HJ9yeAo4j9Caq6HGrgb7AcOBL4N/uPsXMbjKznlUNXIRhw6BdOxg4MCwrI
YhUWWWGuWgJ\nbJ/Mju7+BmFynsR115ex76GViEXy1ZAhodvpnntCr15xRyOSM5JpU1jA/9oUagC/AWVeiCaSci+8\nAKedBvvsA2++GYbCFpFqUW5SsNAC3J7QJgCwxt3T1vtHZD3ffw+nnw4HHgivvQb168cdkUhOKbdN\nIUoAL7t7UXTLm4Sg7qgZarvtQlvCG28oIYikQDIXr31iZnumPJIMo+EtMsxDD4WSAYTpMzfZJN54\nRHJUmUnBzNZWLR1ISAzTzOxTM5toZp+mJzwR4K674MIL4Zln4o5EJOeV16bwCbAncGyaYhFZ34AB\ncM01cPzx8OyzcUcjkvPKSwoG4O4z0xSLyLpuugn69w9dT59+GmppokCRVCvvv6yJmV1W1kZ3vzMF\n8YgE7vDzz6Gn0WOPQc2acUckkhfKSwo1gfqUPrBdTlPPoxi5hyGvt9wS7osm+NOcyiJpU15SmOvu\nN6UtkgyigfBi4g6XXAIvvQQTJoTEICJpVd5PsLwrIUiM1qyBCy6Ae++FE06AJk3ijkgkL5WXFDqn\nLYoMoqqjGBQVwTnnwCOPwNVXwx13aHA7kZiUWX3k7r+lM5BMoaqjGPzjH/DEE6GnUf/+SggiMVIf\nP4lf376wzTZw5plxRyKS99StQ+KxalW4DuH338Mop0oIIhlBSUHSb8UK+OMfQ1XRW2/FHY2IJFD1\nUQI1MqfB8uVw7LEwYgQ8/DAcd1zcEYlIAiWFBGpkTrFly6BHjzCX8uOPq8pIJAMpKUj6zJ0L06aF\ncYxOPTXuaESkFEoKknq//w5168KOO8L06VCvXtwRiUgZ1NAsqfXbb3DwwXDddWFZCUEkoykpRNTI\nnALz50OnTjB5Muy3X9zRiEgSVH1ESAiApt+sTj//DJ07w8yZ8OqrcLheW5FsoKSAeh1Vu9WroWtX\n+OYbeP31UFoQkaygpCDVr1YtuP562GorOOiguKMRkQ2gpCDV59tvQ/tBjx5hTmURyTp5nxTUwFxN\nZswI1UQrV4Z2hPr1445IRCoh75OC2hOqwVdfhUbllSvhnXeUEESyWN4nBamiL76ALl3CVJqjR0Ob\nNnFHJCJVoKQgVfOf/0CNGjByJOyyS9zRiEgV6eI1qZyiovD3+uth4kQlBJEcoaQgG27sWNh99zC4\nnVnoeioiOUFJQTbM+++HC9NWrw6D3IlITlFSkOSNHg1HHAFNm8J778F228UdkYhUs7xOCrpGYQOM\nHQvdu0OLFiE5NG0ad0QikgJ5nRQWLS/UIHjJatsW+vQJCWHrreOORkRSJK+TgiRh5EhYsiTMg/Dw\nw9CkSdwRiUgKKSlI2V58MbQhXHtt3JGISJooKUjpnnsOeveGffeFW26JOxoRSRMlBVnfk0/CaafB\nIYfAm29Cw4ZxRyQiaaKkIOtatgz++tcwntFrr2lwO5E8o7GP5H/cQ4PymDGwzTZQp07cEYlImqmk\nIMEdd8All4TE0LKlEoJInlJSELj1VrjiCpg7938D3YlIXlJSyGfucOON0K8fnHwyPP98mF9ZRPJW\n3iYFDXEB3HBDuJ1xBjz9tBKCiORvUtAQF0BBAVx4ITz2GNSsGXc0IpIB8jYp5K01a2DcuHC/Rw94\n4IEwc5qICEoK+WXNGrjggnCV8qRJcUcjIhlIlcj5oqgIzjknXK3crx+0bx93RCKSgVJaUjCzbmY2\nzcxmmNnVpWy/zMymmtnnZvaumW2fynjy1urVYdjrJ58MvY1uuSVMoykiUkLKkoKZ1QQeAI4EdgNO\nMrPdSuw2EShw93bAEOC2VMWT1156KXQ3/fvf4frrlRBEpEyprD7qCMxw91kAZjYIOAaYunYHdx+V\nsP9Y4NQUxpO/TjghDFtx0EFxRyIiGS6V1UdNgR8SlmdH68pyNvBmCuPJLytWhCqjL74IJQMlBBFJ\nQiqTQml1FF7qjmanAgXAP8vYfp6ZjTez8fPmz
avGEHPU779Dz57wzDMwfnzc0YhIFkllUpgNNE9Y\nbgb8WHInM+sCXAv0dPeVpT2Ruw909wJ3L2ii6SDLt3QpHHUUvPMOPP54uFpZRCRJqUwK44DWZtbS\nzGoDvYFhiTuYWQfgEUJC+CWFseSHJUugW7cw9PUzz8CZZ8YdkYhkmZQ1NLv7ajPrCwwHagKPu/sU\nM7sJGO/uwwjVRfWBFy30iPne3XumKqacV6tWmCVt0KDQuCwisoFSevGau78BvFFi3fUJ97uk8vh5\n47ffwlAVm24Kr7+uLqciUml5OcxFTo2QOm8eHHYYHHNMGApbCUFEqiAvh7lYtLyQbwccFXcYVffT\nT9C5M8yaBcOGKSGISJXlZVLICXPmQKdOMHs2vPFGKC2IiFSRkkK2Ov30MH3m8OFw4IFxRyMiOUJJ\nIVsNHAjz50PHjnFHIiI5JC8bmrPW11+HYa/XrIFWrZQQRKTaKSlkiy+/hEMOgX/9K7QjiIikgJJC\nNvjiCzj00FBCGD0attsu7ohEJEcpKWS6SZNCQqhVC957D3bfPe6IRCSHKSlkul9+gc03Dwlh553j\njkZEcpx6H2WqX3+FLbaAww+HKVNgoxy5AltEMppKCplozJjQu2jIkLCshCAiaaKkkGlGjoQjj4Rt\nt4X99487GhHJM0oKmWT48DBBTqtWoZfRttvGHZGI5BklhUwxc2aYQnOXXWDUKNhqq7gjEpE8pKSQ\nKXbYAe6/H959Fxo3jjsaEclTSgpxGzIEPv003D/33ND9VEQkJkoKcXr2WejVC26+Oe5IREQAJYX4\nPP449OkTrlZ+9tm4oxERAZQU4vHII3D22dC1K7z2GtSrF3dEIiKAkkL6uYdEcNRRMHQo1K0bd0Qi\nIsU0zEU6rVgBderAiy9CjRpQu3bcEYmIrEMlhXT5299gv/1g0aKQGJQQRCQD5V1SaH/jCBrVTeNY\nQu7Qvz9cdx20aaP2AxHJaHlXfbRoeSHfDjgqPQdzh2uugX/8A848M8yaVrNmeo4tIlIJeVdSSKvb\nbgsJ4U9/gkcfVUIQkYyXdyWFtDr5ZCgshGuvBbO4oxERqZBKCtVtzRp44gkoKoLmzUNbghKCiGQJ\nJYXqVFQEZ50VbsOGxR2NiMgGU/VRdVm9Ogxb8cILcNNN8Ic/xB2RiMgGU1KoDqtWhfaD//wHBgyA\nq66KOyIRkUpRUqgOU6fCm2/CnXfCpZfGHY0IhYWFzJ49mxUrVsQdiqRZnTp1aNasGRtVcm53JYWq\nWLMmDFexxx4wfTo0bRp3RCIAzJ49mwYNGtCiRQtMHR3yhrvz66+/Mnv2bFq2bFmp51BDc2X9/jt0\n6wYDB4ZlJQTJICtWrGCLLbZQQsgzZsYWW2xRpRKikkJlLF0K3buHqTPr1Ik7GpFSKSHkp6q+76o+\n2lCLFoWE8PHHYXKck06KOyIRkWqjksKGWLUKDj8cPvkEBg9WQhApR82aNdljjz1o06YNPXr0YOHC\nhcXbpkyZQqdOndhpp51o3bo1N998M+5evP3NN9+koKCAXXfdlV122YUrrrii1GO88sor3HTTTSk/\nl8pydy6++GJ23HFH2rVrx6dr52Mv4YUXXqBt27a0a9eObt26MX/+/OJt9913HzvvvDO77747f/nL\nXwCYPHkyZ5xxRuqCzqbbXnvt5VWx/VWvVenx/s9/ug8dWrXnEEmxqVOnxh2C16tXr/h+nz59/JZb\nbnF3999//91btWrlw4cPd3f3ZcuWebdu3fz+++93d/fJkyd7q1at/Msvv3R398LCQn/ggQdKPcZ+\n++3n8+bNSzqmwsLCSp1LZb3++uverVs3X7NmjX/00UfesWPHUmNq0qRJ8XlceeWV3r9/f3d3Hzly\npHfu3NlXrFjh7u4///xz8eM6d+7s3333XanHLe39B8Z7Et+xeVVSqPSw2b/8AhMmhPtXXAE9e1Zv\nYCI5br/99
mPOnDkAPP/88xxwwAEcfvjhAGyyySbcf//9DBgwAIDbbruNa6+9ll122QWAWrVqceGF\nF673nNOnT2fjjTemcePGALz66qvss88+dOjQgS5duvDzzz8DcMMNN3Deeedx+OGH06dPH4qKirjy\nyivZe++9adeuHY888ggAS5cupXPnzuy55560bduWoUOHVvm8hw4dSp8+fTAz9t13XxYuXMjcuXPX\n2Wftl/GyZctwdxYvXsy2224LwEMPPcTVV1/NxhtvDMCWW25Z/LgePXowaNCgKsdYUl61KVRq2Oy5\nc6FzZ1i4EGbO1PSZkpVaXP16tT9nsv9LRUVFvPvuu5x99tlAqDraa6+91tlnhx12YOnSpSxevJgv\nvviCyy+/vMLn/eCDD9hzzz2Llw888EDGjh2LmfHoo49y2223cccddwAwYcIE3n//ferWrcvAgQNp\n1KgR48aNY+XKlcUJqnnz5rz88ss0bNiQ+fPns++++9KzZ8/1Gm579erFtGnT1ovnsssuo0+fPuus\nmzNnDs2bNy9ebtasGXPmzGGbbbYpXrfRRhvx0EMP0bZtW+rVq0fr1q154IEHgJD4/vvf/3LttddS\np04dbr/9dvbee28ACgoKGDBgQHGVUnXJq6SwwebMgU6dwt/XX1dCkKyVtjlEEixfvpw99tiDb7/9\nlr322ouuXbsC4ZdxWT1kNqTnzNy5c2nSpEnx8uzZs+nVqxdz585l1apV6/TT79mzJ3Wj/98RI0bw\n+eefM2TIEAAWLVrE119/TbNmzejXrx9jxoyhRo0azJkzh59//pmtt956neMOHjw46Rg9oZ2krHMs\nLCzkoYceYuLEibRq1YqLLrqIW2+9leuuu47Vq1ezYMECxo4dy7hx4zjxxBOZNWsWZsaWW27Jjz/+\nmHQsycqr6qMN8t13cPDBoaQwfDgcckjcEYlklbp16zJp0iS+++47Vq1aVfzrd/fdd2f8+PHr7Dtr\n1izq169PgwYN2H333Zmwtrq2gudP7I9/0UUX0bdvXyZPnswjjzyyzrZ6CTMeujv33XcfkyZNYtKk\nSXzzzTccfvjhPPfcc8ybN48JEyYwadIkttpqq1L7+/fq1Ys99thjvdvTTz+93r7NmjXjhx9+KF6e\nPXt2cdXQWpMmTQJCacnMOPHEE/nwww+LH3/cccdhZnTs2JEaNWoUN0KvWLGiONFVJyWFstx+O/z6\nK7z9NhxwQNzRiGStRo0ace+993L77bdTWFjIKaecwvvvv88777wDhBLFxRdfXFwNcuWVV/L3v/+d\n6dOnA7BmzRruvPPO9Z531113ZcaMGcXLixYtoml0EelTTz1VZjxHHHEEDz30EIWFhUCoolm2bBmL\nFi1iyy23ZKONNmLUqFF89913pT5+8ODBxQkl8Vay6ghCCeXpp5/G3Rk7diyNGjVap+oIoGnTpkyd\nOpV58+YB8Pbbb7PrrrsCcOyxxzJy5MjiOFetWlXchjJ9+nTatGlT5nlWlpJCWe64A8aOhX32iTsS\nkazXoUMH2rdvz6BBg6hbty5Dhw7llltuYeedd6Zt27bsvffe9O3bF4B27dpx9913c9JJJ7HrrrvS\npk2b9RpnAQ4++GAmTpxYXEVzww03cMIJJ3DQQQcVf3GW5pxzzmG33XZjzz33pE2bNpx//vmsXr2a\nU045hfHjx1NQUMBzzz1X3NBdFd27d6dVq1bsuOOOnHvuuTz44IPF2/bYYw8Att12W/r378/BBx9M\nu3btmDRpEv369QPgrLPOYtasWbRp04bevXvz1FNPFVc/jRo1iqOOqv5qQSutziuTFRQUeMmiZ7Ja\nXP16+XWrX34JF18Mzz8PCXWVItnmyy+/LP61mcv+/Oc/06NHD7p06RJ3KGm1cuVKDjnkEN5//31q\n1Vq/abi099/MJrh7QUXPrZLCWpMnh3aDL74I1UYikvH69evH77//HncYaff
9998zYMCAUhNCVan3\nEcDEidC1axjHaORI2GmnuCMSkSRstdVW9MzD64Zat25N69atU/LcKil8+mnodlqvHrz3nhKCiOQ1\nJYVttoF994UxY2CHHeKORkQkVvmbFCZPDvMqb7NNmDVt++3jjkhEJHb5mRTefTd0Nf3rX+OORCRn\n1a9fv1KPe/LJJ4u7p0r6pTQpmFk3M5tmZjPM7OpStm9sZoOj7R+bWYtUxgOEq5OPPjpUFV1yScoP\nJyKSTVKWFMysJvAAcCSwG3CSme1WYrezgQXuviNwF/CPVMUD0HnGx2GE0112gVGjYKutUnk4EQFG\njx7N0UcfXbzct29fnnzySQDGjRvH/vvvT/v27enYsSNLlixZ57Gvv/46++233zrzC0hqpbJLakdg\nhrvPAjCzQcAxwNSEfY4BbojuDwHuNzPzVFxRt3gxt79+N7RvH0oLm21W7YcQyViHHrr+uhNPhAsv\nDPONd+++/vYzzgi3+fPh+OPX3TZ6dJVDWrVqFb169WLw4MHsvffeLF68eJ2xfF5++WXuvPNO3njj\nDTbT/2vapDIpNAV+SFieDZQcM6J4H3dfbWaLgC2AdX4WmNl5wHkA2223XeWiadiQzca8G7qcNmpU\nuecQkWozbdo0ttlmm+KhoBs2bFi8bdSoUYwfP54RI0ass15SL5VJobQxcEuWAJLZB3cfCAyEMMxF\npSOKPnwieae8X/abbFL+9saNq1QyqFWrFmvWrCleXjvyaHlDaLdq1YpZs2Yxffp0CgoqHJlBqlEq\nG5pnA80TlpsBJQf/Lt7HzGoBjYDfUhiTiKTZ9ttvz9SpU1m5ciWLFi3i3XffBWCXXXbhxx9/ZNy4\ncQAsWbKE1atXFz/mpZdeok+fPkyZMiW22PNRKpPCOKC1mbU0s9pAb2BYiX2GAadH948HRqakPUFE\nYtO8eXNOPPFE2rVrxymnnEKHDh0AqF27NoMHD+aiiy6iffv2dO3adZ35C3beeWeee+45TjjhBGbO\nnBlX+HknpaOkmll34G6gJvC4u//NzG4iTCA9zMzqAM8AHQglhN5rG6bLUpVRUkXyRb6Mkiqlq8oo\nqSkdEM/d3wDeKLHu+oT7K4ATUhmDiIgkLz+vaBYRkVIpKYiISDElBZEcpT4b+amq77uSgkgOqlOn\nDr/++qsSQ55xd3799Vfq1KlT6efQzGsiOahZs2bMnj2befPmxR2KpFmdOnVo1qxZpR+vpCCSgzba\naCNatmwZdxiShVR9JCIixZQURESkmJKCiIgUS+kwF6lgZvOA7yr58MaUGJY7D+ic84POOT9U5Zy3\nd/cmFe2UdUmhKsxsfDJjf+QSnXN+0Dnnh3Scs6qPRESkmJKCiIgUy7ekMDDuAGKgc84POuf8kPJz\nzqs2BRERKV++lRRERKQcOZkUzKybmU0zsxlmdnUp2zc2s8HR9o/NrEX6o6xeSZzzZWY21cw+N7N3\nzWz7OOKsThWdc8J+x5uZm1nW91RJ5pzN7MTovZ5iZs+nO8bqlsRnezszG2VmE6PPd/c44qwuZva4\nmf1iZl+Usd3M7N7o9fjczPas1gDcPaduhKk/ZwKtgNrAZ8BuJfa5EHg4ut8bGBx33Gk458OATaL7\nF+TDOUf7NQDGAGOBgrjjTsP73BqYCGwWLW8Zd9xpOOeBwAXR/d2Ab+OOu4rnfDCwJ/BFGdu7A28C\nBuwLfFydx8/FkkJHYIa7z3L3VcAg4JgS+xwDPBXdHwJ0NjNLY4zVrcJzdvdR7v57tDgWqPwwipkh\nmfcZ4GbgNmBFKduyTTLnfC7wgLsvAHD3X9IcY3VL5pwdaBjdbwT8mMb4qp27jyHMWV+WY4CnPRgL\nbGpm21TX8XMxKTQFfkhYnh2tK3Ufd18NLAK2SEt0qZHMOSc6m/BLI5tVeM5m1gFo7u6vpTOwFErm\nfd4J2MnMPjCzsWbWLW3RpUYy53wDcKq
ZzSbMCX9RekKLzYb+v2+QXBw6u7Rf/CW7WCWzTzZJ+nzM\n7FSgADgkpRGlXrnnbGY1gLuAM9IVUBok8z7XIlQhHUooDf7XzNq4+8IUx5YqyZzzScCT7n6Hme0H\nPBOd85rUhxeLlH5/5WJJYTbQPGG5GesXJ4v3MbNahCJnecW1TJfMOWNmXYBrgZ7uvjJNsaVKRefc\nAGgDjDazbwl1r8OyvLE52c/2UHcvdPdvgGmEJJGtkjnns4F/A7j7R0AdwhhBuSqp//fKysWkMA5o\nbWYtzaw2oSF5WIl9hgGnR/ePB0Z61IKTpSo856gq5RFCQsj2emao4JzdfZG7N3b3Fu7egtCO0tPd\nx8cTbrVI5rP9CqFTAWbWmFCdNCutUVavZM75e6AzgJntSkgKuTzl3DCgT9QLaV9gkbvPra4nz7nq\nI3dfbWZ9geGEnguPu/sUM7sJGO/uw4DHCEXMGYQSQu/4Iq66JM/5n0B94MWoTf17d+8ZW9BVlOQ5\n55Qkz3k4cLiZTQWKgCvd/df4oq6aJM/5cuBfZnYpoRrljGz+kWdmLxCq/xpH7ST9gY0A3P1hQrtJ\nd2AG8DtwZrUeP4tfOxERqWa5WH0kIiKVpKQgIiLFlBRERKSYkoKIiBRTUhARkWJKCpJxzKzIzCYl\n3FqUs2+LskaT3MBjjo5G4vwsGiJi50o8x5/MrE90/wwz2zZh26Nmtls1xznOzPZI4jGXmNkmVT22\n5AclBclEy919j4Tbt2k67inu3p4wWOI/N/TB7v6wuz8dLZ4BbJuw7Rx3n1otUf4vzgdJLs5LACUF\nSYqSgmSFqETwXzP7NLrtX8o+u5vZJ1Hp4nMzax2tPzVh/SNmVrOCw40Bdowe2zkap39yNM79xtH6\nAfa/+Sluj9bdYGZXmNnxhPGlnouOWTf6hV9gZheY2W0JMZ9hZvdVMs6PSBgIzcweMrPxFuZRuDFa\ndzEhOY0ys1HRusPN7KPodXzRzOpXcBzJI0oKkonqJlQdvRyt+wXo6u57Ar2Ae0t53J+Ae9x9D8KX\n8uxo2INewAHR+iLglAqO3wOYbGZ1gCeBXu7eljACwAVmtjnwB2B3d28H3JL4YHcfAown/KLfw92X\nJ2weAhyXsNwLGFzJOLsRhrVY61p3LwDaAYeYWTt3v5cwLs5h7n5YNPTFdUCX6LUcD1xWwXEkj+Tc\nMBeSE5ZHX4yJNgLuj+rQiwhj+pT0EXCtmTUDXnL3r82sM7AXMC4a3qMuIcGU5jkzWw58Sxh+eWfg\nG3efHm1/Cvg/4H7C/AyPmtnrQNJDc7v7PDObFY1Z83V0jA+i592QOOsRhn1InHXrRDM7j/B/vQ1h\nwpnPSzx232j9B9FxahNeNxFASUGyx6XAz0B7Qgl3vUlz3P15M/sYOAoYbmbnEIYZfsrdr0niGKck\nDphnZqXOsRGNx9ORMAhbb6Av0GkDzmUwcCLwFfCyu7uFb+ik4yTMQDYAeAA4zsxaAlcAe7v7AjN7\nkjAwXEkGvO3uJ21AvJJHVH0k2aIRMDcaI/80wq/kdZhZK2BWVGUyjFCN8i5wvJltGe2zuSU/P/VX\nQAsz2zFaPg14L6qDb+TubxAacUvrAbSEMHx3aV4CjiXMAzA4WrdBcbp7IaEaaN+o6qkhsAxYZGZb\nAUeWEctY4IC152Rmm5hZaaUuyVNKCpItHgRON7OxhKqjZaXs0wv4wswmAbsQpiycSvjyHGFmnwNv\nE6pWKuTuKwgjUL5oZpOBNcDDhC/Y16Lne49QiinpSeDhtQ3NJZ53ATAV2N7dP4nWbXCcUVvFHcAV\n7v4ZYW7mKcDjhCqptQYCb5rZKHefR+gZ9UJ0nLGE10oE0CipIiKSQCUFEREppqQgIiLFlBRERKSY\nkoKIiBRTUhARkWJKCiIiUkxJQUREiikpiIhIsf8Hs5uwXg7/QzkAAAAASUVORK5CYII=\n", 848 | 
"text/plain": [ 849 | "" 850 | ] 851 | }, 852 | "metadata": {}, 853 | "output_type": "display_data" 854 | } 855 | ], 856 | "source": [ 857 | "# 综合以上的参数调优,重新构建一个新的模型\n", 858 | "clf = GradientBoostingClassifier(n_estimators=90, learning_rate=0.1, min_samples_split=300, min_samples_leaf=20,\n", 859 | " max_depth=5, max_features=1.0, subsample=0.8, random_state=10)\n", 860 | "clf.fit(X_train, y_train)\n", 861 | "y_predict_proba = clf.predict_proba(X_test)[:,1]\n", 862 | "fpr, tpr, thresholds = roc_curve(y_test, y_predict_proba)\n", 863 | "roc_auc = auc(fpr, tpr)\n", 864 | "fig = plt.figure(figsize=(6, 6))\n", 865 | "ax = fig.add_subplot(111)\n", 866 | "ax.plot(fpr, tpr, lw=1, label='ROC (area = %.2f)' % (roc_auc))\n", 867 | "ax.plot([0, 1], [0, 1], '--', color='red', label='luck')\n", 868 | "ax.set_xlabel('False Positive Rate')\n", 869 | "ax.set_ylabel('True Positive Rate')\n", 870 | "ax.set_title('Receiver Operating Characteristic')\n", 871 | "ax.legend(loc='lower right')\n", 872 | "plt.show()" 873 | ] 874 | }, 875 | { 876 | "cell_type": "code", 877 | "execution_count": null, 878 | "metadata": { 879 | "collapsed": true 880 | }, 881 | "outputs": [], 882 | "source": [] 883 | } 884 | ], 885 | "metadata": { 886 | "kernelspec": { 887 | "display_name": "Python 3", 888 | "language": "python", 889 | "name": "python3" 890 | }, 891 | "language_info": { 892 | "codemirror_mode": { 893 | "name": "ipython", 894 | "version": 3 895 | }, 896 | "file_extension": ".py", 897 | "mimetype": "text/x-python", 898 | "name": "python", 899 | "nbconvert_exporter": "python", 900 | "pygments_lexer": "ipython3", 901 | "version": "3.6.3" 902 | } 903 | }, 904 | "nbformat": 4, 905 | "nbformat_minor": 2 906 | } 907 | -------------------------------------------------------------------------------- /bank churn model - preprocessing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | 
"metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "import pandas as pd\n", 13 | "import random\n", 14 | "import operator\n", 15 | "import numbers" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 2, 21 | "metadata": {}, 22 | "outputs": [ 23 | { 24 | "data": { 25 | "text/html": [ 26 | "
\n", 27 | "\n", 40 | "\n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | "
CUST_IDOPEN_ACC_DURAGEGENDER_CDHASNT_HOME_ADDRESS_INFHASNT_MOBILE_TEL_NUM_INFLOCAL_CUR_SAV_SLOPELOCAL_CUR_MON_AVG_BALLOCAL_CUR_MON_AVG_BAL_PROPLOCAL_CUR_ACCT_NUM...educ1proptypepcownerethnickid0_2kid3_5kid6_10kid11_15kid16_17car_buy
01231821YN0.0000000.000.0000000...2.0NaNYSUUUUUNew
1248421YN1.00569220149.040.2644353...NaNNaNNaNNUUUUUNew
23102312YN0.00056217.811.0000001...NaNAYFUUYUUNew
3462782YN-0.55091229359.211.0000001...NaNNaNNaNNUUUUUUNKNOWN
4579461YN0.00028834.681.0000001...NaNAYUUUUUYNew
\n", 190 | "

5 rows × 127 columns

\n", 191 | "
" 192 | ], 193 | "text/plain": [ 194 | " CUST_ID OPEN_ACC_DUR AGE GENDER_CD HASNT_HOME_ADDRESS_INF \\\n", 195 | "0 1 231 82 1 Y \n", 196 | "1 2 48 42 1 Y \n", 197 | "2 3 102 31 2 Y \n", 198 | "3 4 62 78 2 Y \n", 199 | "4 5 79 46 1 Y \n", 200 | "\n", 201 | " HASNT_MOBILE_TEL_NUM_INF LOCAL_CUR_SAV_SLOPE LOCAL_CUR_MON_AVG_BAL \\\n", 202 | "0 N 0.000000 0.00 \n", 203 | "1 N 1.005692 20149.04 \n", 204 | "2 N 0.000562 17.81 \n", 205 | "3 N -0.550912 29359.21 \n", 206 | "4 N 0.000288 34.68 \n", 207 | "\n", 208 | " LOCAL_CUR_MON_AVG_BAL_PROP LOCAL_CUR_ACCT_NUM ... educ1 proptype \\\n", 209 | "0 0.000000 0 ... 2.0 NaN \n", 210 | "1 0.264435 3 ... NaN NaN \n", 211 | "2 1.000000 1 ... NaN A \n", 212 | "3 1.000000 1 ... NaN NaN \n", 213 | "4 1.000000 1 ... NaN A \n", 214 | "\n", 215 | " pcowner ethnic kid0_2 kid3_5 kid6_10 kid11_15 kid16_17 car_buy \n", 216 | "0 Y S U U U U U New \n", 217 | "1 NaN N U U U U U New \n", 218 | "2 Y F U U Y U U New \n", 219 | "3 NaN N U U U U U UNKNOWN \n", 220 | "4 Y U U U U U Y New \n", 221 | "\n", 222 | "[5 rows x 127 columns]" 223 | ] 224 | }, 225 | "execution_count": 2, 226 | "metadata": {}, 227 | "output_type": "execute_result" 228 | } 229 | ], 230 | "source": [ 231 | "bank_churn = pd.read_csv('./bankChurn.csv', encoding='utf-8')\n", 232 | "external_data = pd.read_csv('./ExternalData.csv', encoding='utf-8')\n", 233 | "df = pd.merge(bank_churn, external_data, on='CUST_ID')\n", 234 | "df.head()" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": 3, 240 | "metadata": { 241 | "collapsed": true 242 | }, 243 | "outputs": [], 244 | "source": [ 245 | "model_data = df.copy()\n", 246 | "indep_cols = list(model_data.columns)\n", 247 | "indep_cols.remove('CHURN_CUST_IND')\n", 248 | "indep_cols.remove('CUST_ID')" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": 4, 254 | "metadata": {}, 255 | "outputs": [ 256 | { 257 | "data": { 258 | "text/html": [ 259 | "
\n", 260 | "\n", 273 | "\n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " 
\n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | "
countuniquetopfreqmissing_pct
GENDER_CD172413288750.000000
HASNT_HOME_ADDRESS_INF172412Y147650.000000
HASNT_MOBILE_TEL_NUM_INF172412N117530.000000
crclscod1724147AA62870.000000
asl_flag172412N145760.000000
last_swap76909807/5/2001350.553970
dwlltype119522S85950.306769
marital169355U62370.017748
wrkwoman22301Y22300.870657
proptype49656A44670.712024
pcowner32321Y32320.812540
ethnic1693517N58660.017748
kid0_2169352U162690.017748
kid3_5169352U161650.017748
kid6_10169352U155000.017748
kid11_15169352U153780.017748
kid16_17169352U152770.017748
car_buy169352UNKNOWN96590.017748
\n", 431 | "
" 432 | ], 433 | "text/plain": [ 434 | " count unique top freq missing_pct\n", 435 | "GENDER_CD 17241 3 2 8875 0.000000\n", 436 | "HASNT_HOME_ADDRESS_INF 17241 2 Y 14765 0.000000\n", 437 | "HASNT_MOBILE_TEL_NUM_INF 17241 2 N 11753 0.000000\n", 438 | "crclscod 17241 47 AA 6287 0.000000\n", 439 | "asl_flag 17241 2 N 14576 0.000000\n", 440 | "last_swap 7690 980 7/5/2001 35 0.553970\n", 441 | "dwlltype 11952 2 S 8595 0.306769\n", 442 | "marital 16935 5 U 6237 0.017748\n", 443 | "wrkwoman 2230 1 Y 2230 0.870657\n", 444 | "proptype 4965 6 A 4467 0.712024\n", 445 | "pcowner 3232 1 Y 3232 0.812540\n", 446 | "ethnic 16935 17 N 5866 0.017748\n", 447 | "kid0_2 16935 2 U 16269 0.017748\n", 448 | "kid3_5 16935 2 U 16165 0.017748\n", 449 | "kid6_10 16935 2 U 15500 0.017748\n", 450 | "kid11_15 16935 2 U 15378 0.017748\n", 451 | "kid16_17 16935 2 U 15277 0.017748\n", 452 | "car_buy 16935 2 UNKNOWN 9659 0.017748" 453 | ] 454 | }, 455 | "execution_count": 4, 456 | "metadata": {}, 457 | "output_type": "execute_result" 458 | } 459 | ], 460 | "source": [ 461 | "object_df = model_data.select_dtypes(include=['object']).describe().T.assign(missing_pct = model_data.apply(lambda x: (len(x) - x.count()) / float(len(x))))\n", 462 | "object_df" 463 | ] 464 | }, 465 | { 466 | "cell_type": "code", 467 | "execution_count": 5, 468 | "metadata": {}, 469 | "outputs": [], 470 | "source": [ 471 | "# 去除属性值过多的属性\n", 472 | "model_data.drop(['crclscod', 'ethnic'], axis=1, inplace=True)\n", 473 | "# 去除缺失值过多或无意义的属性\n", 474 | "model_data.drop(['last_swap'], axis=1, inplace=True)\n", 475 | "# wrkwoman表示是否有职场女性、proptype表示房屋类型、pcowner表示家里是否有电脑,这些属性在实际中均有很强的决策意义,因此不能删除\n", 476 | "# 针对这些变量,可以用一个哑变量来表示缺失值" 477 | ] 478 | }, 479 | { 480 | "cell_type": "code", 481 | "execution_count": 6, 482 | "metadata": {}, 483 | "outputs": [ 484 | { 485 | "data": { 486 | "text/html": [ 487 | "
\n", 488 | "\n", 501 | "\n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " 
\n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | 
" \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | "
countmeanstdmin25%50%75%maxmissing_pct
CUST_ID17241.08621.0000004977.1923311.04311.08621.012931.017241.00.0
OPEN_ACC_DUR17241.080.10475034.9443200.056.075.0104.0278.00.0
AGE17241.049.33948115.4312824.038.047.059.0110.00.0
LOCAL_CUR_ACCT_NUM17241.01.7968803.5030540.01.01.02.0399.00.0
LOCAL_CUR_TRANS_TX_NUM17241.00.1778904.1258160.00.00.00.0429.00.0
LOCAL_CUR_LASTSAV_TX_NUM17241.00.4253812.3195460.00.00.00.0112.00.0
LOCAL_CUR_WITHDRAW_TX_NUM17241.00.8235602.5779860.00.00.01.0138.00.0
LOCAL_FIX_OPEN_ACC_TX_NUM17241.00.1599100.4706050.00.00.00.07.00.0
LOCAL_FIX_WITHDRAW_TX_NUM17241.00.0615390.4966070.00.00.00.032.00.0
LOCAL_FIX_CLOSE_ACC_TX_NUM17241.00.1085780.4520350.00.00.00.015.00.0
L6M_INDFINA_ALL_TX_NUM17241.00.0675710.9238490.00.00.00.082.00.0
POS_CONSUME_TX_AMT17241.01050.97691523755.5779440.00.00.00.01794863.00.0
POS_CONSUME_TX_NUM17241.00.1068960.8195690.00.00.00.050.00.0
ATM_ACCT_TX_NUM17241.00.4096054.6819210.00.00.00.0257.00.0
ATM_ACCT_TX_AMT17241.05632.666551208094.1478770.00.00.00.018501490.00.0
ATM_NOT_ACCT_TX_NUM17241.00.1881560.8724190.00.00.00.031.00.0
ATM_ALL_TX_NUM17241.00.5977614.9745490.00.00.00.0261.00.0
COUNTER_NOT_ACCT_TX_NUM17241.00.3223130.8702980.00.00.00.024.00.0
COUNTER_ACCT_TX_AMT17241.032726.390987153197.5404830.00.00.012000.06402993.00.0
COUNTER_ACCT_TX_NUM17241.01.3033474.0933810.00.00.02.0307.00.0
COUNTER_ALL_TX_NUM17241.01.6256604.5608930.00.00.02.0331.00.0
NAT_DEBT_OPEN_ACC_DUR17241.0-1262.14836710929.030591-95877.00.00.00.085.00.0
FINA_OPEN_ACC_DUR17241.0-1017.5579149825.499880-95877.00.00.00.024.00.0
FUND_OPEN_ACC_DUR17241.0-967.3070019583.391244-95877.00.00.00.043.00.0
TELEBANK_ALL_TX_NUM17241.00.0000000.0000000.00.00.00.00.00.0
CHURN_CUST_IND17241.00.1009800.3013110.00.00.00.01.00.0
unan_vce_Range17241.021.10765035.9177860.04.011.025.01395.00.0
unan_dat_Range17241.00.0649610.8995760.00.00.00.083.00.0
callfwdv_Range17241.00.0259270.8090770.00.00.00.059.00.0
totcalls17241.02976.0273774147.0620690.0853.01792.03564.098874.00.0
avg3mou17241.0546.483441554.6524670.0166.0376.0746.05821.00.0
avg3qty17241.0190.337799205.3712960.059.0131.0249.03261.00.0
avg3rev17241.060.08607446.0466751.034.049.072.0835.00.0
\n", 915 | "
" 916 | ], 917 | "text/plain": [ 918 | " count mean std min \\\n", 919 | "CUST_ID 17241.0 8621.000000 4977.192331 1.0 \n", 920 | "OPEN_ACC_DUR 17241.0 80.104750 34.944320 0.0 \n", 921 | "AGE 17241.0 49.339481 15.431282 4.0 \n", 922 | "LOCAL_CUR_ACCT_NUM 17241.0 1.796880 3.503054 0.0 \n", 923 | "LOCAL_CUR_TRANS_TX_NUM 17241.0 0.177890 4.125816 0.0 \n", 924 | "LOCAL_CUR_LASTSAV_TX_NUM 17241.0 0.425381 2.319546 0.0 \n", 925 | "LOCAL_CUR_WITHDRAW_TX_NUM 17241.0 0.823560 2.577986 0.0 \n", 926 | "LOCAL_FIX_OPEN_ACC_TX_NUM 17241.0 0.159910 0.470605 0.0 \n", 927 | "LOCAL_FIX_WITHDRAW_TX_NUM 17241.0 0.061539 0.496607 0.0 \n", 928 | "LOCAL_FIX_CLOSE_ACC_TX_NUM 17241.0 0.108578 0.452035 0.0 \n", 929 | "L6M_INDFINA_ALL_TX_NUM 17241.0 0.067571 0.923849 0.0 \n", 930 | "POS_CONSUME_TX_AMT 17241.0 1050.976915 23755.577944 0.0 \n", 931 | "POS_CONSUME_TX_NUM 17241.0 0.106896 0.819569 0.0 \n", 932 | "ATM_ACCT_TX_NUM 17241.0 0.409605 4.681921 0.0 \n", 933 | "ATM_ACCT_TX_AMT 17241.0 5632.666551 208094.147877 0.0 \n", 934 | "ATM_NOT_ACCT_TX_NUM 17241.0 0.188156 0.872419 0.0 \n", 935 | "ATM_ALL_TX_NUM 17241.0 0.597761 4.974549 0.0 \n", 936 | "COUNTER_NOT_ACCT_TX_NUM 17241.0 0.322313 0.870298 0.0 \n", 937 | "COUNTER_ACCT_TX_AMT 17241.0 32726.390987 153197.540483 0.0 \n", 938 | "COUNTER_ACCT_TX_NUM 17241.0 1.303347 4.093381 0.0 \n", 939 | "COUNTER_ALL_TX_NUM 17241.0 1.625660 4.560893 0.0 \n", 940 | "NAT_DEBT_OPEN_ACC_DUR 17241.0 -1262.148367 10929.030591 -95877.0 \n", 941 | "FINA_OPEN_ACC_DUR 17241.0 -1017.557914 9825.499880 -95877.0 \n", 942 | "FUND_OPEN_ACC_DUR 17241.0 -967.307001 9583.391244 -95877.0 \n", 943 | "TELEBANK_ALL_TX_NUM 17241.0 0.000000 0.000000 0.0 \n", 944 | "CHURN_CUST_IND 17241.0 0.100980 0.301311 0.0 \n", 945 | "unan_vce_Range 17241.0 21.107650 35.917786 0.0 \n", 946 | "unan_dat_Range 17241.0 0.064961 0.899576 0.0 \n", 947 | "callfwdv_Range 17241.0 0.025927 0.809077 0.0 \n", 948 | "totcalls 17241.0 2976.027377 4147.062069 0.0 \n", 949 | "avg3mou 17241.0 546.483441 
554.652467 0.0 \n", 950 | "avg3qty 17241.0 190.337799 205.371296 0.0 \n", 951 | "avg3rev 17241.0 60.086074 46.046675 1.0 \n", 952 | "\n", 953 | " 25% 50% 75% max missing_pct \n", 954 | "CUST_ID 4311.0 8621.0 12931.0 17241.0 0.0 \n", 955 | "OPEN_ACC_DUR 56.0 75.0 104.0 278.0 0.0 \n", 956 | "AGE 38.0 47.0 59.0 110.0 0.0 \n", 957 | "LOCAL_CUR_ACCT_NUM 1.0 1.0 2.0 399.0 0.0 \n", 958 | "LOCAL_CUR_TRANS_TX_NUM 0.0 0.0 0.0 429.0 0.0 \n", 959 | "LOCAL_CUR_LASTSAV_TX_NUM 0.0 0.0 0.0 112.0 0.0 \n", 960 | "LOCAL_CUR_WITHDRAW_TX_NUM 0.0 0.0 1.0 138.0 0.0 \n", 961 | "LOCAL_FIX_OPEN_ACC_TX_NUM 0.0 0.0 0.0 7.0 0.0 \n", 962 | "LOCAL_FIX_WITHDRAW_TX_NUM 0.0 0.0 0.0 32.0 0.0 \n", 963 | "LOCAL_FIX_CLOSE_ACC_TX_NUM 0.0 0.0 0.0 15.0 0.0 \n", 964 | "L6M_INDFINA_ALL_TX_NUM 0.0 0.0 0.0 82.0 0.0 \n", 965 | "POS_CONSUME_TX_AMT 0.0 0.0 0.0 1794863.0 0.0 \n", 966 | "POS_CONSUME_TX_NUM 0.0 0.0 0.0 50.0 0.0 \n", 967 | "ATM_ACCT_TX_NUM 0.0 0.0 0.0 257.0 0.0 \n", 968 | "ATM_ACCT_TX_AMT 0.0 0.0 0.0 18501490.0 0.0 \n", 969 | "ATM_NOT_ACCT_TX_NUM 0.0 0.0 0.0 31.0 0.0 \n", 970 | "ATM_ALL_TX_NUM 0.0 0.0 0.0 261.0 0.0 \n", 971 | "COUNTER_NOT_ACCT_TX_NUM 0.0 0.0 0.0 24.0 0.0 \n", 972 | "COUNTER_ACCT_TX_AMT 0.0 0.0 12000.0 6402993.0 0.0 \n", 973 | "COUNTER_ACCT_TX_NUM 0.0 0.0 2.0 307.0 0.0 \n", 974 | "COUNTER_ALL_TX_NUM 0.0 0.0 2.0 331.0 0.0 \n", 975 | "NAT_DEBT_OPEN_ACC_DUR 0.0 0.0 0.0 85.0 0.0 \n", 976 | "FINA_OPEN_ACC_DUR 0.0 0.0 0.0 24.0 0.0 \n", 977 | "FUND_OPEN_ACC_DUR 0.0 0.0 0.0 43.0 0.0 \n", 978 | "TELEBANK_ALL_TX_NUM 0.0 0.0 0.0 0.0 0.0 \n", 979 | "CHURN_CUST_IND 0.0 0.0 0.0 1.0 0.0 \n", 980 | "unan_vce_Range 4.0 11.0 25.0 1395.0 0.0 \n", 981 | "unan_dat_Range 0.0 0.0 0.0 83.0 0.0 \n", 982 | "callfwdv_Range 0.0 0.0 0.0 59.0 0.0 \n", 983 | "totcalls 853.0 1792.0 3564.0 98874.0 0.0 \n", 984 | "avg3mou 166.0 376.0 746.0 5821.0 0.0 \n", 985 | "avg3qty 59.0 131.0 249.0 3261.0 0.0 \n", 986 | "avg3rev 34.0 49.0 72.0 835.0 0.0 " 987 | ] 988 | }, 989 | "execution_count": 6, 990 | "metadata": {}, 991 | 
"output_type": "execute_result" 992 | } 993 | ], 994 | "source": [ 995 | "int_df = model_data.select_dtypes(include=['int64']).describe().T.assign(missing_pct=model_data.apply(lambda x: (len(x) - x.count())/float(len(x))))\n", 996 | "int_df" 997 | ] 998 | }, 999 | { 1000 | "cell_type": "code", 1001 | "execution_count": 7, 1002 | "metadata": {}, 1003 | "outputs": [], 1004 | "source": [ 1005 | "del model_data['CUST_ID']" 1006 | ] 1007 | }, 1008 | { 1009 | "cell_type": "code", 1010 | "execution_count": 8, 1011 | "metadata": {}, 1012 | "outputs": [ 1013 | { 1014 | "data": { 1015 | "text/html": [ 1016 | "
\n", 1017 | "\n", 1030 | "\n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | " \n", 1035 | " \n", 1036 | " \n", 1037 | " \n", 1038 | " \n", 1039 | " \n", 1040 | " \n", 1041 | " \n", 1042 | " \n", 1043 | " \n", 1044 | " \n", 1045 | " \n", 1046 | " \n", 1047 | " \n", 1048 | " \n", 1049 | " \n", 1050 | " \n", 1051 | " \n", 1052 | " \n", 1053 | " \n", 1054 | " \n", 1055 | " \n", 1056 | " \n", 1057 | " \n", 1058 | " \n", 1059 | " \n", 1060 | " \n", 1061 | " \n", 1062 | " \n", 1063 | " \n", 1064 | " \n", 1065 | " \n", 1066 | " \n", 1067 | " \n", 1068 | " \n", 1069 | " \n", 1070 | " \n", 1071 | " \n", 1072 | " \n", 1073 | " \n", 1074 | " \n", 1075 | " \n", 1076 | " \n", 1077 | " \n", 1078 | " \n", 1079 | " \n", 1080 | " \n", 1081 | " \n", 1082 | " \n", 1083 | " \n", 1084 | " \n", 1085 | " \n", 1086 | " \n", 1087 | " \n", 1088 | " \n", 1089 | " \n", 1090 | " \n", 1091 | " \n", 1092 | " \n", 1093 | " \n", 1094 | " \n", 1095 | " \n", 1096 | " \n", 1097 | " \n", 1098 | " \n", 1099 | " \n", 1100 | " \n", 1101 | " \n", 1102 | " \n", 1103 | " \n", 1104 | " \n", 1105 | " \n", 1106 | " \n", 1107 | " \n", 1108 | " \n", 1109 | " \n", 1110 | " \n", 1111 | " \n", 1112 | " \n", 1113 | " \n", 1114 | " \n", 1115 | " \n", 1116 | " \n", 1117 | " \n", 1118 | " \n", 1119 | " \n", 1120 | " \n", 1121 | " \n", 1122 | " \n", 1123 | " \n", 1124 | " \n", 1125 | " \n", 1126 | " \n", 1127 | " \n", 1128 | " \n", 1129 | " \n", 1130 | " \n", 1131 | " \n", 1132 | " \n", 1133 | " \n", 1134 | " \n", 1135 | " \n", 1136 | " \n", 1137 | " \n", 1138 | " \n", 1139 | " \n", 1140 | " \n", 1141 | " \n", 1142 | " \n", 1143 | " \n", 1144 | " \n", 1145 | " \n", 1146 | " \n", 1147 | " \n", 1148 | " \n", 1149 | " \n", 1150 | " \n", 1151 | " \n", 1152 | " \n", 1153 | " \n", 1154 | " \n", 1155 | " \n", 1156 | " \n", 1157 | " \n", 1158 | " \n", 1159 | " \n", 1160 | " \n", 1161 | " \n", 1162 | " \n", 1163 | " \n", 1164 | " \n", 1165 | " \n", 1166 | " \n", 1167 | " \n", 1168 | " \n", 1169 | " \n", 1170 | " \n", 1171 | " 
\n", 1172 | " \n", 1173 | " \n", 1174 | " \n", 1175 | " \n", 1176 | " \n", 1177 | " \n", 1178 | " \n", 1179 | " \n", 1180 | " \n", 1181 | " \n", 1182 | " \n", 1183 | " \n", 1184 | " \n", 1185 | " \n", 1186 | " \n", 1187 | " \n", 1188 | " \n", 1189 | " \n", 1190 | " \n", 1191 | " \n", 1192 | " \n", 1193 | " \n", 1194 | " \n", 1195 | " \n", 1196 | " \n", 1197 | " \n", 1198 | " \n", 1199 | " \n", 1200 | " \n", 1201 | " \n", 1202 | " \n", 1203 | " \n", 1204 | " \n", 1205 | " \n", 1206 | " \n", 1207 | " \n", 1208 | " \n", 1209 | " \n", 1210 | " \n", 1211 | " \n", 1212 | " \n", 1213 | " \n", 1214 | " \n", 1215 | " \n", 1216 | " \n", 1217 | " \n", 1218 | " \n", 1219 | " \n", 1220 | " \n", 1221 | " \n", 1222 | " \n", 1223 | " \n", 1224 | " \n", 1225 | " \n", 1226 | " \n", 1227 | " \n", 1228 | " \n", 1229 | " \n", 1230 | " \n", 1231 | " \n", 1232 | " \n", 1233 | " \n", 1234 | " \n", 1235 | " \n", 1236 | " \n", 1237 | " \n", 1238 | " \n", 1239 | " \n", 1240 | " \n", 1241 | " \n", 1242 | " \n", 1243 | " \n", 1244 | " \n", 1245 | " \n", 1246 | " \n", 1247 | " \n", 1248 | " \n", 1249 | " \n", 1250 | " \n", 1251 | " \n", 1252 | " \n", 1253 | " \n", 1254 | " \n", 1255 | " \n", 1256 | " \n", 1257 | " \n", 1258 | " \n", 1259 | " \n", 1260 | " \n", 1261 | " \n", 1262 | " \n", 1263 | " \n", 1264 | " \n", 1265 | " \n", 1266 | " \n", 1267 | " \n", 1268 | " \n", 1269 | " \n", 1270 | " \n", 1271 | " \n", 1272 | " \n", 1273 | " \n", 1274 | " \n", 1275 | " \n", 1276 | " \n", 1277 | " \n", 1278 | " \n", 1279 | " \n", 1280 | " \n", 1281 | " \n", 1282 | " \n", 1283 | " \n", 1284 | " \n", 1285 | " \n", 1286 | " \n", 1287 | " \n", 1288 | " \n", 1289 | " \n", 1290 | " \n", 1291 | " \n", 1292 | " \n", 1293 | " \n", 1294 | " \n", 1295 | " \n", 1296 | " \n", 1297 | " \n", 1298 | " \n", 1299 | " \n", 1300 | " \n", 1301 | " \n", 1302 | " \n", 1303 | " \n", 1304 | " \n", 1305 | " \n", 1306 | " \n", 1307 | " \n", 1308 | " \n", 1309 | " \n", 1310 | " \n", 1311 | " \n", 1312 | " \n", 1313 | " \n", 1314 | 
" \n", 1315 | " \n", 1316 | " \n", 1317 | " \n", 1318 | " \n", 1319 | " \n", 1320 | " \n", 1321 | " \n", 1322 | " \n", 1323 | " \n", 1324 | " \n", 1325 | " \n", 1326 | " \n", 1327 | " \n", 1328 | " \n", 1329 | " \n", 1330 | " \n", 1331 | " \n", 1332 | " \n", 1333 | " \n", 1334 | " \n", 1335 | " \n", 1336 | " \n", 1337 | " \n", 1338 | " \n", 1339 | " \n", 1340 | " \n", 1341 | " \n", 1342 | " \n", 1343 | " \n", 1344 | " \n", 1345 | " \n", 1346 | " \n", 1347 | " \n", 1348 | " \n", 1349 | " \n", 1350 | " \n", 1351 | " \n", 1352 | " \n", 1353 | " \n", 1354 | " \n", 1355 | " \n", 1356 | " \n", 1357 | " \n", 1358 | " \n", 1359 | " \n", 1360 | " \n", 1361 | " \n", 1362 | " \n", 1363 | " \n", 1364 | " \n", 1365 | " \n", 1366 | " \n", 1367 | " \n", 1368 | " \n", 1369 | " \n", 1370 | " \n", 1371 | " \n", 1372 | " \n", 1373 | " \n", 1374 | " \n", 1375 | " \n", 1376 | " \n", 1377 | " \n", 1378 | " \n", 1379 | " \n", 1380 | " \n", 1381 | " \n", 1382 | " \n", 1383 | " \n", 1384 | " \n", 1385 | " \n", 1386 | " \n", 1387 | " \n", 1388 | " \n", 1389 | " \n", 1390 | " \n", 1391 | " \n", 1392 | " \n", 1393 | " \n", 1394 | " \n", 1395 | " \n", 1396 | " \n", 1397 | " \n", 1398 | " \n", 1399 | " \n", 1400 | " \n", 1401 | " \n", 1402 | " \n", 1403 | " \n", 1404 | " \n", 1405 | " \n", 1406 | " \n", 1407 | " \n", 1408 | " \n", 1409 | " \n", 1410 | " \n", 1411 | " \n", 1412 | " \n", 1413 | " \n", 1414 | " \n", 1415 | " \n", 1416 | " \n", 1417 | " \n", 1418 | " \n", 1419 | " \n", 1420 | " \n", 1421 | " \n", 1422 | " \n", 1423 | " \n", 1424 | " \n", 1425 | " \n", 1426 | " \n", 1427 | " \n", 1428 | " \n", 1429 | " \n", 1430 | " \n", 1431 | " \n", 1432 | " \n", 1433 | " \n", 1434 | " \n", 1435 | " \n", 1436 | " \n", 1437 | " \n", 1438 | " \n", 1439 | " \n", 1440 | " \n", 1441 | " \n", 1442 | " \n", 1443 | " \n", 1444 | " \n", 1445 | " \n", 1446 | " \n", 1447 | " \n", 1448 | " \n", 1449 | " \n", 1450 | " \n", 1451 | " \n", 1452 | " \n", 1453 | " \n", 1454 | " \n", 1455 | " \n", 1456 | " \n", 1457 
| " \n", 1458 | " \n", 1459 | " \n", 1460 | " \n", 1461 | " \n", 1462 | " \n", 1463 | " \n", 1464 | " \n", 1465 | " \n", 1466 | " \n", 1467 | " \n", 1468 | " \n", 1469 | " \n", 1470 | " \n", 1471 | " \n", 1472 | " \n", 1473 | " \n", 1474 | " \n", 1475 | " \n", 1476 | " \n", 1477 | " \n", 1478 | " \n", 1479 | " \n", 1480 | " \n", 1481 | " \n", 1482 | " \n", 1483 | " \n", 1484 | " \n", 1485 | " \n", 1486 | " \n", 1487 | " \n", 1488 | " \n", 1489 | " \n", 1490 | " \n", 1491 | " \n", 1492 | " \n", 1493 | " \n", 1494 | " \n", 1495 | " \n", 1496 | " \n", 1497 | " \n", 1498 | " \n", 1499 | " \n", 1500 | " \n", 1501 | " \n", 1502 | " \n", 1503 | " \n", 1504 | " \n", 1505 | " \n", 1506 | " \n", 1507 | " \n", 1508 | " \n", 1509 | " \n", 1510 | " \n", 1511 | " \n", 1512 | " \n", 1513 | " \n", 1514 | " \n", 1515 | " \n", 1516 | " \n", 1517 | " \n", 1518 | " \n", 1519 | " \n", 1520 | " \n", 1521 | " \n", 1522 | " \n", 1523 | " \n", 1524 | " \n", 1525 | " \n", 1526 | " \n", 1527 | " \n", 1528 | " \n", 1529 | " \n", 1530 | " \n", 1531 | " \n", 1532 | " \n", 1533 | " \n", 1534 | " \n", 1535 | " \n", 1536 | " \n", 1537 | " \n", 1538 | " \n", 1539 | " \n", 1540 | " \n", 1541 | " \n", 1542 | " \n", 1543 | " \n", 1544 | " \n", 1545 | " \n", 1546 | " \n", 1547 | " \n", 1548 | " \n", 1549 | " \n", 1550 | " \n", 1551 | " \n", 1552 | " \n", 1553 | " \n", 1554 | " \n", 1555 | " \n", 1556 | " \n", 1557 | " \n", 1558 | " \n", 1559 | " \n", 1560 | " \n", 1561 | " \n", 1562 | " \n", 1563 | " \n", 1564 | " \n", 1565 | " \n", 1566 | " \n", 1567 | " \n", 1568 | " \n", 1569 | " \n", 1570 | " \n", 1571 | " \n", 1572 | " \n", 1573 | " \n", 1574 | " \n", 1575 | " \n", 1576 | " \n", 1577 | " \n", 1578 | " \n", 1579 | " \n", 1580 | " \n", 1581 | " \n", 1582 | " \n", 1583 | " \n", 1584 | " \n", 1585 | " \n", 1586 | " \n", 1587 | " \n", 1588 | " \n", 1589 | " \n", 1590 | " \n", 1591 | " \n", 1592 | " \n", 1593 | " \n", 1594 | " \n", 1595 | " \n", 1596 | " \n", 1597 | " \n", 1598 | " \n", 1599 | " \n", 
1600 | " \n", 1601 | " \n", 1602 | " \n", 1603 | " \n", 1604 | " \n", 1605 | " \n", 1606 | " \n", 1607 | " \n", 1608 | " \n", 1609 | " \n", 1610 | " \n", 1611 | " \n", 1612 | " \n", 1613 | " \n", 1614 | " \n", 1615 | " \n", 1616 | " \n", 1617 | " \n", 1618 | " \n", 1619 | " \n", 1620 | " \n", 1621 | " \n", 1622 | " \n", 1623 | " \n", 1624 | " \n", 1625 | " \n", 1626 | " \n", 1627 | " \n", 1628 | " \n", 1629 | " \n", 1630 | " \n", 1631 | " \n", 1632 | " \n", 1633 | " \n", 1634 | " \n", 1635 | " \n", 1636 | " \n", 1637 | " \n", 1638 | " \n", 1639 | " \n", 1640 | " \n", 1641 | " \n", 1642 | " \n", 1643 | " \n", 1644 | " \n", 1645 | " \n", 1646 | " \n", 1647 | " \n", 1648 | " \n", 1649 | " \n", 1650 | " \n", 1651 | " \n", 1652 | " \n", 1653 | " \n", 1654 | " \n", 1655 | " \n", 1656 | " \n", 1657 | " \n", 1658 | " \n", 1659 | " \n", 1660 | " \n", 1661 | " \n", 1662 | " \n", 1663 | " \n", 1664 | " \n", 1665 | " \n", 1666 | " \n", 1667 | " \n", 1668 | " \n", 1669 | " \n", 1670 | " \n", 1671 | " \n", 1672 | " \n", 1673 | " \n", 1674 | " \n", 1675 | " \n", 1676 | " \n", 1677 | " \n", 1678 | " \n", 1679 | " \n", 1680 | " \n", 1681 | " \n", 1682 | " \n", 1683 | " \n", 1684 | " \n", 1685 | " \n", 1686 | " \n", 1687 | " \n", 1688 | " \n", 1689 | " \n", 1690 | " \n", 1691 | " \n", 1692 | " \n", 1693 | " \n", 1694 | " \n", 1695 | " \n", 1696 | " \n", 1697 | " \n", 1698 | " \n", 1699 | " \n", 1700 | " \n", 1701 | " \n", 1702 | " \n", 1703 | " \n", 1704 | " \n", 1705 | " \n", 1706 | " \n", 1707 | " \n", 1708 | " \n", 1709 | " \n", 1710 | " \n", 1711 | " \n", 1712 | " \n", 1713 | " \n", 1714 | " \n", 1715 | " \n", 1716 | " \n", 1717 | " \n", 1718 | " \n", 1719 | " \n", 1720 | " \n", 1721 | " \n", 1722 | " \n", 1723 | " \n", 1724 | " \n", 1725 | " \n", 1726 | " \n", 1727 | " \n", 1728 | " \n", 1729 | " \n", 1730 | " \n", 1731 | " \n", 1732 | " \n", 1733 | " \n", 1734 | " \n", 1735 | " \n", 1736 | " \n", 1737 | " \n", 1738 | " \n", 1739 | " \n", 1740 | " \n", 1741 | " \n", 1742 | " 
\n", 1743 | " \n", 1744 | " \n", 1745 | " \n", 1746 | " \n", 1747 | " \n", 1748 | " \n", 1749 | " \n", 1750 | " \n", 1751 | " \n", 1752 | " \n", 1753 | " \n", 1754 | " \n", 1755 | " \n", 1756 | " \n", 1757 | " \n", 1758 | " \n", 1759 | " \n", 1760 | " \n", 1761 | " \n", 1762 | " \n", 1763 | " \n", 1764 | " \n", 1765 | " \n", 1766 | " \n", 1767 | " \n", 1768 | " \n", 1769 | " \n", 1770 | " \n", 1771 | " \n", 1772 | " \n", 1773 | " \n", 1774 | " \n", 1775 | " \n", 1776 | " \n", 1777 | " \n", 1778 | " \n", 1779 | "
countmeanstdmin25%50%75%maxmissing_pct
LOCAL_CUR_SAV_SLOPE17241.0-0.0116270.407344-1.000000-0.0879960.0000000.0745062.000000e+000.000000
totmou17241.07842.9951529244.8766800.0000002450.0000005098.0000009868.0000001.736084e+050.000000
mou_opkd_Mean17241.01.23028024.9951180.0000000.0000000.0000000.0000002.922043e+030.000000
mou_opkv_Mean17241.0175.394446243.8865480.00000021.87666783.176667229.6566672.687313e+030.000000
opk_dat_Mean17241.00.4184024.2645810.0000000.0000000.0000000.0000002.456667e+020.000000
opk_vce_Mean17241.070.56344498.1286280.00000011.66666736.66666791.6666671.438000e+030.000000
mou_pead_Mean17241.00.6974396.7885000.0000000.0000000.0000000.0000002.902433e+020.000000
mou_peav_Mean17241.0183.094184213.0118990.00000043.940000122.703333242.2966672.994580e+030.000000
peak_dat_Mean17241.00.3578293.8780870.0000000.0000000.0000000.0000002.806667e+020.000000
peak_vce_Mean17241.093.384316107.8817290.00000024.33333363.666667124.3333331.921333e+030.000000
mouiwylisv_Mean17241.019.70869142.6750590.0000000.0000003.98333320.6666679.210700e+020.000000
iwylis_vce_Mean17241.08.61572217.2340110.0000000.0000002.3333339.6666673.446667e+020.000000
mouowylisv_Mean17241.030.14479950.8596670.0000002.86000013.01666736.8800001.490253e+030.000000
owylis_vce_Mean17241.026.18664835.9820140.0000003.66666714.00000035.0000006.443333e+020.000000
mou_rvce_Mean17241.0119.739875170.9386530.0000009.46666754.290000161.3366672.138510e+030.000000
mou_cdat_Mean17241.01.93004028.3306850.0000000.0000000.0000000.0000003.032050e+030.000000
unan_vce_Mean17241.028.92138939.4424980.0000005.33333316.66666737.0000008.143333e+020.000000
unan_dat_Mean17241.00.0314170.4587370.0000000.0000000.0000000.0000004.133333e+010.000000
comp_vce_Mean17241.0114.545889122.2146390.00000031.66666779.666667157.3333331.376667e+030.000000
comp_dat_Mean17241.00.7762317.6449590.0000000.0000000.0000000.0000005.263333e+020.000000
custcare_Mean17241.01.9490565.9582780.0000000.0000000.0000001.6666673.656667e+020.000000
cc_mou_Mean17241.03.99594011.3767200.0000000.0000000.0000003.1900003.091133e+020.000000
avgmou17241.0494.788836452.2984120.000000179.710000365.000000667.3300005.164290e+030.000000
avgqty17241.0179.168816179.2000140.00000064.300000129.170000234.6800002.500890e+030.000000
L6M_INDFINA_ALL_TX_AMT17241.04303.36389576935.0231810.0000000.0000000.0000000.0000003.987167e+060.000000
LOCAL_FIX_CLOSE_ACC_TX_AMT17241.03453.77352220665.1057880.0000000.0000000.0000000.0000007.945000e+050.000000
LOCAL_CUR_MON_AVG_BAL17241.027735.41789998608.1955180.00000051.5300003232.02000023113.4600007.610110e+060.000000
LOCAL_CUR_MON_AVG_BAL_PROP17241.00.3732160.4381300.0000000.0057960.0919141.0000001.000000e+000.000000
LOCAL_OVEONEYR_FF_MON_AVG_BAL17241.072023.920103128981.7840410.0000000.00000050000.00000095000.0000005.561334e+060.000000
LOCAL_FIX_MON_AVG_BAL17241.083907.649074360269.6468600.0000000.00000056677.270000101125.0000004.358232e+070.000000
..............................
LOCAL_FIX_OPEN_ACC_TX_AMT17241.05528.39159433922.6755090.0000000.0000000.0000000.0000002.586000e+060.000000
LOCAL_FIX_WITHDRAW_TX_AMT17241.01706.32359518679.5117730.0000000.0000000.0000000.0000001.398000e+060.000000
SAV_SLOPE17241.0-0.0040430.256375-1.000000-0.0005500.0005330.0353371.904013e+000.000000
mou_cvce_Mean17241.0238.802099267.2749430.00000056.900000157.183333329.5233333.661677e+030.000000
vceovr_Range17205.029.37490258.0985430.0000000.0000002.10000037.0000001.215550e+030.002088
vceovr_Mean17205.012.84871629.2760310.0000000.0000000.60000013.2750006.012125e+020.002088
roam_Range17205.03.12754419.6924470.0000000.0000000.0000000.7800001.561470e+030.002088
datovr_Range17205.00.7171825.9642600.0000000.0000000.0000000.0000003.032000e+020.002088
da_Mean17205.00.9209982.1182370.0000000.0000000.2475000.9900005.766750e+010.002088
ovrrev_Range17205.029.84438158.3655860.0000000.0000003.30000037.4500001.215550e+030.002088
ovrmou_Range17205.089.809416177.6566760.0000000.0000009.000000112.0000003.473000e+030.002088
da_Range17205.01.6715782.9879400.0000000.0000000.9900001.9800006.732000e+010.002088
totmrc_Range17205.08.80117527.1750650.0000000.0000000.0000000.0000005.999800e+020.002088
mou_Range17205.0369.560622420.0421600.000000116.000000242.000000468.0000006.865000e+030.002088
rev_Range17205.042.79928970.0356310.0000001.98000015.75000055.5700001.527970e+030.002088
roam_Mean17205.01.1422336.5449580.0000000.0000000.0000000.2575004.162575e+020.002088
datovr_Mean17205.00.2493002.1326510.0000000.0000000.0000000.0000001.007000e+020.002088
ovrmou_Mean17205.039.81502792.8796580.0000000.0000002.75000041.0000001.887250e+030.002088
ovrrev_Mean17205.013.10187629.4939600.0000000.0000000.97500013.7000006.012125e+020.002088
change_mou17153.0-8.395409248.516433-3875.000000-78.250000-3.50000070.0000003.712250e+030.005104
change_rev17153.0-1.69305234.284273-626.357500-7.082500-0.3125001.6125004.191725e+020.005104
hnd_price17043.0106.80569261.6223799.98999859.989990129.989990149.9899904.999900e+020.011484
forgntvl16935.00.0586360.2349490.0000000.0000000.0000000.0000001.000000e+000.017748
age116935.031.66932422.1296900.0000000.00000036.00000048.0000009.600000e+010.017748
age216935.021.22503723.9037940.0000000.0000000.00000042.0000009.800000e+010.017748
avg6qty16688.0187.114334195.0362240.00000061.000000131.000000246.0000002.673000e+030.032075
avg6mou16688.0531.920662517.2947740.000000173.000000374.000000726.2500005.347000e+030.032075
lor12230.06.3775144.7816500.0000002.0000005.00000010.0000001.500000e+010.290644
pre_hnd_price7649.082.76159560.8031609.98999829.98999059.989990129.9899904.999900e+020.556348
educ12419.01.8685410.8478541.0000001.0000002.0000002.0000004.000000e+000.859695
\n", 1780 | "

76 rows × 9 columns

\n", 1781 | "
" 1782 | ], 1783 | "text/plain": [ 1784 | " count mean std \\\n", 1785 | "LOCAL_CUR_SAV_SLOPE 17241.0 -0.011627 0.407344 \n", 1786 | "totmou 17241.0 7842.995152 9244.876680 \n", 1787 | "mou_opkd_Mean 17241.0 1.230280 24.995118 \n", 1788 | "mou_opkv_Mean 17241.0 175.394446 243.886548 \n", 1789 | "opk_dat_Mean 17241.0 0.418402 4.264581 \n", 1790 | "opk_vce_Mean 17241.0 70.563444 98.128628 \n", 1791 | "mou_pead_Mean 17241.0 0.697439 6.788500 \n", 1792 | "mou_peav_Mean 17241.0 183.094184 213.011899 \n", 1793 | "peak_dat_Mean 17241.0 0.357829 3.878087 \n", 1794 | "peak_vce_Mean 17241.0 93.384316 107.881729 \n", 1795 | "mouiwylisv_Mean 17241.0 19.708691 42.675059 \n", 1796 | "iwylis_vce_Mean 17241.0 8.615722 17.234011 \n", 1797 | "mouowylisv_Mean 17241.0 30.144799 50.859667 \n", 1798 | "owylis_vce_Mean 17241.0 26.186648 35.982014 \n", 1799 | "mou_rvce_Mean 17241.0 119.739875 170.938653 \n", 1800 | "mou_cdat_Mean 17241.0 1.930040 28.330685 \n", 1801 | "unan_vce_Mean 17241.0 28.921389 39.442498 \n", 1802 | "unan_dat_Mean 17241.0 0.031417 0.458737 \n", 1803 | "comp_vce_Mean 17241.0 114.545889 122.214639 \n", 1804 | "comp_dat_Mean 17241.0 0.776231 7.644959 \n", 1805 | "custcare_Mean 17241.0 1.949056 5.958278 \n", 1806 | "cc_mou_Mean 17241.0 3.995940 11.376720 \n", 1807 | "avgmou 17241.0 494.788836 452.298412 \n", 1808 | "avgqty 17241.0 179.168816 179.200014 \n", 1809 | "L6M_INDFINA_ALL_TX_AMT 17241.0 4303.363895 76935.023181 \n", 1810 | "LOCAL_FIX_CLOSE_ACC_TX_AMT 17241.0 3453.773522 20665.105788 \n", 1811 | "LOCAL_CUR_MON_AVG_BAL 17241.0 27735.417899 98608.195518 \n", 1812 | "LOCAL_CUR_MON_AVG_BAL_PROP 17241.0 0.373216 0.438130 \n", 1813 | "LOCAL_OVEONEYR_FF_MON_AVG_BAL 17241.0 72023.920103 128981.784041 \n", 1814 | "LOCAL_FIX_MON_AVG_BAL 17241.0 83907.649074 360269.646860 \n", 1815 | "... ... ... ... 
\n", 1816 | "LOCAL_FIX_OPEN_ACC_TX_AMT 17241.0 5528.391594 33922.675509 \n", 1817 | "LOCAL_FIX_WITHDRAW_TX_AMT 17241.0 1706.323595 18679.511773 \n", 1818 | "SAV_SLOPE 17241.0 -0.004043 0.256375 \n", 1819 | "mou_cvce_Mean 17241.0 238.802099 267.274943 \n", 1820 | "vceovr_Range 17205.0 29.374902 58.098543 \n", 1821 | "vceovr_Mean 17205.0 12.848716 29.276031 \n", 1822 | "roam_Range 17205.0 3.127544 19.692447 \n", 1823 | "datovr_Range 17205.0 0.717182 5.964260 \n", 1824 | "da_Mean 17205.0 0.920998 2.118237 \n", 1825 | "ovrrev_Range 17205.0 29.844381 58.365586 \n", 1826 | "ovrmou_Range 17205.0 89.809416 177.656676 \n", 1827 | "da_Range 17205.0 1.671578 2.987940 \n", 1828 | "totmrc_Range 17205.0 8.801175 27.175065 \n", 1829 | "mou_Range 17205.0 369.560622 420.042160 \n", 1830 | "rev_Range 17205.0 42.799289 70.035631 \n", 1831 | "roam_Mean 17205.0 1.142233 6.544958 \n", 1832 | "datovr_Mean 17205.0 0.249300 2.132651 \n", 1833 | "ovrmou_Mean 17205.0 39.815027 92.879658 \n", 1834 | "ovrrev_Mean 17205.0 13.101876 29.493960 \n", 1835 | "change_mou 17153.0 -8.395409 248.516433 \n", 1836 | "change_rev 17153.0 -1.693052 34.284273 \n", 1837 | "hnd_price 17043.0 106.805692 61.622379 \n", 1838 | "forgntvl 16935.0 0.058636 0.234949 \n", 1839 | "age1 16935.0 31.669324 22.129690 \n", 1840 | "age2 16935.0 21.225037 23.903794 \n", 1841 | "avg6qty 16688.0 187.114334 195.036224 \n", 1842 | "avg6mou 16688.0 531.920662 517.294774 \n", 1843 | "lor 12230.0 6.377514 4.781650 \n", 1844 | "pre_hnd_price 7649.0 82.761595 60.803160 \n", 1845 | "educ1 2419.0 1.868541 0.847854 \n", 1846 | "\n", 1847 | " min 25% 50% \\\n", 1848 | "LOCAL_CUR_SAV_SLOPE -1.000000 -0.087996 0.000000 \n", 1849 | "totmou 0.000000 2450.000000 5098.000000 \n", 1850 | "mou_opkd_Mean 0.000000 0.000000 0.000000 \n", 1851 | "mou_opkv_Mean 0.000000 21.876667 83.176667 \n", 1852 | "opk_dat_Mean 0.000000 0.000000 0.000000 \n", 1853 | "opk_vce_Mean 0.000000 11.666667 36.666667 \n", 1854 | "mou_pead_Mean 0.000000 0.000000 0.000000 
\n", 1855 | "mou_peav_Mean 0.000000 43.940000 122.703333 \n", 1856 | "peak_dat_Mean 0.000000 0.000000 0.000000 \n", 1857 | "peak_vce_Mean 0.000000 24.333333 63.666667 \n", 1858 | "mouiwylisv_Mean 0.000000 0.000000 3.983333 \n", 1859 | "iwylis_vce_Mean 0.000000 0.000000 2.333333 \n", 1860 | "mouowylisv_Mean 0.000000 2.860000 13.016667 \n", 1861 | "owylis_vce_Mean 0.000000 3.666667 14.000000 \n", 1862 | "mou_rvce_Mean 0.000000 9.466667 54.290000 \n", 1863 | "mou_cdat_Mean 0.000000 0.000000 0.000000 \n", 1864 | "unan_vce_Mean 0.000000 5.333333 16.666667 \n", 1865 | "unan_dat_Mean 0.000000 0.000000 0.000000 \n", 1866 | "comp_vce_Mean 0.000000 31.666667 79.666667 \n", 1867 | "comp_dat_Mean 0.000000 0.000000 0.000000 \n", 1868 | "custcare_Mean 0.000000 0.000000 0.000000 \n", 1869 | "cc_mou_Mean 0.000000 0.000000 0.000000 \n", 1870 | "avgmou 0.000000 179.710000 365.000000 \n", 1871 | "avgqty 0.000000 64.300000 129.170000 \n", 1872 | "L6M_INDFINA_ALL_TX_AMT 0.000000 0.000000 0.000000 \n", 1873 | "LOCAL_FIX_CLOSE_ACC_TX_AMT 0.000000 0.000000 0.000000 \n", 1874 | "LOCAL_CUR_MON_AVG_BAL 0.000000 51.530000 3232.020000 \n", 1875 | "LOCAL_CUR_MON_AVG_BAL_PROP 0.000000 0.005796 0.091914 \n", 1876 | "LOCAL_OVEONEYR_FF_MON_AVG_BAL 0.000000 0.000000 50000.000000 \n", 1877 | "LOCAL_FIX_MON_AVG_BAL 0.000000 0.000000 56677.270000 \n", 1878 | "... ... ... ... 
\n", 1879 | "LOCAL_FIX_OPEN_ACC_TX_AMT 0.000000 0.000000 0.000000 \n", 1880 | "LOCAL_FIX_WITHDRAW_TX_AMT 0.000000 0.000000 0.000000 \n", 1881 | "SAV_SLOPE -1.000000 -0.000550 0.000533 \n", 1882 | "mou_cvce_Mean 0.000000 56.900000 157.183333 \n", 1883 | "vceovr_Range 0.000000 0.000000 2.100000 \n", 1884 | "vceovr_Mean 0.000000 0.000000 0.600000 \n", 1885 | "roam_Range 0.000000 0.000000 0.000000 \n", 1886 | "datovr_Range 0.000000 0.000000 0.000000 \n", 1887 | "da_Mean 0.000000 0.000000 0.247500 \n", 1888 | "ovrrev_Range 0.000000 0.000000 3.300000 \n", 1889 | "ovrmou_Range 0.000000 0.000000 9.000000 \n", 1890 | "da_Range 0.000000 0.000000 0.990000 \n", 1891 | "totmrc_Range 0.000000 0.000000 0.000000 \n", 1892 | "mou_Range 0.000000 116.000000 242.000000 \n", 1893 | "rev_Range 0.000000 1.980000 15.750000 \n", 1894 | "roam_Mean 0.000000 0.000000 0.000000 \n", 1895 | "datovr_Mean 0.000000 0.000000 0.000000 \n", 1896 | "ovrmou_Mean 0.000000 0.000000 2.750000 \n", 1897 | "ovrrev_Mean 0.000000 0.000000 0.975000 \n", 1898 | "change_mou -3875.000000 -78.250000 -3.500000 \n", 1899 | "change_rev -626.357500 -7.082500 -0.312500 \n", 1900 | "hnd_price 9.989998 59.989990 129.989990 \n", 1901 | "forgntvl 0.000000 0.000000 0.000000 \n", 1902 | "age1 0.000000 0.000000 36.000000 \n", 1903 | "age2 0.000000 0.000000 0.000000 \n", 1904 | "avg6qty 0.000000 61.000000 131.000000 \n", 1905 | "avg6mou 0.000000 173.000000 374.000000 \n", 1906 | "lor 0.000000 2.000000 5.000000 \n", 1907 | "pre_hnd_price 9.989998 29.989990 59.989990 \n", 1908 | "educ1 1.000000 1.000000 2.000000 \n", 1909 | "\n", 1910 | " 75% max missing_pct \n", 1911 | "LOCAL_CUR_SAV_SLOPE 0.074506 2.000000e+00 0.000000 \n", 1912 | "totmou 9868.000000 1.736084e+05 0.000000 \n", 1913 | "mou_opkd_Mean 0.000000 2.922043e+03 0.000000 \n", 1914 | "mou_opkv_Mean 229.656667 2.687313e+03 0.000000 \n", 1915 | "opk_dat_Mean 0.000000 2.456667e+02 0.000000 \n", 1916 | "opk_vce_Mean 91.666667 1.438000e+03 0.000000 \n", 1917 | "mou_pead_Mean 
0.000000 2.902433e+02 0.000000 \n", 1918 | "mou_peav_Mean 242.296667 2.994580e+03 0.000000 \n", 1919 | "peak_dat_Mean 0.000000 2.806667e+02 0.000000 \n", 1920 | "peak_vce_Mean 124.333333 1.921333e+03 0.000000 \n", 1921 | "mouiwylisv_Mean 20.666667 9.210700e+02 0.000000 \n", 1922 | "iwylis_vce_Mean 9.666667 3.446667e+02 0.000000 \n", 1923 | "mouowylisv_Mean 36.880000 1.490253e+03 0.000000 \n", 1924 | "owylis_vce_Mean 35.000000 6.443333e+02 0.000000 \n", 1925 | "mou_rvce_Mean 161.336667 2.138510e+03 0.000000 \n", 1926 | "mou_cdat_Mean 0.000000 3.032050e+03 0.000000 \n", 1927 | "unan_vce_Mean 37.000000 8.143333e+02 0.000000 \n", 1928 | "unan_dat_Mean 0.000000 4.133333e+01 0.000000 \n", 1929 | "comp_vce_Mean 157.333333 1.376667e+03 0.000000 \n", 1930 | "comp_dat_Mean 0.000000 5.263333e+02 0.000000 \n", 1931 | "custcare_Mean 1.666667 3.656667e+02 0.000000 \n", 1932 | "cc_mou_Mean 3.190000 3.091133e+02 0.000000 \n", 1933 | "avgmou 667.330000 5.164290e+03 0.000000 \n", 1934 | "avgqty 234.680000 2.500890e+03 0.000000 \n", 1935 | "L6M_INDFINA_ALL_TX_AMT 0.000000 3.987167e+06 0.000000 \n", 1936 | "LOCAL_FIX_CLOSE_ACC_TX_AMT 0.000000 7.945000e+05 0.000000 \n", 1937 | "LOCAL_CUR_MON_AVG_BAL 23113.460000 7.610110e+06 0.000000 \n", 1938 | "LOCAL_CUR_MON_AVG_BAL_PROP 1.000000 1.000000e+00 0.000000 \n", 1939 | "LOCAL_OVEONEYR_FF_MON_AVG_BAL 95000.000000 5.561334e+06 0.000000 \n", 1940 | "LOCAL_FIX_MON_AVG_BAL 101125.000000 4.358232e+07 0.000000 \n", 1941 | "... ... ... ... 
\n", 1942 | "LOCAL_FIX_OPEN_ACC_TX_AMT 0.000000 2.586000e+06 0.000000 \n", 1943 | "LOCAL_FIX_WITHDRAW_TX_AMT 0.000000 1.398000e+06 0.000000 \n", 1944 | "SAV_SLOPE 0.035337 1.904013e+00 0.000000 \n", 1945 | "mou_cvce_Mean 329.523333 3.661677e+03 0.000000 \n", 1946 | "vceovr_Range 37.000000 1.215550e+03 0.002088 \n", 1947 | "vceovr_Mean 13.275000 6.012125e+02 0.002088 \n", 1948 | "roam_Range 0.780000 1.561470e+03 0.002088 \n", 1949 | "datovr_Range 0.000000 3.032000e+02 0.002088 \n", 1950 | "da_Mean 0.990000 5.766750e+01 0.002088 \n", 1951 | "ovrrev_Range 37.450000 1.215550e+03 0.002088 \n", 1952 | "ovrmou_Range 112.000000 3.473000e+03 0.002088 \n", 1953 | "da_Range 1.980000 6.732000e+01 0.002088 \n", 1954 | "totmrc_Range 0.000000 5.999800e+02 0.002088 \n", 1955 | "mou_Range 468.000000 6.865000e+03 0.002088 \n", 1956 | "rev_Range 55.570000 1.527970e+03 0.002088 \n", 1957 | "roam_Mean 0.257500 4.162575e+02 0.002088 \n", 1958 | "datovr_Mean 0.000000 1.007000e+02 0.002088 \n", 1959 | "ovrmou_Mean 41.000000 1.887250e+03 0.002088 \n", 1960 | "ovrrev_Mean 13.700000 6.012125e+02 0.002088 \n", 1961 | "change_mou 70.000000 3.712250e+03 0.005104 \n", 1962 | "change_rev 1.612500 4.191725e+02 0.005104 \n", 1963 | "hnd_price 149.989990 4.999900e+02 0.011484 \n", 1964 | "forgntvl 0.000000 1.000000e+00 0.017748 \n", 1965 | "age1 48.000000 9.600000e+01 0.017748 \n", 1966 | "age2 42.000000 9.800000e+01 0.017748 \n", 1967 | "avg6qty 246.000000 2.673000e+03 0.032075 \n", 1968 | "avg6mou 726.250000 5.347000e+03 0.032075 \n", 1969 | "lor 10.000000 1.500000e+01 0.290644 \n", 1970 | "pre_hnd_price 129.989990 4.999900e+02 0.556348 \n", 1971 | "educ1 2.000000 4.000000e+00 0.859695 \n", 1972 | "\n", 1973 | "[76 rows x 9 columns]" 1974 | ] 1975 | }, 1976 | "execution_count": 8, 1977 | "metadata": {}, 1978 | "output_type": "execute_result" 1979 | } 1980 | ], 1981 | "source": [ 1982 | "float_df = model_data.select_dtypes(include=['float']).describe().T.assign(missing_pct=model_data.apply(lambda 
x: (len(x)-x.count())/float(len(x))))\n", 1983 | "float_df = float_df.sort_values(['missing_pct'])\n", 1984 | "float_df" 1985 | ] 1986 | }, 1987 | { 1988 | "cell_type": "code", 1989 | "execution_count": 9, 1990 | "metadata": {}, 1991 | "outputs": [], 1992 | "source": [ 1993 | "# 特征构造\n", 1994 | "model_data['AVG_LOCAL_CUR_TRANS_TX_AMT'] = model_data['LOCAL_CUR_TRANS_TX_AMT'] / model_data['LOCAL_CUR_TRANS_TX_NUM']\n", 1995 | "model_data['AVG_LOCAL_CUR_LASTSAV_TX_AMT'] = model_data['LOCAL_CUR_LASTSAV_TX_AMT'] / model_data['LOCAL_CUR_LASTSAV_TX_NUM']" 1996 | ] 1997 | }, 1998 | { 1999 | "cell_type": "code", 2000 | "execution_count": 10, 2001 | "metadata": {}, 2002 | "outputs": [], 2003 | "source": [ 2004 | "maxValueFeatures = ['LOCAL_CUR_SAV_SLOPE','LOCAL_BELONEYR_FF_SLOPE','LOCAL_OVEONEYR_FF_SLOPE','LOCAL_SAV_SLOPE','SAV_SLOPE']\n", 2005 | "model_data['volatilityMax']= model_data[maxValueFeatures].apply(max, axis =1)" 2006 | ] 2007 | }, 2008 | { 2009 | "cell_type": "code", 2010 | "execution_count": 11, 2011 | "metadata": {}, 2012 | "outputs": [], 2013 | "source": [ 2014 | "# 本币活期月日均余额占比 = 1 - 本币定期月日均余额占比\n", 2015 | "# 删除冗余特征\n", 2016 | "del model_data['LOCAL_CUR_MON_AVG_BAL_PROP']" 2017 | ] 2018 | }, 2019 | { 2020 | "cell_type": "code", 2021 | "execution_count": 12, 2022 | "metadata": { 2023 | "collapsed": true 2024 | }, 2025 | "outputs": [], 2026 | "source": [ 2027 | "# 某些特征可以相加\n", 2028 | "sumup_cols0 = ['LOCAL_CUR_MON_AVG_BAL','LOCAL_FIX_MON_AVG_BAL']\n", 2029 | "sumup_cols1 = ['LOCAL_CUR_WITHDRAW_TX_NUM','LOCAL_FIX_WITHDRAW_TX_NUM']\n", 2030 | "sumup_cols2 = ['LOCAL_CUR_WITHDRAW_TX_AMT','LOCAL_FIX_WITHDRAW_TX_AMT']\n", 2031 | "sumup_cols3 = ['COUNTER_NOT_ACCT_TX_NUM','COUNTER_ACCT_TX_NUM']\n", 2032 | "sumup_cols4 = ['ATM_ALL_TX_NUM','COUNTER_ALL_TX_NUM']\n", 2033 | "sumup_cols5 = ['ATM_ACCT_TX_NUM','COUNTER_ACCT_TX_NUM']\n", 2034 | "sumup_cols6 = ['ATM_ACCT_TX_AMT','COUNTER_ACCT_TX_AMT']\n", 2035 | "sumup_cols7 = 
['ATM_NOT_ACCT_TX_NUM','COUNTER_NOT_ACCT_TX_NUM']\n", 2036 | "\n", 2037 | "model_data['TOTAL_LOCAL_MON_AVG_BAL'] = model_data[sumup_cols0].apply(sum, axis = 1)\n", 2038 | "model_data['TOTAL_WITHDRAW_TX_NUM'] = model_data[sumup_cols1].apply(sum, axis = 1)\n", 2039 | "model_data['TOTAL_WITHDRAW_TX_AMT'] = model_data[sumup_cols2].apply(sum, axis = 1)\n", 2040 | "model_data['TOTAL_COUNTER_TX_NUM'] = model_data[sumup_cols3].apply(sum, axis = 1)\n", 2041 | "model_data['TOTAL_ALL_TX_NUM'] = model_data[sumup_cols4].apply(sum, axis = 1)\n", 2042 | "model_data['TOTAL_ACCT_TX_NUM'] = model_data[sumup_cols5].apply(sum, axis = 1)\n", 2043 | "model_data['TOTAL_ACCT_TX_AMT'] = model_data[sumup_cols6].apply(sum, axis = 1)\n", 2044 | "model_data['TOTAL_NOT_ACCT_TX_NUM'] = model_data[sumup_cols7].apply(sum, axis = 1)" 2045 | ] 2046 | }, 2047 | { 2048 | "cell_type": "code", 2049 | "execution_count": 13, 2050 | "metadata": {}, 2051 | "outputs": [ 2052 | { 2053 | "data": { 2054 | "text/plain": [ 2055 | "(17241, 133)" 2056 | ] 2057 | }, 2058 | "execution_count": 13, 2059 | "metadata": {}, 2060 | "output_type": "execute_result" 2061 | } 2062 | ], 2063 | "source": [ 2064 | "model_data.shape" 2065 | ] 2066 | }, 2067 | { 2068 | "cell_type": "code", 2069 | "execution_count": 14, 2070 | "metadata": { 2071 | "collapsed": true 2072 | }, 2073 | "outputs": [], 2074 | "source": [ 2075 | "### 特征构造: 比率\n", 2076 | "numerator_cols = ['LOCAL_SAV_CUR_ALL_BAL','SAV_CUR_ALL_BAL','ASSET_CUR_ALL_BAL','LOCAL_CUR_WITHDRAW_TX_NUM','LOCAL_CUR_WITHDRAW_TX_AMT','COUNTER_NOT_ACCT_TX_NUM',\n", 2077 | " 'ATM_ALL_TX_NUM','ATM_ACCT_TX_AMT','ATM_NOT_ACCT_TX_NUM']\n", 2078 | "denominator_cols = ['LOCAL_SAV_MON_AVG_BAL','SAV_MON_AVG_BAL','ASSET_MON_AVG_BAL','TOTAL_WITHDRAW_TX_NUM','TOTAL_WITHDRAW_TX_AMT','TOTAL_COUNTER_TX_NUM',\n", 2079 | " 'TOTAL_ACCT_TX_NUM','TOTAL_ACCT_TX_AMT','TOTAL_NOT_ACCT_TX_NUM']\n", 2080 | "\n", 2081 | "new_col_name = [\"RATIO_\"+str(i) for i in range(len(numerator_cols))]\n", 2082 | "for i in 
range(len(numerator_cols)):\n", 2083 | " model_data[new_col_name[i]] = model_data[numerator_cols[i]] / model_data[denominator_cols[i]]" 2084 | ] 2085 | }, 2086 | { 2087 | "cell_type": "code", 2088 | "execution_count": 15, 2089 | "metadata": {}, 2090 | "outputs": [ 2091 | { 2092 | "data": { 2093 | "text/plain": [ 2094 | "(17241, 142)" 2095 | ] 2096 | }, 2097 | "execution_count": 15, 2098 | "metadata": {}, 2099 | "output_type": "execute_result" 2100 | } 2101 | ], 2102 | "source": [ 2103 | "model_data.shape" 2104 | ] 2105 | }, 2106 | { 2107 | "cell_type": "code", 2108 | "execution_count": 16, 2109 | "metadata": { 2110 | "collapsed": true 2111 | }, 2112 | "outputs": [], 2113 | "source": [ 2114 | "object_df = model_data.select_dtypes(include=['object']).describe().T\n", 2115 | "int_df = model_data.select_dtypes(include=['int64']).describe().T\n", 2116 | "float_df = model_data.select_dtypes(include=['float']).describe().T" 2117 | ] 2118 | }, 2119 | { 2120 | "cell_type": "code", 2121 | "execution_count": 17, 2122 | "metadata": {}, 2123 | "outputs": [], 2124 | "source": [ 2125 | "# 对float和int类型的特征中的缺失值不全为0\n", 2126 | "model_data_to_fillna = pd.concat([model_data[float_df.index], model_data[int_df.index]], axis=1)\n", 2127 | "model_data_to_fillna.replace(float('inf'), 1, inplace=True)\n", 2128 | "model_data_to_fillna.fillna(0, inplace=True)" 2129 | ] 2130 | }, 2131 | { 2132 | "cell_type": "code", 2133 | "execution_count": 18, 2134 | "metadata": {}, 2135 | "outputs": [], 2136 | "source": [ 2137 | "model_data = pd.concat([model_data[object_df.index], model_data_to_fillna], axis=1)" 2138 | ] 2139 | }, 2140 | { 2141 | "cell_type": "code", 2142 | "execution_count": 19, 2143 | "metadata": {}, 2144 | "outputs": [], 2145 | "source": [ 2146 | "# 进行One_hot编码,并且对object类型的特征中的缺失值都设定一个哑变量\n", 2147 | "model_data = pd.get_dummies(model_data, dummy_na=True)" 2148 | ] 2149 | }, 2150 | { 2151 | "cell_type": "code", 2152 | "execution_count": 20, 2153 | "metadata": {}, 2154 | "outputs": [ 2155 
| { 2156 | "data": { 2157 | "text/html": [ 2158 | "
\n", 2159 | "\n", 2172 | "\n", 2173 | " \n", 2174 | " \n", 2175 | " \n", 2176 | " \n", 2177 | " \n", 2178 | " \n", 2179 | " \n", 2180 | " \n", 2181 | " \n", 2182 | " \n", 2183 | " \n", 2184 | " \n", 2185 | " \n", 2186 | " \n", 2187 | " \n", 2188 | " \n", 2189 | " \n", 2190 | " \n", 2191 | " \n", 2192 | " \n", 2193 | " \n", 2194 | " \n", 2195 | " \n", 2196 | " \n", 2197 | " \n", 2198 | " \n", 2199 | " \n", 2200 | " \n", 2201 | " \n", 2202 | " \n", 2203 | " \n", 2204 | " \n", 2205 | " \n", 2206 | " \n", 2207 | " \n", 2208 | " \n", 2209 | " \n", 2210 | " \n", 2211 | " \n", 2212 | " \n", 2213 | " \n", 2214 | " \n", 2215 | " \n", 2216 | " \n", 2217 | " \n", 2218 | " \n", 2219 | " \n", 2220 | " \n", 2221 | " \n", 2222 | " \n", 2223 | " \n", 2224 | " \n", 2225 | " \n", 2226 | " \n", 2227 | " \n", 2228 | " \n", 2229 | " \n", 2230 | " \n", 2231 | " \n", 2232 | " \n", 2233 | " \n", 2234 | " \n", 2235 | " \n", 2236 | " \n", 2237 | " \n", 2238 | " \n", 2239 | " \n", 2240 | " \n", 2241 | " \n", 2242 | " \n", 2243 | " \n", 2244 | " \n", 2245 | " \n", 2246 | " \n", 2247 | " \n", 2248 | " \n", 2249 | " \n", 2250 | " \n", 2251 | " \n", 2252 | " \n", 2253 | " \n", 2254 | " \n", 2255 | " \n", 2256 | " \n", 2257 | " \n", 2258 | " \n", 2259 | " \n", 2260 | " \n", 2261 | " \n", 2262 | " \n", 2263 | " \n", 2264 | " \n", 2265 | " \n", 2266 | " \n", 2267 | " \n", 2268 | " \n", 2269 | " \n", 2270 | " \n", 2271 | " \n", 2272 | " \n", 2273 | " \n", 2274 | " \n", 2275 | " \n", 2276 | " \n", 2277 | " \n", 2278 | " \n", 2279 | " \n", 2280 | " \n", 2281 | " \n", 2282 | " \n", 2283 | " \n", 2284 | " \n", 2285 | " \n", 2286 | " \n", 2287 | " \n", 2288 | " \n", 2289 | " \n", 2290 | " \n", 2291 | " \n", 2292 | " \n", 2293 | " \n", 2294 | " \n", 2295 | " \n", 2296 | " \n", 2297 | " \n", 2298 | " \n", 2299 | " \n", 2300 | " \n", 2301 | " \n", 2302 | " \n", 2303 | " \n", 2304 | " \n", 2305 | " \n", 2306 | " \n", 2307 | " \n", 2308 | " \n", 2309 | " \n", 2310 | " \n", 2311 | " \n", 2312 | " \n", 2313 | " 
\n", 2314 | " \n", 2315 | " \n", 2316 | " \n", 2317 | " \n", 2318 | " \n", 2319 | " \n", 2320 | " \n", 2321 | "
LOCAL_CUR_SAV_SLOPELOCAL_CUR_MON_AVG_BALLOCAL_OVEONEYR_FF_MON_AVG_BALLOCAL_FIX_MON_AVG_BALLOCAL_FIX_MON_AVG_BAL_PROPLOCAL_BELONEYR_FF_SLOPELOCAL_BELONEYR_FF_MON_AVG_BALLOCAL_OVEONEYR_FF_SLOPELOCAL_SAV_SLOPELOCAL_SAV_CUR_ALL_BAL...kid6_10_nankid11_15_Ukid11_15_Ykid11_15_nankid16_17_Ukid16_17_Ykid16_17_nancar_buy_Newcar_buy_UNKNOWNcar_buy_nan
00.0000000.0050429.6850429.681.0000000.00.0-0.032395-0.03239541000.00...0100100100
11.00569220149.0456047.5056047.500.7355650.00.0-0.1488570.00396531929.10...0100100100
20.00056217.810.000.000.0000000.00.00.0000000.00056217.82...0100100100
3-0.55091229359.210.000.000.0000000.00.00.000000-0.55091233273.57...0100100010
40.00028834.680.000.000.0000000.00.00.0000000.00028834.70...0100010100
\n", 2322 | "

5 rows × 178 columns

\n", 2323 | "
" 2324 | ], 2325 | "text/plain": [ 2326 | " LOCAL_CUR_SAV_SLOPE LOCAL_CUR_MON_AVG_BAL LOCAL_OVEONEYR_FF_MON_AVG_BAL \\\n", 2327 | "0 0.000000 0.00 50429.68 \n", 2328 | "1 1.005692 20149.04 56047.50 \n", 2329 | "2 0.000562 17.81 0.00 \n", 2330 | "3 -0.550912 29359.21 0.00 \n", 2331 | "4 0.000288 34.68 0.00 \n", 2332 | "\n", 2333 | " LOCAL_FIX_MON_AVG_BAL LOCAL_FIX_MON_AVG_BAL_PROP LOCAL_BELONEYR_FF_SLOPE \\\n", 2334 | "0 50429.68 1.000000 0.0 \n", 2335 | "1 56047.50 0.735565 0.0 \n", 2336 | "2 0.00 0.000000 0.0 \n", 2337 | "3 0.00 0.000000 0.0 \n", 2338 | "4 0.00 0.000000 0.0 \n", 2339 | "\n", 2340 | " LOCAL_BELONEYR_FF_MON_AVG_BAL LOCAL_OVEONEYR_FF_SLOPE LOCAL_SAV_SLOPE \\\n", 2341 | "0 0.0 -0.032395 -0.032395 \n", 2342 | "1 0.0 -0.148857 0.003965 \n", 2343 | "2 0.0 0.000000 0.000562 \n", 2344 | "3 0.0 0.000000 -0.550912 \n", 2345 | "4 0.0 0.000000 0.000288 \n", 2346 | "\n", 2347 | " LOCAL_SAV_CUR_ALL_BAL ... kid6_10_nan kid11_15_U kid11_15_Y \\\n", 2348 | "0 41000.00 ... 0 1 0 \n", 2349 | "1 31929.10 ... 0 1 0 \n", 2350 | "2 17.82 ... 0 1 0 \n", 2351 | "3 33273.57 ... 0 1 0 \n", 2352 | "4 34.70 ... 
0 1 0 \n", 2353 | "\n", 2354 | " kid11_15_nan kid16_17_U kid16_17_Y kid16_17_nan car_buy_New \\\n", 2355 | "0 0 1 0 0 1 \n", 2356 | "1 0 1 0 0 1 \n", 2357 | "2 0 1 0 0 1 \n", 2358 | "3 0 1 0 0 0 \n", 2359 | "4 0 0 1 0 1 \n", 2360 | "\n", 2361 | " car_buy_UNKNOWN car_buy_nan \n", 2362 | "0 0 0 \n", 2363 | "1 0 0 \n", 2364 | "2 0 0 \n", 2365 | "3 1 0 \n", 2366 | "4 0 0 \n", 2367 | "\n", 2368 | "[5 rows x 178 columns]" 2369 | ] 2370 | }, 2371 | "execution_count": 20, 2372 | "metadata": {}, 2373 | "output_type": "execute_result" 2374 | } 2375 | ], 2376 | "source": [ 2377 | "model_data.head()" 2378 | ] 2379 | }, 2380 | { 2381 | "cell_type": "code", 2382 | "execution_count": 21, 2383 | "metadata": {}, 2384 | "outputs": [ 2385 | { 2386 | "data": { 2387 | "text/plain": [ 2388 | "(17241, 178)" 2389 | ] 2390 | }, 2391 | "execution_count": 21, 2392 | "metadata": {}, 2393 | "output_type": "execute_result" 2394 | } 2395 | ], 2396 | "source": [ 2397 | "model_data.shape" 2398 | ] 2399 | }, 2400 | { 2401 | "cell_type": "code", 2402 | "execution_count": 22, 2403 | "metadata": { 2404 | "collapsed": true 2405 | }, 2406 | "outputs": [], 2407 | "source": [ 2408 | "# 处理各个特征取值的范围不一致性\n", 2409 | "# 将全部特征的取值均限制在[0,1]之间\n", 2410 | "model_data = (model_data - model_data.min()) / (model_data.max() - model_data.min())\n", 2411 | "model_data.replace(float('inf'), 1, inplace=True)\n", 2412 | "model_data.fillna(0, inplace=True)" 2413 | ] 2414 | }, 2415 | { 2416 | "cell_type": "code", 2417 | "execution_count": 23, 2418 | "metadata": {}, 2419 | "outputs": [ 2420 | { 2421 | "name": "stdout", 2422 | "output_type": "stream", 2423 | "text": [ 2424 | "False\n", 2425 | "False\n", 2426 | "False\n", 2427 | "False\n", 2428 | "False\n", 2429 | "False\n", 2430 | "False\n", 2431 | "False\n", 2432 | "False\n", 2433 | "False\n", 2434 | "False\n", 2435 | "False\n", 2436 | "False\n", 2437 | "False\n", 2438 | "False\n", 2439 | "False\n", 2440 | "False\n", 2441 | "False\n", 2442 | "False\n", 2443 | "False\n", 2444 | 
"False\n", 2445 | "False\n", 2446 | "False\n", 2447 | "False\n", 2448 | "False\n", 2449 | "False\n", 2450 | "False\n", 2451 | "False\n", 2452 | "False\n", 2453 | "False\n", 2454 | "False\n", 2455 | "False\n", 2456 | "False\n", 2457 | "False\n", 2458 | "False\n", 2459 | "False\n", 2460 | "False\n", 2461 | "False\n", 2462 | "False\n", 2463 | "False\n", 2464 | "False\n", 2465 | "False\n", 2466 | "False\n", 2467 | "False\n", 2468 | "False\n", 2469 | "False\n", 2470 | "False\n", 2471 | "False\n", 2472 | "False\n", 2473 | "False\n", 2474 | "False\n", 2475 | "False\n", 2476 | "False\n", 2477 | "False\n", 2478 | "False\n", 2479 | "False\n", 2480 | "False\n", 2481 | "False\n", 2482 | "False\n", 2483 | "False\n", 2484 | "False\n", 2485 | "False\n", 2486 | "False\n", 2487 | "False\n", 2488 | "False\n", 2489 | "False\n", 2490 | "False\n", 2491 | "False\n", 2492 | "False\n", 2493 | "False\n", 2494 | "False\n", 2495 | "False\n", 2496 | "False\n", 2497 | "False\n", 2498 | "False\n", 2499 | "False\n", 2500 | "False\n", 2501 | "False\n", 2502 | "False\n", 2503 | "False\n", 2504 | "False\n", 2505 | "False\n", 2506 | "False\n", 2507 | "False\n", 2508 | "False\n", 2509 | "False\n", 2510 | "False\n", 2511 | "False\n", 2512 | "False\n", 2513 | "False\n", 2514 | "False\n", 2515 | "False\n", 2516 | "False\n", 2517 | "False\n", 2518 | "False\n", 2519 | "False\n", 2520 | "False\n", 2521 | "False\n", 2522 | "False\n", 2523 | "False\n", 2524 | "False\n", 2525 | "False\n", 2526 | "False\n", 2527 | "False\n", 2528 | "False\n", 2529 | "False\n", 2530 | "False\n", 2531 | "False\n", 2532 | "False\n", 2533 | "False\n", 2534 | "False\n", 2535 | "False\n", 2536 | "False\n", 2537 | "False\n", 2538 | "False\n", 2539 | "False\n", 2540 | "False\n", 2541 | "False\n", 2542 | "False\n", 2543 | "False\n", 2544 | "False\n", 2545 | "False\n", 2546 | "False\n", 2547 | "False\n", 2548 | "False\n", 2549 | "False\n", 2550 | "False\n", 2551 | "False\n", 2552 | "False\n", 2553 | "False\n", 2554 | "False\n", 2555 | 
"False\n", 2556 | "False\n", 2557 | "False\n", 2558 | "False\n", 2559 | "False\n", 2560 | "False\n", 2561 | "False\n", 2562 | "False\n", 2563 | "False\n", 2564 | "False\n", 2565 | "False\n", 2566 | "False\n", 2567 | "False\n", 2568 | "False\n", 2569 | "False\n", 2570 | "False\n", 2571 | "False\n", 2572 | "False\n", 2573 | "False\n", 2574 | "False\n", 2575 | "False\n", 2576 | "False\n", 2577 | "False\n", 2578 | "False\n", 2579 | "False\n", 2580 | "False\n", 2581 | "False\n", 2582 | "False\n", 2583 | "False\n", 2584 | "False\n", 2585 | "False\n", 2586 | "False\n", 2587 | "False\n", 2588 | "False\n", 2589 | "False\n", 2590 | "False\n", 2591 | "False\n", 2592 | "False\n", 2593 | "False\n", 2594 | "False\n", 2595 | "False\n", 2596 | "False\n", 2597 | "False\n", 2598 | "False\n", 2599 | "False\n", 2600 | "False\n", 2601 | "False\n" 2602 | ] 2603 | } 2604 | ], 2605 | "source": [ 2606 | "null_test = model_data.isnull().any()\n", 2607 | "for i in null_test:\n", 2608 | " print(i)" 2609 | ] 2610 | }, 2611 | { 2612 | "cell_type": "code", 2613 | "execution_count": 24, 2614 | "metadata": { 2615 | "collapsed": true 2616 | }, 2617 | "outputs": [], 2618 | "source": [ 2619 | "model_data.to_csv('./model_data.csv', encoding='utf-8', index=False)" 2620 | ] 2621 | }, 2622 | { 2623 | "cell_type": "code", 2624 | "execution_count": 25, 2625 | "metadata": {}, 2626 | "outputs": [ 2627 | { 2628 | "data": { 2629 | "text/plain": [ 2630 | "0.0 15500\n", 2631 | "1.0 1741\n", 2632 | "Name: CHURN_CUST_IND, dtype: int64" 2633 | ] 2634 | }, 2635 | "execution_count": 25, 2636 | "metadata": {}, 2637 | "output_type": "execute_result" 2638 | } 2639 | ], 2640 | "source": [ 2641 | "model_data['CHURN_CUST_IND'].value_counts()" 2642 | ] 2643 | }, 2644 | { 2645 | "cell_type": "code", 2646 | "execution_count": 26, 2647 | "metadata": { 2648 | "collapsed": true 2649 | }, 2650 | "outputs": [], 2651 | "source": [ 2652 | "model_data_des = model_data.describe().T" 2653 | ] 2654 | }, 2655 | { 2656 | "cell_type": 
"code", 2657 | "execution_count": 27, 2658 | "metadata": {}, 2659 | "outputs": [ 2660 | { 2661 | "data": { 2662 | "text/html": [ 2663 | "
\n", 2664 | "\n", 2677 | "\n", 2678 | " \n", 2679 | " \n", 2680 | " \n", 2681 | " \n", 2682 | " \n", 2683 | " \n", 2684 | " \n", 2685 | " \n", 2686 | " \n", 2687 | " \n", 2688 | " \n", 2689 | " \n", 2690 | " \n", 2691 | " \n", 2692 | " \n", 2693 | " \n", 2694 | " \n", 2695 | " \n", 2696 | " \n", 2697 | " \n", 2698 | " \n", 2699 | " \n", 2700 | " \n", 2701 | " \n", 2702 | " \n", 2703 | " \n", 2704 | " \n", 2705 | " \n", 2706 | " \n", 2707 | " \n", 2708 | " \n", 2709 | " \n", 2710 | " \n", 2711 | " \n", 2712 | " \n", 2713 | " \n", 2714 | " \n", 2715 | " \n", 2716 | " \n", 2717 | " \n", 2718 | " \n", 2719 | " \n", 2720 | " \n", 2721 | " \n", 2722 | " \n", 2723 | " \n", 2724 | " \n", 2725 | " \n", 2726 | " \n", 2727 | " \n", 2728 | " \n", 2729 | " \n", 2730 | " \n", 2731 | " \n", 2732 | " \n", 2733 | " \n", 2734 | " \n", 2735 | " \n", 2736 | " \n", 2737 | " \n", 2738 | " \n", 2739 | " \n", 2740 | " \n", 2741 | " \n", 2742 | " \n", 2743 | " \n", 2744 | " \n", 2745 | " \n", 2746 | " \n", 2747 | " \n", 2748 | " \n", 2749 | " \n", 2750 | " \n", 2751 | " \n", 2752 | " \n", 2753 | " \n", 2754 | " \n", 2755 | " \n", 2756 | " \n", 2757 | " \n", 2758 | " \n", 2759 | " \n", 2760 | " \n", 2761 | " \n", 2762 | " \n", 2763 | " \n", 2764 | " \n", 2765 | " \n", 2766 | " \n", 2767 | " \n", 2768 | " \n", 2769 | " \n", 2770 | " \n", 2771 | " \n", 2772 | " \n", 2773 | " \n", 2774 | " \n", 2775 | " \n", 2776 | " \n", 2777 | " \n", 2778 | " \n", 2779 | " \n", 2780 | " \n", 2781 | " \n", 2782 | " \n", 2783 | " \n", 2784 | " \n", 2785 | " \n", 2786 | " \n", 2787 | " \n", 2788 | " \n", 2789 | " \n", 2790 | " \n", 2791 | " \n", 2792 | " \n", 2793 | " \n", 2794 | " \n", 2795 | " \n", 2796 | " \n", 2797 | " \n", 2798 | " \n", 2799 | " \n", 2800 | " \n", 2801 | " \n", 2802 | " \n", 2803 | " \n", 2804 | " \n", 2805 | " \n", 2806 | " \n", 2807 | " \n", 2808 | " \n", 2809 | " \n", 2810 | " \n", 2811 | " \n", 2812 | " \n", 2813 | " \n", 2814 | " \n", 2815 | " \n", 2816 | " \n", 2817 | " \n", 2818 | " 
\n", 2819 | " \n", 2820 | " \n", 2821 | " \n", 2822 | " \n", 2823 | " \n", 2824 | " \n", 2825 | " \n", 2826 | " \n", 2827 | " \n", 2828 | " \n", 2829 | " \n", 2830 | " \n", 2831 | " \n", 2832 | " \n", 2833 | " \n", 2834 | " \n", 2835 | " \n", 2836 | " \n", 2837 | " \n", 2838 | " \n", 2839 | " \n", 2840 | " \n", 2841 | " \n", 2842 | " \n", 2843 | " \n", 2844 | " \n", 2845 | " \n", 2846 | " \n", 2847 | " \n", 2848 | " \n", 2849 | " \n", 2850 | " \n", 2851 | " \n", 2852 | " \n", 2853 | " \n", 2854 | " \n", 2855 | " \n", 2856 | " \n", 2857 | " \n", 2858 | " \n", 2859 | " \n", 2860 | " \n", 2861 | " \n", 2862 | " \n", 2863 | " \n", 2864 | " \n", 2865 | " \n", 2866 | " \n", 2867 | " \n", 2868 | " \n", 2869 | " \n", 2870 | " \n", 2871 | " \n", 2872 | " \n", 2873 | " \n", 2874 | " \n", 2875 | " \n", 2876 | " \n", 2877 | " \n", 2878 | " \n", 2879 | " \n", 2880 | " \n", 2881 | " \n", 2882 | " \n", 2883 | " \n", 2884 | " \n", 2885 | " \n", 2886 | " \n", 2887 | " \n", 2888 | " \n", 2889 | " \n", 2890 | " \n", 2891 | " \n", 2892 | " \n", 2893 | " \n", 2894 | " \n", 2895 | " \n", 2896 | " \n", 2897 | " \n", 2898 | " \n", 2899 | " \n", 2900 | " \n", 2901 | " \n", 2902 | " \n", 2903 | " \n", 2904 | " \n", 2905 | " \n", 2906 | " \n", 2907 | " \n", 2908 | " \n", 2909 | " \n", 2910 | " \n", 2911 | " \n", 2912 | " \n", 2913 | " \n", 2914 | " \n", 2915 | " \n", 2916 | " \n", 2917 | " \n", 2918 | " \n", 2919 | " \n", 2920 | " \n", 2921 | " \n", 2922 | " \n", 2923 | " \n", 2924 | " \n", 2925 | " \n", 2926 | " \n", 2927 | " \n", 2928 | " \n", 2929 | " \n", 2930 | " \n", 2931 | " \n", 2932 | " \n", 2933 | " \n", 2934 | " \n", 2935 | " \n", 2936 | " \n", 2937 | " \n", 2938 | " \n", 2939 | " \n", 2940 | " \n", 2941 | " \n", 2942 | " \n", 2943 | " \n", 2944 | " \n", 2945 | " \n", 2946 | " \n", 2947 | " \n", 2948 | " \n", 2949 | " \n", 2950 | " \n", 2951 | " \n", 2952 | " \n", 2953 | " \n", 2954 | " \n", 2955 | " \n", 2956 | " \n", 2957 | " \n", 2958 | " \n", 2959 | " \n", 2960 | " \n", 2961 | 
" \n", 2962 | " \n", 2963 | " \n", 2964 | " \n", 2965 | " \n", 2966 | " \n", 2967 | " \n", 2968 | " \n", 2969 | " \n", 2970 | " \n", 2971 | " \n", 2972 | " \n", 2973 | " \n", 2974 | " \n", 2975 | " \n", 2976 | " \n", 2977 | " \n", 2978 | " \n", 2979 | " \n", 2980 | " \n", 2981 | " \n", 2982 | " \n", 2983 | " \n", 2984 | " \n", 2985 | " \n", 2986 | " \n", 2987 | " \n", 2988 | " \n", 2989 | " \n", 2990 | " \n", 2991 | " \n", 2992 | " \n", 2993 | " \n", 2994 | " \n", 2995 | " \n", 2996 | " \n", 2997 | " \n", 2998 | " \n", 2999 | " \n", 3000 | " \n", 3001 | " \n", 3002 | " \n", 3003 | " \n", 3004 | " \n", 3005 | " \n", 3006 | " \n", 3007 | " \n", 3008 | " \n", 3009 | " \n", 3010 | " \n", 3011 | " \n", 3012 | " \n", 3013 | " \n", 3014 | " \n", 3015 | " \n", 3016 | " \n", 3017 | " \n", 3018 | " \n", 3019 | " \n", 3020 | " \n", 3021 | " \n", 3022 | " \n", 3023 | " \n", 3024 | " \n", 3025 | " \n", 3026 | " \n", 3027 | " \n", 3028 | " \n", 3029 | " \n", 3030 | " \n", 3031 | " \n", 3032 | " \n", 3033 | " \n", 3034 | " \n", 3035 | " \n", 3036 | " \n", 3037 | " \n", 3038 | " \n", 3039 | " \n", 3040 | " \n", 3041 | " \n", 3042 | " \n", 3043 | " \n", 3044 | " \n", 3045 | " \n", 3046 | " \n", 3047 | " \n", 3048 | " \n", 3049 | " \n", 3050 | " \n", 3051 | " \n", 3052 | " \n", 3053 | " \n", 3054 | " \n", 3055 | " \n", 3056 | " \n", 3057 | " \n", 3058 | " \n", 3059 | " \n", 3060 | " \n", 3061 | " \n", 3062 | " \n", 3063 | " \n", 3064 | " \n", 3065 | " \n", 3066 | " \n", 3067 | " \n", 3068 | " \n", 3069 | " \n", 3070 | " \n", 3071 | " \n", 3072 | " \n", 3073 | " \n", 3074 | " \n", 3075 | " \n", 3076 | " \n", 3077 | " \n", 3078 | " \n", 3079 | " \n", 3080 | " \n", 3081 | " \n", 3082 | " \n", 3083 | " \n", 3084 | " \n", 3085 | " \n", 3086 | " \n", 3087 | " \n", 3088 | " \n", 3089 | " \n", 3090 | " \n", 3091 | " \n", 3092 | " \n", 3093 | " \n", 3094 | " \n", 3095 | " \n", 3096 | " \n", 3097 | " \n", 3098 | " \n", 3099 | " \n", 3100 | " \n", 3101 | " \n", 3102 | " \n", 3103 | " \n", 3104 
| " \n", 3105 | " \n", 3106 | " \n", 3107 | " \n", 3108 | " \n", 3109 | " \n", 3110 | " \n", 3111 | " \n", 3112 | " \n", 3113 | " \n", 3114 | " \n", 3115 | " \n", 3116 | " \n", 3117 | " \n", 3118 | " \n", 3119 | " \n", 3120 | " \n", 3121 | " \n", 3122 | " \n", 3123 | " \n", 3124 | " \n", 3125 | " \n", 3126 | " \n", 3127 | " \n", 3128 | " \n", 3129 | " \n", 3130 | " \n", 3131 | " \n", 3132 | " \n", 3133 | " \n", 3134 | " \n", 3135 | " \n", 3136 | " \n", 3137 | " \n", 3138 | " \n", 3139 | " \n", 3140 | " \n", 3141 | " \n", 3142 | " \n", 3143 | " \n", 3144 | " \n", 3145 | " \n", 3146 | " \n", 3147 | " \n", 3148 | " \n", 3149 | " \n", 3150 | " \n", 3151 | " \n", 3152 | " \n", 3153 | " \n", 3154 | " \n", 3155 | " \n", 3156 | " \n", 3157 | " \n", 3158 | " \n", 3159 | " \n", 3160 | " \n", 3161 | " \n", 3162 | " \n", 3163 | " \n", 3164 | " \n", 3165 | " \n", 3166 | " \n", 3167 | " \n", 3168 | " \n", 3169 | " \n", 3170 | " \n", 3171 | " \n", 3172 | " \n", 3173 | " \n", 3174 | " \n", 3175 | " \n", 3176 | " \n", 3177 | " \n", 3178 | " \n", 3179 | " \n", 3180 | " \n", 3181 | " \n", 3182 | " \n", 3183 | " \n", 3184 | " \n", 3185 | " \n", 3186 | " \n", 3187 | " \n", 3188 | " \n", 3189 | " \n", 3190 | " \n", 3191 | " \n", 3192 | " \n", 3193 | " \n", 3194 | " \n", 3195 | " \n", 3196 | " \n", 3197 | " \n", 3198 | " \n", 3199 | " \n", 3200 | " \n", 3201 | " \n", 3202 | " \n", 3203 | " \n", 3204 | " \n", 3205 | " \n", 3206 | " \n", 3207 | " \n", 3208 | " \n", 3209 | " \n", 3210 | " \n", 3211 | " \n", 3212 | " \n", 3213 | " \n", 3214 | " \n", 3215 | " \n", 3216 | " \n", 3217 | " \n", 3218 | " \n", 3219 | " \n", 3220 | " \n", 3221 | " \n", 3222 | " \n", 3223 | " \n", 3224 | " \n", 3225 | " \n", 3226 | " \n", 3227 | " \n", 3228 | " \n", 3229 | " \n", 3230 | " \n", 3231 | " \n", 3232 | " \n", 3233 | " \n", 3234 | " \n", 3235 | " \n", 3236 | " \n", 3237 | " \n", 3238 | " \n", 3239 | " \n", 3240 | " \n", 3241 | " \n", 3242 | " \n", 3243 | " \n", 3244 | " \n", 3245 | " \n", 3246 | " \n", 
3247 | " \n", 3248 | " \n", 3249 | " \n", 3250 | " \n", 3251 | " \n", 3252 | " \n", 3253 | " \n", 3254 | " \n", 3255 | " \n", 3256 | " \n", 3257 | " \n", 3258 | " \n", 3259 | " \n", 3260 | " \n", 3261 | " \n", 3262 | " \n", 3263 | " \n", 3264 | " \n", 3265 | " \n", 3266 | " \n", 3267 | " \n", 3268 | " \n", 3269 | " \n", 3270 | " \n", 3271 | " \n", 3272 | " \n", 3273 | " \n", 3274 | " \n", 3275 | " \n", 3276 | " \n", 3277 | " \n", 3278 | " \n", 3279 | " \n", 3280 | " \n", 3281 | " \n", 3282 | " \n", 3283 | " \n", 3284 | " \n", 3285 | " \n", 3286 | " \n", 3287 | " \n", 3288 | " \n", 3289 | " \n", 3290 | " \n", 3291 | " \n", 3292 | " \n", 3293 | " \n", 3294 | " \n", 3295 | " \n", 3296 | " \n", 3297 | " \n", 3298 | " \n", 3299 | " \n", 3300 | " \n", 3301 | " \n", 3302 | " \n", 3303 | " \n", 3304 | " \n", 3305 | " \n", 3306 | " \n", 3307 | " \n", 3308 | " \n", 3309 | " \n", 3310 | " \n", 3311 | " \n", 3312 | " \n", 3313 | " \n", 3314 | " \n", 3315 | " \n", 3316 | " \n", 3317 | " \n", 3318 | " \n", 3319 | " \n", 3320 | " \n", 3321 | " \n", 3322 | " \n", 3323 | " \n", 3324 | " \n", 3325 | " \n", 3326 | " \n", 3327 | " \n", 3328 | " \n", 3329 | " \n", 3330 | " \n", 3331 | " \n", 3332 | " \n", 3333 | " \n", 3334 | " \n", 3335 | " \n", 3336 | " \n", 3337 | " \n", 3338 | " \n", 3339 | " \n", 3340 | " \n", 3341 | " \n", 3342 | " \n", 3343 | " \n", 3344 | " \n", 3345 | " \n", 3346 | " \n", 3347 | " \n", 3348 | " \n", 3349 | " \n", 3350 | " \n", 3351 | " \n", 3352 | " \n", 3353 | " \n", 3354 | " \n", 3355 | " \n", 3356 | " \n", 3357 | " \n", 3358 | " \n", 3359 | " \n", 3360 | " \n", 3361 | " \n", 3362 | " \n", 3363 | " \n", 3364 | "
countmeanstdmin25%50%75%max
LOCAL_CUR_SAV_SLOPE17241.00.3294580.1357810.00.3040010.3333330.3581691.0
LOCAL_CUR_MON_AVG_BAL17241.00.0036450.0129580.00.0000070.0004250.0030371.0
LOCAL_OVEONEYR_FF_MON_AVG_BAL17241.00.0129510.0231930.00.0000000.0089910.0170821.0
LOCAL_FIX_MON_AVG_BAL17241.00.0019250.0082660.00.0000000.0013000.0023201.0
LOCAL_FIX_MON_AVG_BAL_PROP17241.00.5878040.4432460.00.0000000.8543960.9904361.0
LOCAL_BELONEYR_FF_SLOPE17241.00.3508140.0884050.00.3444370.3444370.3444371.0
LOCAL_BELONEYR_FF_MON_AVG_BAL17241.00.0002710.0077340.00.0000000.0000000.0000001.0
LOCAL_OVEONEYR_FF_SLOPE17241.00.3505900.0704180.00.3444420.3444420.3450261.0
LOCAL_SAV_SLOPE17241.00.3429970.0883930.00.3441640.3445350.3565391.0
LOCAL_SAV_CUR_ALL_BAL17241.00.0025090.0085240.00.0011310.0017100.0028071.0
LOCAL_SAV_MON_AVG_BAL17241.00.0025620.0085400.00.0011600.0017490.0028611.0
SAV_SLOPE17241.00.3429590.0882830.00.3441620.3445350.3565191.0
SAV_CUR_ALL_BAL17241.00.0025110.0085250.00.0011310.0017130.0028091.0
SAV_MON_AVG_BAL17241.00.0025630.0085410.00.0011610.0017510.0028621.0
FR_SAV_CUR_ALL_BAL17241.00.0004410.0144290.00.0000000.0000000.0000001.0
ASSET_CUR_ALL_BAL17241.00.0026500.0085980.00.0011420.0017810.0029411.0
ASSET_MON_AVG_BAL17241.00.0027010.0086130.00.0011930.0018160.0029881.0
LOCAL_CUR_TRANS_TX_AMT17241.00.0004170.0107610.00.0000000.0000000.0000001.0
LOCAL_CUR_LASTSAV_TX_AMT17241.00.0016280.0138960.00.0000000.0000000.0000001.0
LOCAL_CUR_WITHDRAW_TX_AMT17241.00.0029780.0187740.00.0000000.0000000.0004921.0
LOCAL_FIX_OPEN_ACC_TX_AMT17241.00.0021380.0131180.00.0000000.0000000.0000001.0
LOCAL_FIX_WITHDRAW_TX_AMT17241.00.0012210.0133620.00.0000000.0000000.0000001.0
LOCAL_FIX_CLOSE_ACC_TX_AMT17241.00.0043470.0260100.00.0000000.0000000.0000001.0
L6M_INDFINA_ALL_TX_AMT17241.00.0010790.0192960.00.0000000.0000000.0000001.0
da_Mean17241.00.0159370.0367010.00.0000000.0042920.0171671.0
ovrmou_Mean17241.00.0210530.0491720.00.0000000.0014570.0215921.0
ovrrev_Mean17241.00.0217470.0490160.00.0000000.0016220.0227661.0
vceovr_Mean17241.00.0213270.0486540.00.0000000.0008730.0219761.0
datovr_Mean17241.00.0024700.0211560.00.0000000.0000000.0000001.0
roam_Mean17241.00.0027380.0157070.00.0000000.0000000.0006191.0
...........................
marital_nan17241.00.0177480.1320390.00.0000000.0000000.0000001.0
wrkwoman_Y17241.00.1293430.3355890.00.0000000.0000000.0000001.0
wrkwoman_nan17241.00.8706570.3355890.01.0000001.0000001.0000001.0
proptype_A17241.00.2590920.4381490.00.0000000.0000001.0000001.0
proptype_B17241.00.0153700.1230240.00.0000000.0000000.0000001.0
proptype_D17241.00.0069020.0827940.00.0000000.0000000.0000001.0
proptype_E17241.00.0037700.0612870.00.0000000.0000000.0000001.0
proptype_G17241.00.0008700.0294840.00.0000000.0000000.0000001.0
proptype_M17241.00.0019720.0443650.00.0000000.0000000.0000001.0
proptype_nan17241.00.7120240.4528330.00.0000001.0000001.0000001.0
pcowner_Y17241.00.1874600.3902920.00.0000000.0000000.0000001.0
pcowner_nan17241.00.8125400.3902920.01.0000001.0000001.0000001.0
kid0_2_U17241.00.9436230.2306550.01.0000001.0000001.0000001.0
kid0_2_Y17241.00.0386290.1927140.00.0000000.0000000.0000001.0
kid0_2_nan17241.00.0177480.1320390.00.0000000.0000000.0000001.0
kid3_5_U17241.00.9375910.2419050.01.0000001.0000001.0000001.0
kid3_5_Y17241.00.0446610.2065640.00.0000000.0000000.0000001.0
kid3_5_nan17241.00.0177480.1320390.00.0000000.0000000.0000001.0
kid6_10_U17241.00.8990200.3013110.01.0000001.0000001.0000001.0
kid6_10_Y17241.00.0832320.2762400.00.0000000.0000000.0000001.0
kid6_10_nan17241.00.0177480.1320390.00.0000000.0000000.0000001.0
kid11_15_U17241.00.8919440.3104610.01.0000001.0000001.0000001.0
kid11_15_Y17241.00.0903080.2866310.00.0000000.0000000.0000001.0
kid11_15_nan17241.00.0177480.1320390.00.0000000.0000000.0000001.0
kid16_17_U17241.00.8860850.3177170.01.0000001.0000001.0000001.0
kid16_17_Y17241.00.0961660.2948270.00.0000000.0000000.0000001.0
kid16_17_nan17241.00.0177480.1320390.00.0000000.0000000.0000001.0
car_buy_New17241.00.4220170.4938960.00.0000000.0000001.0000001.0
car_buy_UNKNOWN17241.00.5602340.4963730.00.0000001.0000001.0000001.0
car_buy_nan17241.00.0177480.1320390.00.0000000.0000000.0000001.0
\n", 3365 | "

178 rows × 8 columns

\n", 3366 | "
" 3367 | ], 3368 | "text/plain": [ 3369 | " count mean std min 25% \\\n", 3370 | "LOCAL_CUR_SAV_SLOPE 17241.0 0.329458 0.135781 0.0 0.304001 \n", 3371 | "LOCAL_CUR_MON_AVG_BAL 17241.0 0.003645 0.012958 0.0 0.000007 \n", 3372 | "LOCAL_OVEONEYR_FF_MON_AVG_BAL 17241.0 0.012951 0.023193 0.0 0.000000 \n", 3373 | "LOCAL_FIX_MON_AVG_BAL 17241.0 0.001925 0.008266 0.0 0.000000 \n", 3374 | "LOCAL_FIX_MON_AVG_BAL_PROP 17241.0 0.587804 0.443246 0.0 0.000000 \n", 3375 | "LOCAL_BELONEYR_FF_SLOPE 17241.0 0.350814 0.088405 0.0 0.344437 \n", 3376 | "LOCAL_BELONEYR_FF_MON_AVG_BAL 17241.0 0.000271 0.007734 0.0 0.000000 \n", 3377 | "LOCAL_OVEONEYR_FF_SLOPE 17241.0 0.350590 0.070418 0.0 0.344442 \n", 3378 | "LOCAL_SAV_SLOPE 17241.0 0.342997 0.088393 0.0 0.344164 \n", 3379 | "LOCAL_SAV_CUR_ALL_BAL 17241.0 0.002509 0.008524 0.0 0.001131 \n", 3380 | "LOCAL_SAV_MON_AVG_BAL 17241.0 0.002562 0.008540 0.0 0.001160 \n", 3381 | "SAV_SLOPE 17241.0 0.342959 0.088283 0.0 0.344162 \n", 3382 | "SAV_CUR_ALL_BAL 17241.0 0.002511 0.008525 0.0 0.001131 \n", 3383 | "SAV_MON_AVG_BAL 17241.0 0.002563 0.008541 0.0 0.001161 \n", 3384 | "FR_SAV_CUR_ALL_BAL 17241.0 0.000441 0.014429 0.0 0.000000 \n", 3385 | "ASSET_CUR_ALL_BAL 17241.0 0.002650 0.008598 0.0 0.001142 \n", 3386 | "ASSET_MON_AVG_BAL 17241.0 0.002701 0.008613 0.0 0.001193 \n", 3387 | "LOCAL_CUR_TRANS_TX_AMT 17241.0 0.000417 0.010761 0.0 0.000000 \n", 3388 | "LOCAL_CUR_LASTSAV_TX_AMT 17241.0 0.001628 0.013896 0.0 0.000000 \n", 3389 | "LOCAL_CUR_WITHDRAW_TX_AMT 17241.0 0.002978 0.018774 0.0 0.000000 \n", 3390 | "LOCAL_FIX_OPEN_ACC_TX_AMT 17241.0 0.002138 0.013118 0.0 0.000000 \n", 3391 | "LOCAL_FIX_WITHDRAW_TX_AMT 17241.0 0.001221 0.013362 0.0 0.000000 \n", 3392 | "LOCAL_FIX_CLOSE_ACC_TX_AMT 17241.0 0.004347 0.026010 0.0 0.000000 \n", 3393 | "L6M_INDFINA_ALL_TX_AMT 17241.0 0.001079 0.019296 0.0 0.000000 \n", 3394 | "da_Mean 17241.0 0.015937 0.036701 0.0 0.000000 \n", 3395 | "ovrmou_Mean 17241.0 0.021053 0.049172 0.0 0.000000 \n", 3396 | "ovrrev_Mean 
17241.0 0.021747 0.049016 0.0 0.000000 \n", 3397 | "vceovr_Mean 17241.0 0.021327 0.048654 0.0 0.000000 \n", 3398 | "datovr_Mean 17241.0 0.002470 0.021156 0.0 0.000000 \n", 3399 | "roam_Mean 17241.0 0.002738 0.015707 0.0 0.000000 \n", 3400 | "... ... ... ... ... ... \n", 3401 | "marital_nan 17241.0 0.017748 0.132039 0.0 0.000000 \n", 3402 | "wrkwoman_Y 17241.0 0.129343 0.335589 0.0 0.000000 \n", 3403 | "wrkwoman_nan 17241.0 0.870657 0.335589 0.0 1.000000 \n", 3404 | "proptype_A 17241.0 0.259092 0.438149 0.0 0.000000 \n", 3405 | "proptype_B 17241.0 0.015370 0.123024 0.0 0.000000 \n", 3406 | "proptype_D 17241.0 0.006902 0.082794 0.0 0.000000 \n", 3407 | "proptype_E 17241.0 0.003770 0.061287 0.0 0.000000 \n", 3408 | "proptype_G 17241.0 0.000870 0.029484 0.0 0.000000 \n", 3409 | "proptype_M 17241.0 0.001972 0.044365 0.0 0.000000 \n", 3410 | "proptype_nan 17241.0 0.712024 0.452833 0.0 0.000000 \n", 3411 | "pcowner_Y 17241.0 0.187460 0.390292 0.0 0.000000 \n", 3412 | "pcowner_nan 17241.0 0.812540 0.390292 0.0 1.000000 \n", 3413 | "kid0_2_U 17241.0 0.943623 0.230655 0.0 1.000000 \n", 3414 | "kid0_2_Y 17241.0 0.038629 0.192714 0.0 0.000000 \n", 3415 | "kid0_2_nan 17241.0 0.017748 0.132039 0.0 0.000000 \n", 3416 | "kid3_5_U 17241.0 0.937591 0.241905 0.0 1.000000 \n", 3417 | "kid3_5_Y 17241.0 0.044661 0.206564 0.0 0.000000 \n", 3418 | "kid3_5_nan 17241.0 0.017748 0.132039 0.0 0.000000 \n", 3419 | "kid6_10_U 17241.0 0.899020 0.301311 0.0 1.000000 \n", 3420 | "kid6_10_Y 17241.0 0.083232 0.276240 0.0 0.000000 \n", 3421 | "kid6_10_nan 17241.0 0.017748 0.132039 0.0 0.000000 \n", 3422 | "kid11_15_U 17241.0 0.891944 0.310461 0.0 1.000000 \n", 3423 | "kid11_15_Y 17241.0 0.090308 0.286631 0.0 0.000000 \n", 3424 | "kid11_15_nan 17241.0 0.017748 0.132039 0.0 0.000000 \n", 3425 | "kid16_17_U 17241.0 0.886085 0.317717 0.0 1.000000 \n", 3426 | "kid16_17_Y 17241.0 0.096166 0.294827 0.0 0.000000 \n", 3427 | "kid16_17_nan 17241.0 0.017748 0.132039 0.0 0.000000 \n", 3428 | "car_buy_New 17241.0 
0.422017 0.493896 0.0 0.000000 \n", 3429 | "car_buy_UNKNOWN 17241.0 0.560234 0.496373 0.0 0.000000 \n", 3430 | "car_buy_nan 17241.0 0.017748 0.132039 0.0 0.000000 \n", 3431 | "\n", 3432 | " 50% 75% max \n", 3433 | "LOCAL_CUR_SAV_SLOPE 0.333333 0.358169 1.0 \n", 3434 | "LOCAL_CUR_MON_AVG_BAL 0.000425 0.003037 1.0 \n", 3435 | "LOCAL_OVEONEYR_FF_MON_AVG_BAL 0.008991 0.017082 1.0 \n", 3436 | "LOCAL_FIX_MON_AVG_BAL 0.001300 0.002320 1.0 \n", 3437 | "LOCAL_FIX_MON_AVG_BAL_PROP 0.854396 0.990436 1.0 \n", 3438 | "LOCAL_BELONEYR_FF_SLOPE 0.344437 0.344437 1.0 \n", 3439 | "LOCAL_BELONEYR_FF_MON_AVG_BAL 0.000000 0.000000 1.0 \n", 3440 | "LOCAL_OVEONEYR_FF_SLOPE 0.344442 0.345026 1.0 \n", 3441 | "LOCAL_SAV_SLOPE 0.344535 0.356539 1.0 \n", 3442 | "LOCAL_SAV_CUR_ALL_BAL 0.001710 0.002807 1.0 \n", 3443 | "LOCAL_SAV_MON_AVG_BAL 0.001749 0.002861 1.0 \n", 3444 | "SAV_SLOPE 0.344535 0.356519 1.0 \n", 3445 | "SAV_CUR_ALL_BAL 0.001713 0.002809 1.0 \n", 3446 | "SAV_MON_AVG_BAL 0.001751 0.002862 1.0 \n", 3447 | "FR_SAV_CUR_ALL_BAL 0.000000 0.000000 1.0 \n", 3448 | "ASSET_CUR_ALL_BAL 0.001781 0.002941 1.0 \n", 3449 | "ASSET_MON_AVG_BAL 0.001816 0.002988 1.0 \n", 3450 | "LOCAL_CUR_TRANS_TX_AMT 0.000000 0.000000 1.0 \n", 3451 | "LOCAL_CUR_LASTSAV_TX_AMT 0.000000 0.000000 1.0 \n", 3452 | "LOCAL_CUR_WITHDRAW_TX_AMT 0.000000 0.000492 1.0 \n", 3453 | "LOCAL_FIX_OPEN_ACC_TX_AMT 0.000000 0.000000 1.0 \n", 3454 | "LOCAL_FIX_WITHDRAW_TX_AMT 0.000000 0.000000 1.0 \n", 3455 | "LOCAL_FIX_CLOSE_ACC_TX_AMT 0.000000 0.000000 1.0 \n", 3456 | "L6M_INDFINA_ALL_TX_AMT 0.000000 0.000000 1.0 \n", 3457 | "da_Mean 0.004292 0.017167 1.0 \n", 3458 | "ovrmou_Mean 0.001457 0.021592 1.0 \n", 3459 | "ovrrev_Mean 0.001622 0.022766 1.0 \n", 3460 | "vceovr_Mean 0.000873 0.021976 1.0 \n", 3461 | "datovr_Mean 0.000000 0.000000 1.0 \n", 3462 | "roam_Mean 0.000000 0.000619 1.0 \n", 3463 | "... ... ... ... 
\n", 3464 | "marital_nan 0.000000 0.000000 1.0 \n", 3465 | "wrkwoman_Y 0.000000 0.000000 1.0 \n", 3466 | "wrkwoman_nan 1.000000 1.000000 1.0 \n", 3467 | "proptype_A 0.000000 1.000000 1.0 \n", 3468 | "proptype_B 0.000000 0.000000 1.0 \n", 3469 | "proptype_D 0.000000 0.000000 1.0 \n", 3470 | "proptype_E 0.000000 0.000000 1.0 \n", 3471 | "proptype_G 0.000000 0.000000 1.0 \n", 3472 | "proptype_M 0.000000 0.000000 1.0 \n", 3473 | "proptype_nan 1.000000 1.000000 1.0 \n", 3474 | "pcowner_Y 0.000000 0.000000 1.0 \n", 3475 | "pcowner_nan 1.000000 1.000000 1.0 \n", 3476 | "kid0_2_U 1.000000 1.000000 1.0 \n", 3477 | "kid0_2_Y 0.000000 0.000000 1.0 \n", 3478 | "kid0_2_nan 0.000000 0.000000 1.0 \n", 3479 | "kid3_5_U 1.000000 1.000000 1.0 \n", 3480 | "kid3_5_Y 0.000000 0.000000 1.0 \n", 3481 | "kid3_5_nan 0.000000 0.000000 1.0 \n", 3482 | "kid6_10_U 1.000000 1.000000 1.0 \n", 3483 | "kid6_10_Y 0.000000 0.000000 1.0 \n", 3484 | "kid6_10_nan 0.000000 0.000000 1.0 \n", 3485 | "kid11_15_U 1.000000 1.000000 1.0 \n", 3486 | "kid11_15_Y 0.000000 0.000000 1.0 \n", 3487 | "kid11_15_nan 0.000000 0.000000 1.0 \n", 3488 | "kid16_17_U 1.000000 1.000000 1.0 \n", 3489 | "kid16_17_Y 0.000000 0.000000 1.0 \n", 3490 | "kid16_17_nan 0.000000 0.000000 1.0 \n", 3491 | "car_buy_New 0.000000 1.000000 1.0 \n", 3492 | "car_buy_UNKNOWN 1.000000 1.000000 1.0 \n", 3493 | "car_buy_nan 0.000000 0.000000 1.0 \n", 3494 | "\n", 3495 | "[178 rows x 8 columns]" 3496 | ] 3497 | }, 3498 | "execution_count": 27, 3499 | "metadata": {}, 3500 | "output_type": "execute_result" 3501 | } 3502 | ], 3503 | "source": [ 3504 | "model_data_des" 3505 | ] 3506 | }, 3507 | { 3508 | "cell_type": "code", 3509 | "execution_count": null, 3510 | "metadata": { 3511 | "collapsed": true 3512 | }, 3513 | "outputs": [], 3514 | "source": [] 3515 | }, 3516 | { 3517 | "cell_type": "code", 3518 | "execution_count": null, 3519 | "metadata": { 3520 | "collapsed": true 3521 | }, 3522 | "outputs": [], 3523 | "source": [] 3524 | } 3525 | ], 3526 | 
"metadata": { 3527 | "kernelspec": { 3528 | "display_name": "Python 3", 3529 | "language": "python", 3530 | "name": "python3" 3531 | }, 3532 | "language_info": { 3533 | "codemirror_mode": { 3534 | "name": "ipython", 3535 | "version": 3 3536 | }, 3537 | "file_extension": ".py", 3538 | "mimetype": "text/x-python", 3539 | "name": "python", 3540 | "nbconvert_exporter": "python", 3541 | "pygments_lexer": "ipython3", 3542 | "version": "3.6.3" 3543 | } 3544 | }, 3545 | "nbformat": 4, 3546 | "nbformat_minor": 2 3547 | } 3548 | --------------------------------------------------------------------------------