# ├── LSTM评分卡.py
# ├── 决策树规则挖掘.ipynb
# ├── 常用反欺诈特征.ipynb
# ├── 异常检测.ipynb
# ├── 深度学习与金融.ipynb
# └── 社交网络分析.ipynb
#
# /LSTM评分卡.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Tue Oct 15 00:33:10 2019
RNN time-series modelling.
@author: kjc
"""

import random

import numpy as np
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from sklearn import metrics
from sklearn.metrics import roc_curve
from sklearn.model_selection import train_test_split
from torch.autograd import Variable

# NOTE(review): `train_images` and `MyDataset` are not defined in this script;
# they are presumably created by code that runs before it - confirm.
# A fresh random seed is drawn on every run, so the split is not reproducible.
random_st = random.choice(range(10000))
train_images, test_images = train_test_split(
    train_images, test_size=0.15, random_state=random_st)

train_data = MyDataset(train_images)
test_data = MyDataset(test_images)

train_loader = torch.utils.data.DataLoader(
    train_data, batch_size=50, shuffle=True, num_workers=0)
test_loader = torch.utils.data.DataLoader(
    test_data, batch_size=25, shuffle=False, num_workers=0)


# Build the LSTM network.
class Rnn(nn.Module):
    """LSTM followed by a linear layer and a sigmoid.

    Args:
        in_dim: Number of input features per time step.
        hidden_dim: Size of the LSTM hidden state.
        n_layer: Number of stacked LSTM layers.
        n_class: Number of output units of the final linear layer.
    """

    def __init__(self, in_dim, hidden_dim, n_layer, n_class):
        super(Rnn, self).__init__()
        self.n_layer = n_layer
        self.hidden_dim = hidden_dim
        self.LSTM = nn.LSTM(in_dim, hidden_dim,
                            n_layer, batch_first=True)
        self.linear = nn.Linear(hidden_dim, n_class)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return sigmoid outputs computed from the last LSTM time step."""
        # Collapse dim 1 by summation.
        # NOTE(review): presumably x is (batch, 1, seq, in_dim) and this drops
        # a singleton channel axis - confirm against MyDataset.
        x = x.sum(dim=1)
        out, _ = self.LSTM(x)
        # batch_first=True, so index 1 is the time axis: keep the last step.
        out = out[:, -1, :]
        out = self.linear(out)
        out = self.sigmoid(out)
        return out


# Network hyper-parameters.

# 28 input features, 2 stacked LSTM layers, 2 output classes.
# NOTE(review): the original comment calls 42 the number of monthly slices,
# but the second positional argument is hidden_dim - confirm the intent.
model = Rnn(28, 42, 2, 2)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)
# Two-class logistic (soft-margin) loss.
# NOTE(review): nn.SoftMarginLoss expects targets in {-1, +1} with the same
# shape as the model output, while the model emits sigmoid values in (0, 1) -
# confirm the label encoding produced by MyDataset.
criterion = nn.SoftMarginLoss(reduction='mean')
opt = torch.optim.Adam(model.parameters())
total_step = len(train_loader)
total_step_test = len(test_loader)
num_epochs = 50


# Train the LSTM model and report KS and AUC for it after every epoch.
for epoch in range(num_epochs):
    train_label = []
    train_pred = []
    model.train()
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)
        # One optimisation step.
        out = model(images)
        loss = criterion(out, labels)
        opt.zero_grad()
        loss.backward()
        opt.step()
        # Print progress every 100 batches.
        if i % 100 == 0:
            print('train epoch: {}/{}, round: {}/{}, loss: {}'.format(
                epoch + 1, num_epochs, i + 1, total_step, loss.item()))
        # Collect true labels and predicted scores.
        train_label.extend(labels.cpu().numpy().flatten().tolist())
        train_pred.extend(out.detach().cpu().numpy().flatten().tolist())
    # True-positive and false-positive rates.
    fpr_lm_train, tpr_lm_train, _ = roc_curve(np.array(train_label),
                                              np.array(train_pred))
    # KS is the largest gap between TPR and FPR; AUC is the area under ROC.
    print('train epoch: {}/{}, KS: {}, ROC: {}'.format(
        epoch + 1, num_epochs, abs(fpr_lm_train - tpr_lm_train).max(),
        metrics.auc(fpr_lm_train, tpr_lm_train)))

    test_label = []
    test_pred = []

    model.eval()
    # KS and AUC on the test set; no gradients are needed for evaluation.
    with torch.no_grad():
        for i, (images, labels) in enumerate(test_loader):
            images = images.to(device)
            labels = labels.to(device)
            out = model(images)
            loss = criterion(out, labels)

            # Print progress every 100 batches.
            if i % 100 == 0:
                print('test epoch: {}/{}, round: {}/{}, loss: {}'.format(
                    epoch + 1, num_epochs, i + 1, total_step_test,
                    loss.item()))
            test_label.extend(labels.cpu().numpy().flatten().tolist())
            test_pred.extend(out.detach().cpu().numpy().flatten().tolist())

    fpr_lm_test, tpr_lm_test, _ = roc_curve(np.array(test_label),
                                            np.array(test_pred))

    print('test epoch: {}/{}, KS: {}, ROC: {}'.format(
        epoch + 1, num_epochs,
        abs(fpr_lm_test - tpr_lm_test).max(),
        metrics.auc(fpr_lm_test, tpr_lm_test)))

# --------------------------------------------------------------------------------
# /决策树规则挖掘.ipynb:
# --------------------------------------------------------------------------------
# 1 | {
2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import numpy as np\n", 11 | "import os\n", 12 | "os.environ[\"PATH\"] += os.pathsep + 'C:/Program Files (x86)/Graphviz2.38/bin/'" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": {}, 19 | "outputs": [ 20 | { 21 | "data": { 22 | "text/html": [ 23 | "
\n", 24 | "\n", 37 | "\n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | "
uidoil_actv_dtcreate_dttotal_oil_cntpay_amount_totalclass_newbad_indoil_amountdiscount_amountsale_amountamountpay_amountcoupon_amountpayment_coupon_amountchannel_codeoil_codescenesource_appcall_source
0A82177102018-08-192018-08-17137.024147747.2B01653.78880040.0898000.0865540.04327700.00.0100.013203
1A82177102018-08-192018-08-16137.024147747.2B02336.841243522.01268900.01218922.06094610.00.0100.013203
2A82177102018-08-192018-08-15137.024147747.2B0936.03488922.0498900.0480922.02404610.00.0200.012203
3A82177102018-08-192018-08-14137.024147747.2B02418.391263220.01289000.01242220.06211100.00.0300.012203
4A82177102018-08-192018-08-13137.024147747.2B01292.69675220.0689000.0664220.03321100.00.0100.012203
\n", 175 | "
" 176 | ], 177 | "text/plain": [ 178 | " uid oil_actv_dt create_dt total_oil_cnt pay_amount_total class_new \\\n", 179 | "0 A8217710 2018-08-19 2018-08-17 137.0 24147747.2 B \n", 180 | "1 A8217710 2018-08-19 2018-08-16 137.0 24147747.2 B \n", 181 | "2 A8217710 2018-08-19 2018-08-15 137.0 24147747.2 B \n", 182 | "3 A8217710 2018-08-19 2018-08-14 137.0 24147747.2 B \n", 183 | "4 A8217710 2018-08-19 2018-08-13 137.0 24147747.2 B \n", 184 | "\n", 185 | " bad_ind oil_amount discount_amount sale_amount amount pay_amount \\\n", 186 | "0 0 1653.78 880040.0 898000.0 865540.0 4327700.0 \n", 187 | "1 0 2336.84 1243522.0 1268900.0 1218922.0 6094610.0 \n", 188 | "2 0 936.03 488922.0 498900.0 480922.0 2404610.0 \n", 189 | "3 0 2418.39 1263220.0 1289000.0 1242220.0 6211100.0 \n", 190 | "4 0 1292.69 675220.0 689000.0 664220.0 3321100.0 \n", 191 | "\n", 192 | " coupon_amount payment_coupon_amount channel_code oil_code scene \\\n", 193 | "0 0.0 100.0 1 3 2 \n", 194 | "1 0.0 100.0 1 3 2 \n", 195 | "2 0.0 200.0 1 2 2 \n", 196 | "3 0.0 300.0 1 2 2 \n", 197 | "4 0.0 100.0 1 2 2 \n", 198 | "\n", 199 | " source_app call_source \n", 200 | "0 0 3 \n", 201 | "1 0 3 \n", 202 | "2 0 3 \n", 203 | "3 0 3 \n", 204 | "4 0 3 " 205 | ] 206 | }, 207 | "execution_count": 2, 208 | "metadata": {}, 209 | "output_type": "execute_result" 210 | } 211 | ], 212 | "source": [ 213 | "data = pd.read_excel( 'oil_data_for_tree.xlsx')\n", 214 | "data.head()" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": 3, 220 | "metadata": {}, 221 | "outputs": [ 222 | { 223 | "data": { 224 | "text/plain": [ 225 | "{'A', 'B', 'C', 'D', 'E', 'F'}" 226 | ] 227 | }, 228 | "execution_count": 3, 229 | "metadata": {}, 230 | "output_type": "execute_result" 231 | } 232 | ], 233 | "source": [ 234 | "set(data.class_new)" 235 | ] 236 | }, 237 | { 238 | "cell_type": "markdown", 239 | "metadata": {}, 240 | "source": [ 241 | "org_lst 不需要做特殊变换,直接去重 \n", 242 | "agg_lst 数值型变量做聚合 \n", 243 | "dstc_lst 文本型变量做cnt " 244 | 
] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": 14, 249 | "metadata": {}, 250 | "outputs": [], 251 | "source": [ 252 | "org_lst = ['uid','create_dt','oil_actv_dt','class_new','bad_ind']\n", 253 | "agg_lst = ['oil_amount','discount_amount','sale_amount','amount','pay_amount','coupon_amount','payment_coupon_amount']\n", 254 | "dstc_lst = ['channel_code','oil_code','scene','source_app','call_source']" 255 | ] 256 | }, 257 | { 258 | "cell_type": "markdown", 259 | "metadata": {}, 260 | "source": [ 261 | "数据重组" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": 15, 267 | "metadata": {}, 268 | "outputs": [], 269 | "source": [ 270 | "df = data[org_lst].copy()\n", 271 | "df[agg_lst] = data[agg_lst].copy()\n", 272 | "df[dstc_lst] = data[dstc_lst].copy()" 273 | ] 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "metadata": {}, 278 | "source": [ 279 | "看一下缺失情况" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": 16, 285 | "metadata": {}, 286 | "outputs": [ 287 | { 288 | "data": { 289 | "text/plain": [ 290 | "uid 0\n", 291 | "create_dt 4944\n", 292 | "oil_actv_dt 0\n", 293 | "class_new 0\n", 294 | "bad_ind 0\n", 295 | "oil_amount 4944\n", 296 | "discount_amount 4944\n", 297 | "sale_amount 4944\n", 298 | "amount 4944\n", 299 | "pay_amount 4944\n", 300 | "coupon_amount 4944\n", 301 | "payment_coupon_amount 4946\n", 302 | "channel_code 0\n", 303 | "oil_code 0\n", 304 | "scene 0\n", 305 | "source_app 0\n", 306 | "call_source 0\n", 307 | "dtype: int64" 308 | ] 309 | }, 310 | "execution_count": 16, 311 | "metadata": {}, 312 | "output_type": "execute_result" 313 | } 314 | ], 315 | "source": [ 316 | "df.isna().sum()" 317 | ] 318 | }, 319 | { 320 | "cell_type": "markdown", 321 | "metadata": {}, 322 | "source": [ 323 | "看一下基础变量的describe" 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": 17, 329 | "metadata": {}, 330 | "outputs": [ 331 | { 332 | "data": { 333 | "text/html": [ 334 | "
\n", 335 | "\n", 348 | "\n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | "
bad_indoil_amountdiscount_amountsale_amountamountpay_amountcoupon_amountpayment_coupon_amountchannel_codeoil_codescenesource_appcall_source
count50609.00000045665.0000004.566500e+044.566500e+044.566500e+044.566500e+0445665.045663.00000050609.00000050609.00000050609.00000050609.00000050609.000000
mean0.017764212.1880541.091035e+051.121195e+051.077312e+055.386562e+050.0417.0553841.4763781.6178941.9065190.3060722.900729
std0.132093200.2981221.010993e+051.031804e+059.953775e+044.976888e+050.0968.2502731.5114703.0741660.3672800.8936820.726231
min0.0000000.0000000.000000e+000.000000e+001.000000e+005.000000e+000.00.0000000.0000000.0000000.0000000.0000000.000000
25%0.00000087.2200004.854000e+045.000000e+044.820000e+042.410000e+050.00.0000001.0000000.0000002.0000000.0000003.000000
50%0.000000167.5800008.820000e+049.000000e+048.709600e+044.354800e+050.0100.0000001.0000000.0000002.0000000.0000003.000000
75%0.000000278.3000001.391600e+051.430000e+051.371150e+056.855750e+050.0500.0000001.0000000.0000002.0000000.0000003.000000
max1.0000003975.9100001.958040e+061.998000e+061.925540e+069.627700e+060.050000.0000006.0000009.0000002.0000003.0000004.000000
\n", 498 | "
" 499 | ], 500 | "text/plain": [ 501 | " bad_ind oil_amount discount_amount sale_amount \\\n", 502 | "count 50609.000000 45665.000000 4.566500e+04 4.566500e+04 \n", 503 | "mean 0.017764 212.188054 1.091035e+05 1.121195e+05 \n", 504 | "std 0.132093 200.298122 1.010993e+05 1.031804e+05 \n", 505 | "min 0.000000 0.000000 0.000000e+00 0.000000e+00 \n", 506 | "25% 0.000000 87.220000 4.854000e+04 5.000000e+04 \n", 507 | "50% 0.000000 167.580000 8.820000e+04 9.000000e+04 \n", 508 | "75% 0.000000 278.300000 1.391600e+05 1.430000e+05 \n", 509 | "max 1.000000 3975.910000 1.958040e+06 1.998000e+06 \n", 510 | "\n", 511 | " amount pay_amount coupon_amount payment_coupon_amount \\\n", 512 | "count 4.566500e+04 4.566500e+04 45665.0 45663.000000 \n", 513 | "mean 1.077312e+05 5.386562e+05 0.0 417.055384 \n", 514 | "std 9.953775e+04 4.976888e+05 0.0 968.250273 \n", 515 | "min 1.000000e+00 5.000000e+00 0.0 0.000000 \n", 516 | "25% 4.820000e+04 2.410000e+05 0.0 0.000000 \n", 517 | "50% 8.709600e+04 4.354800e+05 0.0 100.000000 \n", 518 | "75% 1.371150e+05 6.855750e+05 0.0 500.000000 \n", 519 | "max 1.925540e+06 9.627700e+06 0.0 50000.000000 \n", 520 | "\n", 521 | " channel_code oil_code scene source_app call_source \n", 522 | "count 50609.000000 50609.000000 50609.000000 50609.000000 50609.000000 \n", 523 | "mean 1.476378 1.617894 1.906519 0.306072 2.900729 \n", 524 | "std 1.511470 3.074166 0.367280 0.893682 0.726231 \n", 525 | "min 0.000000 0.000000 0.000000 0.000000 0.000000 \n", 526 | "25% 1.000000 0.000000 2.000000 0.000000 3.000000 \n", 527 | "50% 1.000000 0.000000 2.000000 0.000000 3.000000 \n", 528 | "75% 1.000000 0.000000 2.000000 0.000000 3.000000 \n", 529 | "max 6.000000 9.000000 2.000000 3.000000 4.000000 " 530 | ] 531 | }, 532 | "execution_count": 17, 533 | "metadata": {}, 534 | "output_type": "execute_result" 535 | } 536 | ], 537 | "source": [ 538 | "df.describe()" 539 | ] 540 | }, 541 | { 542 | "cell_type": "markdown", 543 | "metadata": {}, 544 | "source": [ 545 | 
"对create_dt做补全,用oil_actv_dt来填补,并且截取6个月的数据。 \n", 546 | "构造变量的时候不能直接对历史所有数据做累加。 \n", 547 | "否则随着时间推移,变量分布会有很大的变化。" 548 | ] 549 | }, 550 | { 551 | "cell_type": "code", 552 | "execution_count": 18, 553 | "metadata": {}, 554 | "outputs": [ 555 | { 556 | "data": { 557 | "text/html": [ 558 | "
\n", 559 | "\n", 572 | "\n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | "
uidcreate_dtoil_actv_dtclass_newbad_indoil_amountdiscount_amountsale_amountamountpay_amountcoupon_amountpayment_coupon_amountchannel_codeoil_codescenesource_appcall_sourcedtn
50608B964363919850357032018-10-082018-10-08B0NaNNaNNaNNaNNaNNaNNaN692340
50607B964363919846933972018-10-112018-10-11E0NaNNaNNaNNaNNaNNaNNaN692340
50606B964363919772174682018-10-172018-10-17B0NaNNaNNaNNaNNaNNaNNaN692340
50605B964363919764808922018-09-282018-09-28B0NaNNaNNaNNaNNaNNaNNaN692340
50604B964363919721060432018-10-192018-10-19A0NaNNaNNaNNaNNaNNaNNaN692340
\n", 704 | "
" 705 | ], 706 | "text/plain": [ 707 | " uid create_dt oil_actv_dt class_new bad_ind \\\n", 708 | "50608 B96436391985035703 2018-10-08 2018-10-08 B 0 \n", 709 | "50607 B96436391984693397 2018-10-11 2018-10-11 E 0 \n", 710 | "50606 B96436391977217468 2018-10-17 2018-10-17 B 0 \n", 711 | "50605 B96436391976480892 2018-09-28 2018-09-28 B 0 \n", 712 | "50604 B96436391972106043 2018-10-19 2018-10-19 A 0 \n", 713 | "\n", 714 | " oil_amount discount_amount sale_amount amount pay_amount \\\n", 715 | "50608 NaN NaN NaN NaN NaN \n", 716 | "50607 NaN NaN NaN NaN NaN \n", 717 | "50606 NaN NaN NaN NaN NaN \n", 718 | "50605 NaN NaN NaN NaN NaN \n", 719 | "50604 NaN NaN NaN NaN NaN \n", 720 | "\n", 721 | " coupon_amount payment_coupon_amount channel_code oil_code scene \\\n", 722 | "50608 NaN NaN 6 9 2 \n", 723 | "50607 NaN NaN 6 9 2 \n", 724 | "50606 NaN NaN 6 9 2 \n", 725 | "50605 NaN NaN 6 9 2 \n", 726 | "50604 NaN NaN 6 9 2 \n", 727 | "\n", 728 | " source_app call_source dtn \n", 729 | "50608 3 4 0 \n", 730 | "50607 3 4 0 \n", 731 | "50606 3 4 0 \n", 732 | "50605 3 4 0 \n", 733 | "50604 3 4 0 " 734 | ] 735 | }, 736 | "execution_count": 18, 737 | "metadata": {}, 738 | "output_type": "execute_result" 739 | } 740 | ], 741 | "source": [ 742 | "def time_isna(x,y):\n", 743 | " if str(x) == 'NaT':\n", 744 | " x = y\n", 745 | " else:\n", 746 | " x = x\n", 747 | " return x\n", 748 | "df2 = df.sort_values(['uid','create_dt'],ascending = False)\n", 749 | "df2['create_dt'] = df2.apply(lambda x: time_isna(x.create_dt,x.oil_actv_dt),axis = 1)\n", 750 | "df2['dtn'] = (df2.oil_actv_dt - df2.create_dt).apply(lambda x :x.days)\n", 751 | "df = df2[df2['dtn']<180]\n", 752 | "df.head()" 753 | ] 754 | }, 755 | { 756 | "cell_type": "markdown", 757 | "metadata": {}, 758 | "source": [ 759 | "对org_list变量求历史贷款天数的最大间隔,并且去重" 760 | ] 761 | }, 762 | { 763 | "cell_type": "code", 764 | "execution_count": 28, 765 | "metadata": {}, 766 | "outputs": [ 767 | { 768 | "name": "stderr", 769 | "output_type": 
"stream", 770 | "text": [ 771 | "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:2: SettingWithCopyWarning: \n", 772 | "A value is trying to be set on a copy of a slice from a DataFrame.\n", 773 | "Try using .loc[row_indexer,col_indexer] = value instead\n", 774 | "\n", 775 | "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", 776 | " \n" 777 | ] 778 | }, 779 | { 780 | "data": { 781 | "text/plain": [ 782 | "(11099, 6)" 783 | ] 784 | }, 785 | "execution_count": 28, 786 | "metadata": {}, 787 | "output_type": "execute_result" 788 | } 789 | ], 790 | "source": [ 791 | "base = df[org_lst]\n", 792 | "base['dtn'] = df['dtn']\n", 793 | "base = base.sort_values(['uid','create_dt'],ascending = False)\n", 794 | "base = base.drop_duplicates(['uid'],keep = 'first')\n", 795 | "base.shape" 796 | ] 797 | }, 798 | { 799 | "cell_type": "markdown", 800 | "metadata": {}, 801 | "source": [ 802 | "做变量衍生" 803 | ] 804 | }, 805 | { 806 | "cell_type": "code", 807 | "execution_count": 20, 808 | "metadata": {}, 809 | "outputs": [ 810 | { 811 | "name": "stderr", 812 | "output_type": "stream", 813 | "text": [ 814 | "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:21: RuntimeWarning: Mean of empty slice\n", 815 | "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:27: RuntimeWarning: All-NaN axis encountered\n", 816 | "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:33: RuntimeWarning: All-NaN axis encountered\n", 817 | "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:39: RuntimeWarning: Degrees of freedom <= 0 for slice.\n", 818 | "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:45: RuntimeWarning: All-NaN axis encountered\n", 819 | "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:51: RuntimeWarning: Mean of empty slice\n", 820 | 
"C:\\ProgramData\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:51: RuntimeWarning: Degrees of freedom <= 0 for slice.\n" 821 | ] 822 | } 823 | ], 824 | "source": [ 825 | "gn = pd.DataFrame()\n", 826 | "for i in agg_lst:\n", 827 | " tp = pd.DataFrame(df.groupby('uid').apply(lambda df:len(df[i])).reset_index())\n", 828 | " tp.columns = ['uid',i + '_cnt']\n", 829 | " if gn.empty == True:\n", 830 | " gn = tp\n", 831 | " else:\n", 832 | " gn = pd.merge(gn,tp,on = 'uid',how = 'left')\n", 833 | " tp = pd.DataFrame(df.groupby('uid').apply(lambda df:np.where(df[i]>0,1,0).sum()).reset_index())\n", 834 | " tp.columns = ['uid',i + '_num']\n", 835 | " if gn.empty == True:\n", 836 | " gn = tp\n", 837 | " else:\n", 838 | " gn = pd.merge(gn,tp,on = 'uid',how = 'left')\n", 839 | " tp = pd.DataFrame(df.groupby('uid').apply(lambda df:np.nansum(df[i])).reset_index())\n", 840 | " tp.columns = ['uid',i + '_tot']\n", 841 | " if gn.empty == True:\n", 842 | " gn = tp\n", 843 | " else:\n", 844 | " gn = pd.merge(gn,tp,on = 'uid',how = 'left')\n", 845 | " tp = pd.DataFrame(df.groupby('uid').apply(lambda df:np.nanmean(df[i])).reset_index())\n", 846 | " tp.columns = ['uid',i + '_avg']\n", 847 | " if gn.empty == True:\n", 848 | " gn = tp\n", 849 | " else:\n", 850 | " gn = pd.merge(gn,tp,on = 'uid',how = 'left')\n", 851 | " tp = pd.DataFrame(df.groupby('uid').apply(lambda df:np.nanmax(df[i])).reset_index())\n", 852 | " tp.columns = ['uid',i + '_max']\n", 853 | " if gn.empty == True:\n", 854 | " gn = tp\n", 855 | " else:\n", 856 | " gn = pd.merge(gn,tp,on = 'uid',how = 'left')\n", 857 | " tp = pd.DataFrame(df.groupby('uid').apply(lambda df:np.nanmin(df[i])).reset_index())\n", 858 | " tp.columns = ['uid',i + '_min']\n", 859 | " if gn.empty == True:\n", 860 | " gn = tp\n", 861 | " else:\n", 862 | " gn = pd.merge(gn,tp,on = 'uid',how = 'left')\n", 863 | " tp = pd.DataFrame(df.groupby('uid').apply(lambda df:np.nanvar(df[i])).reset_index())\n", 864 | " tp.columns = ['uid',i + '_var']\n", 865 | 
" if gn.empty == True:\n", 866 | " gn = tp\n", 867 | " else:\n", 868 | " gn = pd.merge(gn,tp,on = 'uid',how = 'left')\n", 869 | " tp = pd.DataFrame(df.groupby('uid').apply(lambda df:np.nanmax(df[i]) -np.nanmin(df[i]) ).reset_index())\n", 870 | " tp.columns = ['uid',i + '_var']\n", 871 | " if gn.empty == True:\n", 872 | " gn = tp\n", 873 | " else:\n", 874 | " gn = pd.merge(gn,tp,on = 'uid',how = 'left')\n", 875 | " tp = pd.DataFrame(df.groupby('uid').apply(lambda df:np.nanmean(df[i])/max(np.nanvar(df[i]),1)).reset_index())\n", 876 | " tp.columns = ['uid',i + '_var']\n", 877 | " if gn.empty == True:\n", 878 | " gn = tp\n", 879 | " else:\n", 880 | " gn = pd.merge(gn,tp,on = 'uid',how = 'left')" 881 | ] 882 | }, 883 | { 884 | "cell_type": "markdown", 885 | "metadata": {}, 886 | "source": [ 887 | "对dstc_lst变量求distinct个数" 888 | ] 889 | }, 890 | { 891 | "cell_type": "code", 892 | "execution_count": 22, 893 | "metadata": {}, 894 | "outputs": [], 895 | "source": [ 896 | "gc = pd.DataFrame()\n", 897 | "for i in dstc_lst:\n", 898 | " tp = pd.DataFrame(df.groupby('uid').apply(lambda df: len(set(df[i]))).reset_index())\n", 899 | " tp.columns = ['uid',i + '_dstc']\n", 900 | " if gc.empty == True:\n", 901 | " gc = tp\n", 902 | " else:\n", 903 | " gc = pd.merge(gc,tp,on = 'uid',how = 'left')" 904 | ] 905 | }, 906 | { 907 | "cell_type": "markdown", 908 | "metadata": {}, 909 | "source": [ 910 | "将变量组合在一起" 911 | ] 912 | }, 913 | { 914 | "cell_type": "code", 915 | "execution_count": 29, 916 | "metadata": {}, 917 | "outputs": [ 918 | { 919 | "data": { 920 | "text/plain": [ 921 | "(11099, 74)" 922 | ] 923 | }, 924 | "execution_count": 29, 925 | "metadata": {}, 926 | "output_type": "execute_result" 927 | } 928 | ], 929 | "source": [ 930 | "fn = pd.merge(base,gn,on= 'uid')\n", 931 | "fn = pd.merge(fn,gc,on= 'uid') \n", 932 | "fn.shape" 933 | ] 934 | }, 935 | { 936 | "cell_type": "code", 937 | "execution_count": 35, 938 | "metadata": {}, 939 | "outputs": [], 940 | "source": [ 941 | "fn = 
fn.fillna(0)" 942 | ] 943 | }, 944 | { 945 | "cell_type": "code", 946 | "execution_count": 36, 947 | "metadata": {}, 948 | "outputs": [ 949 | { 950 | "data": { 951 | "text/html": [ 952 | "
\n", 953 | "\n", 966 | "\n", 967 | " \n", 968 | " \n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | " \n", 973 | " \n", 974 | " \n", 975 | " \n", 976 | " \n", 977 | " \n", 978 | " \n", 979 | " \n", 980 | " \n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 987 | " \n", 988 | " \n", 989 | " \n", 990 | " \n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 995 | " \n", 996 | " \n", 997 | " \n", 998 | " \n", 999 | " \n", 1000 | " \n", 1001 | " \n", 1002 | " \n", 1003 | " \n", 1004 | " \n", 1005 | " \n", 1006 | " \n", 1007 | " \n", 1008 | " \n", 1009 | " \n", 1010 | " \n", 1011 | " \n", 1012 | " \n", 1013 | " \n", 1014 | " \n", 1015 | " \n", 1016 | " \n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1028 | " \n", 1029 | " \n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | " \n", 1035 | " \n", 1036 | " \n", 1037 | " \n", 1038 | " \n", 1039 | " \n", 1040 | " \n", 1041 | " \n", 1042 | " \n", 1043 | " \n", 1044 | " \n", 1045 | " \n", 1046 | " \n", 1047 | " \n", 1048 | " \n", 1049 | " \n", 1050 | " \n", 1051 | " \n", 1052 | " \n", 1053 | " \n", 1054 | " \n", 1055 | " \n", 1056 | " \n", 1057 | " \n", 1058 | " \n", 1059 | " \n", 1060 | " \n", 1061 | " \n", 1062 | " \n", 1063 | " \n", 1064 | " \n", 1065 | " \n", 1066 | " \n", 1067 | " \n", 1068 | " \n", 1069 | " \n", 1070 | " \n", 1071 | " \n", 1072 | " \n", 1073 | " \n", 1074 | " \n", 1075 | " \n", 1076 | " \n", 1077 | " \n", 1078 | " \n", 1079 | " \n", 1080 | " \n", 1081 | " \n", 1082 | " \n", 1083 | " \n", 1084 | " \n", 1085 | " \n", 1086 | " \n", 1087 | " \n", 1088 | " \n", 1089 | " \n", 1090 | " \n", 1091 | " \n", 1092 | " \n", 1093 | " \n", 1094 | " \n", 1095 | " \n", 1096 | " \n", 1097 | " \n", 1098 | " \n", 1099 | " \n", 1100 | " \n", 1101 | " \n", 1102 | " \n", 1103 | " \n", 1104 | " \n", 1105 | " \n", 1106 | " \n", 1107 | " \n", 1108 | " \n", 1109 | " \n", 
1110 | " \n", 1111 | " \n", 1112 | " \n", 1113 | " \n", 1114 | " \n", 1115 | " \n", 1116 | " \n", 1117 | " \n", 1118 | " \n", 1119 | " \n", 1120 | " \n", 1121 | " \n", 1122 | " \n", 1123 | " \n", 1124 | " \n", 1125 | " \n", 1126 | " \n", 1127 | " \n", 1128 | " \n", 1129 | " \n", 1130 | " \n", 1131 | " \n", 1132 | " \n", 1133 | " \n", 1134 | " \n", 1135 | " \n", 1136 | " \n", 1137 | " \n", 1138 | " \n", 1139 | " \n", 1140 | " \n", 1141 | " \n", 1142 | " \n", 1143 | " \n", 1144 | " \n", 1145 | " \n", 1146 | " \n", 1147 | " \n", 1148 | " \n", 1149 | " \n", 1150 | " \n", 1151 | " \n", 1152 | " \n", 1153 | " \n", 1154 | " \n", 1155 | " \n", 1156 | " \n", 1157 | " \n", 1158 | " \n", 1159 | " \n", 1160 | " \n", 1161 | " \n", 1162 | " \n", 1163 | " \n", 1164 | " \n", 1165 | " \n", 1166 | " \n", 1167 | " \n", 1168 | " \n", 1169 | " \n", 1170 | " \n", 1171 | " \n", 1172 | " \n", 1173 | " \n", 1174 | " \n", 1175 | " \n", 1176 | " \n", 1177 | " \n", 1178 | " \n", 1179 | " \n", 1180 | " \n", 1181 | " \n", 1182 | " \n", 1183 | " \n", 1184 | " \n", 1185 | " \n", 1186 | " \n", 1187 | " \n", 1188 | " \n", 1189 | " \n", 1190 | " \n", 1191 | " \n", 1192 | " \n", 1193 | " \n", 1194 | " \n", 1195 | " \n", 1196 | " \n", 1197 | " \n", 1198 | " \n", 1199 | " \n", 1200 | " \n", 1201 | " \n", 1202 | " \n", 1203 | " \n", 1204 | " \n", 1205 | " \n", 1206 | " \n", 1207 | " \n", 1208 | " \n", 1209 | " \n", 1210 | " \n", 1211 | " \n", 1212 | " \n", 1213 | " \n", 1214 | " \n", 1215 | " \n", 1216 | " \n", 1217 | " \n", 1218 | " \n", 1219 | " \n", 1220 | " \n", 1221 | " \n", 1222 | " \n", 1223 | " \n", 1224 | " \n", 1225 | " \n", 1226 | " \n", 1227 | " \n", 1228 | " \n", 1229 | " \n", 1230 | " \n", 1231 | " \n", 1232 | " \n", 1233 | " \n", 1234 | " \n", 1235 | " \n", 1236 | " \n", 1237 | " \n", 1238 | " \n", 1239 | " \n", 1240 | " \n", 1241 | " \n", 1242 | " \n", 1243 | " \n", 1244 | " \n", 1245 | " \n", 1246 | " \n", 1247 | " \n", 1248 | " \n", 1249 | " \n", 1250 | " \n", 1251 | " \n", 1252 | " 
\n", 1253 | " \n", 1254 | " \n", 1255 | " \n", 1256 | " \n", 1257 | " \n", 1258 | " \n", 1259 | " \n", 1260 | " \n", 1261 | " \n", 1262 | " \n", 1263 | " \n", 1264 | " \n", 1265 | " \n", 1266 | " \n", 1267 | " \n", 1268 | " \n", 1269 | " \n", 1270 | " \n", 1271 | " \n", 1272 | " \n", 1273 | " \n", 1274 | " \n", 1275 | " \n", 1276 | " \n", 1277 | " \n", 1278 | " \n", 1279 | " \n", 1280 | " \n", 1281 | " \n", 1282 | " \n", 1283 | " \n", 1284 | " \n", 1285 | " \n", 1286 | " \n", 1287 | " \n", 1288 | " \n", 1289 | " \n", 1290 | " \n", 1291 | " \n", 1292 | " \n", 1293 | " \n", 1294 | " \n", 1295 | " \n", 1296 | " \n", 1297 | " \n", 1298 | " \n", 1299 | " \n", 1300 | " \n", 1301 | " \n", 1302 | " \n", 1303 | " \n", 1304 | " \n", 1305 | " \n", 1306 | " \n", 1307 | " \n", 1308 | " \n", 1309 | " \n", 1310 | " \n", 1311 | " \n", 1312 | " \n", 1313 | " \n", 1314 | " \n", 1315 | " \n", 1316 | " \n", 1317 | " \n", 1318 | " \n", 1319 | " \n", 1320 | " \n", 1321 | " \n", 1322 | " \n", 1323 | " \n", 1324 | " \n", 1325 | " \n", 1326 | " \n", 1327 | " \n", 1328 | " \n", 1329 | " \n", 1330 | " \n", 1331 | " \n", 1332 | " \n", 1333 | " \n", 1334 | " \n", 1335 | " \n", 1336 | " \n", 1337 | " \n", 1338 | " \n", 1339 | " \n", 1340 | " \n", 1341 | " \n", 1342 | " \n", 1343 | " \n", 1344 | " \n", 1345 | " \n", 1346 | " \n", 1347 | " \n", 1348 | " \n", 1349 | " \n", 1350 | " \n", 1351 | " \n", 1352 | " \n", 1353 | " \n", 1354 | " \n", 1355 | " \n", 1356 | " \n", 1357 | " \n", 1358 | " \n", 1359 | " \n", 1360 | " \n", 1361 | " \n", 1362 | " \n", 1363 | " \n", 1364 | " \n", 1365 | " \n", 1366 | " \n", 1367 | " \n", 1368 | " \n", 1369 | " \n", 1370 | " \n", 1371 | " \n", 1372 | " \n", 1373 | " \n", 1374 | " \n", 1375 | " \n", 1376 | " \n", 1377 | " \n", 1378 | " \n", 1379 | " \n", 1380 | " \n", 1381 | " \n", 1382 | " \n", 1383 | " \n", 1384 | " \n", 1385 | " \n", 1386 | " \n", 1387 | " \n", 1388 | " \n", 1389 | " \n", 1390 | " \n", 1391 | " \n", 1392 | " \n", 1393 | " \n", 1394 | " \n", 1395 | 
" \n", 1396 | " \n", 1397 | " \n", 1398 | " \n", 1399 | " \n", 1400 | " \n", 1401 | " \n", 1402 | " \n", 1403 | " \n", 1404 | " \n", 1405 | " \n", 1406 | " \n", 1407 | " \n", 1408 | " \n", 1409 | " \n", 1410 | " \n", 1411 | " \n", 1412 | " \n", 1413 | " \n", 1414 | " \n", 1415 | " \n", 1416 | " \n", 1417 | " \n", 1418 | " \n", 1419 | " \n", 1420 | " \n", 1421 | " \n", 1422 | " \n", 1423 | " \n", 1424 | " \n", 1425 | " \n", 1426 | " \n", 1427 | " \n", 1428 | " \n", 1429 | " \n", 1430 | " \n", 1431 | " \n", 1432 | " \n", 1433 | " \n", 1434 | " \n", 1435 | " \n", 1436 | " \n", 1437 | " \n", 1438 | " \n", 1439 | " \n", 1440 | " \n", 1441 | " \n", 1442 | " \n", 1443 | " \n", 1444 | " \n", 1445 | " \n", 1446 | " \n", 1447 | " \n", 1448 | " \n", 1449 | " \n", 1450 | " \n", 1451 | " \n", 1452 | " \n", 1453 | " \n", 1454 | " \n", 1455 | " \n", 1456 | " \n", 1457 | " \n", 1458 | " \n", 1459 | " \n", 1460 | " \n", 1461 | " \n", 1462 | " \n", 1463 | " \n", 1464 | " \n", 1465 | " \n", 1466 | " \n", 1467 | " \n", 1468 | " \n", 1469 | " \n", 1470 | " \n", 1471 | " \n", 1472 | " \n", 1473 | " \n", 1474 | " \n", 1475 | " \n", 1476 | " \n", 1477 | " \n", 1478 | " \n", 1479 | " \n", 1480 | " \n", 1481 | " \n", 1482 | " \n", 1483 | " \n", 1484 | " \n", 1485 | " \n", 1486 | " \n", 1487 | " \n", 1488 | " \n", 1489 | " \n", 1490 | " \n", 1491 | " \n", 1492 | " \n", 1493 | " \n", 1494 | " \n", 1495 | " \n", 1496 | " \n", 1497 | " \n", 1498 | " \n", 1499 | " \n", 1500 | " \n", 1501 | " \n", 1502 | " \n", 1503 | " \n", 1504 | " \n", 1505 | " \n", 1506 | " \n", 1507 | " \n", 1508 | " \n", 1509 | " \n", 1510 | " \n", 1511 | " \n", 1512 | " \n", 1513 | " \n", 1514 | " \n", 1515 | " \n", 1516 | " \n", 1517 | " \n", 1518 | " \n", 1519 | " \n", 1520 | " \n", 1521 | " \n", 1522 | " \n", 1523 | " \n", 1524 | " \n", 1525 | " \n", 1526 | " \n", 1527 | " \n", 1528 | " \n", 1529 | " \n", 1530 | " \n", 1531 | " \n", 1532 | " \n", 1533 | " \n", 1534 | " \n", 1535 | " \n", 1536 | " \n", 1537 | " \n", 1538 
| " \n", 1539 | " \n", 1540 | " \n", 1541 | " \n", 1542 | " \n", 1543 | " \n", 1544 | " \n", 1545 | " \n", 1546 | " \n", 1547 | " \n", 1548 | " \n", 1549 | " \n", 1550 | " \n", 1551 | " \n", 1552 | " \n", 1553 | " \n", 1554 | " \n", 1555 | " \n", 1556 | " \n", 1557 | " \n", 1558 | " \n", 1559 | " \n", 1560 | " \n", 1561 | " \n", 1562 | " \n", 1563 | " \n", 1564 | " \n", 1565 | " \n", 1566 | " \n", 1567 | " \n", 1568 | " \n", 1569 | " \n", 1570 | " \n", 1571 | " \n", 1572 | " \n", 1573 | " \n", 1574 | " \n", 1575 | " \n", 1576 | " \n", 1577 | " \n", 1578 | " \n", 1579 | " \n", 1580 | " \n", 1581 | " \n", 1582 | " \n", 1583 | " \n", 1584 | " \n", 1585 | " \n", 1586 | " \n", 1587 | " \n", 1588 | " \n", 1589 | " \n", 1590 | " \n", 1591 | " \n", 1592 | " \n", 1593 | " \n", 1594 | " \n", 1595 | " \n", 1596 | " \n", 1597 | " \n", 1598 | " \n", 1599 | " \n", 1600 | " \n", 1601 | " \n", 1602 | " \n", 1603 | " \n", 1604 | " \n", 1605 | " \n", 1606 | " \n", 1607 | " \n", 1608 | " \n", 1609 | " \n", 1610 | " \n", 1611 | " \n", 1612 | " \n", 1613 | " \n", 1614 | " \n", 1615 | " \n", 1616 | " \n", 1617 | " \n", 1618 | " \n", 1619 | " \n", 1620 | " \n", 1621 | " \n", 1622 | " \n", 1623 | " \n", 1624 | " \n", 1625 | " \n", 1626 | " \n", 1627 | " \n", 1628 | " \n", 1629 | " \n", 1630 | " \n", 1631 | " \n", 1632 | " \n", 1633 | " \n", 1634 | " \n", 1635 | " \n", 1636 | " \n", 1637 | " \n", 1638 | " \n", 1639 | " \n", 1640 | " \n", 1641 | " \n", 1642 | " \n", 1643 | " \n", 1644 | " \n", 1645 | " \n", 1646 | " \n", 1647 | " \n", 1648 | " \n", 1649 | " \n", 1650 | " \n", 1651 | " \n", 1652 | " \n", 1653 | " \n", 1654 | " \n", 1655 | " \n", 1656 | " \n", 1657 | " \n", 1658 | " \n", 1659 | " \n", 1660 | " \n", 1661 | " \n", 1662 | " \n", 1663 | " \n", 1664 | " \n", 1665 | " \n", 1666 | " \n", 1667 | " \n", 1668 | " \n", 1669 | " \n", 1670 | " \n", 1671 | " \n", 1672 | " \n", 1673 | " \n", 1674 | " \n", 1675 | " \n", 1676 | " \n", 1677 | " \n", 1678 | " \n", 1679 | " \n", 1680 | " \n", 
1681 | " \n", 1682 | " \n", 1683 | " \n", 1684 | " \n", 1685 | " \n", 1686 | " \n", 1687 | " \n", 1688 | " \n", 1689 | " \n", 1690 | " \n", 1691 | " \n", 1692 | " \n", 1693 | " \n", 1694 | " \n", 1695 | " \n", 1696 | " \n", 1697 | " \n", 1698 | " \n", 1699 | " \n", 1700 | " \n", 1701 | " \n", 1702 | " \n", 1703 | " \n", 1704 | " \n", 1705 | " \n", 1706 | " \n", 1707 | " \n", 1708 | " \n", 1709 | " \n", 1710 | " \n", 1711 | " \n", 1712 | " \n", 1713 | " \n", 1714 | " \n", 1715 | " \n", 1716 | " \n", 1717 | " \n", 1718 | " \n", 1719 | " \n", 1720 | " \n", 1721 | " \n", 1722 | " \n", 1723 | " \n", 1724 | " \n", 1725 | " \n", 1726 | " \n", 1727 | " \n", 1728 | " \n", 1729 | " \n", 1730 | " \n", 1731 | " \n", 1732 | " \n", 1733 | " \n", 1734 | " \n", 1735 | " \n", 1736 | " \n", 1737 | " \n", 1738 | " \n", 1739 | " \n", 1740 | " \n", 1741 | " \n", 1742 | " \n", 1743 | " \n", 1744 | " \n", 1745 | " \n", 1746 | " \n", 1747 | " \n", 1748 | " \n", 1749 | " \n", 1750 | " \n", 1751 | " \n", 1752 | " \n", 1753 | " \n", 1754 | " \n", 1755 | " \n", 1756 | " \n", 1757 | " \n", 1758 | " \n", 1759 | " \n", 1760 | " \n", 1761 | " \n", 1762 | " \n", 1763 | " \n", 1764 | " \n", 1765 | " \n", 1766 | " \n", 1767 | " \n", 1768 | " \n", 1769 | " \n", 1770 | " \n", 1771 | " \n", 1772 | " \n", 1773 | " \n", 1774 | " \n", 1775 | " \n", 1776 | " \n", 1777 | " \n", 1778 | " \n", 1779 | " \n", 1780 | " \n", 1781 | " \n", 1782 | " \n", 1783 | " \n", 1784 | " \n", 1785 | " \n", 1786 | " \n", 1787 | " \n", 1788 | " \n", 1789 | " \n", 1790 | " \n", 1791 | " \n", 1792 | " \n", 1793 | " \n", 1794 | " \n", 1795 | " \n", 1796 | " \n", 1797 | " \n", 1798 | " \n", 1799 | " \n", 1800 | " \n", 1801 | " \n", 1802 | " \n", 1803 | " \n", 1804 | " \n", 1805 | " \n", 1806 | " \n", 1807 | " \n", 1808 | " \n", 1809 | " \n", 1810 | " \n", 1811 | " \n", 1812 | " \n", 1813 | " \n", 1814 | " \n", 1815 | " \n", 1816 | " \n", 1817 | " \n", 1818 | " \n", 1819 | " \n", 1820 | " \n", 1821 | " \n", 1822 | " \n", 1823 | " 
\n", 1824 | " \n", 1825 | " \n", 1826 | " \n", 1827 | " \n", 1828 | " \n", 1829 | " \n", 1830 | " \n", 1831 | " \n", 1832 | " \n", 1833 | " \n", 1834 | " \n", 1835 | " \n", 1836 | " \n", 1837 | " \n", 1838 | " \n", 1839 | " \n", 1840 | " \n", 1841 | " \n", 1842 | " \n", 1843 | " \n", 1844 | " \n", 1845 | " \n", 1846 | " \n", 1847 | " \n", 1848 | " \n", 1849 | " \n", 1850 | " \n", 1851 | " \n", 1852 | " \n", 1853 | " \n", 1854 | " \n", 1855 | " \n", 1856 | " \n", 1857 | " \n", 1858 | " \n", 1859 | " \n", 1860 | " \n", 1861 | " \n", 1862 | " \n", 1863 | " \n", 1864 | " \n", 1865 | " \n", 1866 | " \n", 1867 | " \n", 1868 | " \n", 1869 | " \n", 1870 | " \n", 1871 | " \n", 1872 | " \n", 1873 | " \n", 1874 | " \n", 1875 | " \n", 1876 | " \n", 1877 | " \n", 1878 | " \n", 1879 | " \n", 1880 | " \n", 1881 | " \n", 1882 | " \n", 1883 | " \n", 1884 | " \n", 1885 | " \n", 1886 | " \n", 1887 | " \n", 1888 | " \n", 1889 | " \n", 1890 | " \n", 1891 | " \n", 1892 | " \n", 1893 | " \n", 1894 | " \n", 1895 | " \n", 1896 | " \n", 1897 | " \n", 1898 | " \n", 1899 | " \n", 1900 | " \n", 1901 | " \n", 1902 | " \n", 1903 | " \n", 1904 | " \n", 1905 | " \n", 1906 | " \n", 1907 | " \n", 1908 | " \n", 1909 | " \n", 1910 | " \n", 1911 | " \n", 1912 | " \n", 1913 | " \n", 1914 | " \n", 1915 | " \n", 1916 | " \n", 1917 | " \n", 1918 | " \n", 1919 | " \n", 1920 | " \n", 1921 | " \n", 1922 | " \n", 1923 | " \n", 1924 | " \n", 1925 | " \n", 1926 | " \n", 1927 | " \n", 1928 | " \n", 1929 | " \n", 1930 | " \n", 1931 | " \n", 1932 | " \n", 1933 | " \n", 1934 | " \n", 1935 | " \n", 1936 | " \n", 1937 | " \n", 1938 | " \n", 1939 | " \n", 1940 | " \n", 1941 | " \n", 1942 | " \n", 1943 | " \n", 1944 | " \n", 1945 | " \n", 1946 | " \n", 1947 | " \n", 1948 | " \n", 1949 | " \n", 1950 | " \n", 1951 | " \n", 1952 | " \n", 1953 | " \n", 1954 | " \n", 1955 | " \n", 1956 | " \n", 1957 | " \n", 1958 | " \n", 1959 | " \n", 1960 | " \n", 1961 | " \n", 1962 | " \n", 1963 | " \n", 1964 | " \n", 1965 | " \n", 1966 | 
" \n", 1967 | " \n", 1968 | " \n", 1969 | " \n", 1970 | " \n", 1971 | " \n", 1972 | " \n", 1973 | " \n", 1974 | " \n", 1975 | " \n", 1976 | " \n", 1977 | " \n", 1978 | " \n", 1979 | " \n", 1980 | " \n", 1981 | " \n", 1982 | " \n", 1983 | " \n", 1984 | " \n", 1985 | " \n", 1986 | " \n", 1987 | " \n", 1988 | " \n", 1989 | " \n", 1990 | " \n", 1991 | " \n", 1992 | " \n", 1993 | " \n", 1994 | " \n", 1995 | " \n", 1996 | " \n", 1997 | " \n", 1998 | " \n", 1999 | " \n", 2000 | " \n", 2001 | " \n", 2002 | " \n", 2003 | " \n", 2004 | " \n", 2005 | " \n", 2006 | " \n", 2007 | " \n", 2008 | " \n", 2009 | " \n", 2010 | " \n", 2011 | " \n", 2012 | " \n", 2013 | " \n", 2014 | " \n", 2015 | " \n", 2016 | " \n", 2017 | " \n", 2018 | " \n", 2019 | " \n", 2020 | " \n", 2021 | " \n", 2022 | " \n", 2023 | " \n", 2024 | " \n", 2025 | " \n", 2026 | " \n", 2027 | " \n", 2028 | " \n", 2029 | " \n", 2030 | " \n", 2031 | " \n", 2032 | " \n", 2033 | " \n", 2034 | " \n", 2035 | " \n", 2036 | " \n", 2037 | " \n", 2038 | " \n", 2039 | " \n", 2040 | " \n", 2041 | " \n", 2042 | " \n", 2043 | " \n", 2044 | " \n", 2045 | " \n", 2046 | " \n", 2047 | " \n", 2048 | " \n", 2049 | " \n", 2050 | " \n", 2051 | " \n", 2052 | " \n", 2053 | " \n", 2054 | " \n", 2055 | " \n", 2056 | " \n", 2057 | " \n", 2058 | " \n", 2059 | " \n", 2060 | " \n", 2061 | " \n", 2062 | " \n", 2063 | " \n", 2064 | " \n", 2065 | " \n", 2066 | " \n", 2067 | " \n", 2068 | " \n", 2069 | " \n", 2070 | " \n", 2071 | " \n", 2072 | " \n", 2073 | " \n", 2074 | " \n", 2075 | " \n", 2076 | " \n", 2077 | " \n", 2078 | " \n", 2079 | " \n", 2080 | " \n", 2081 | " \n", 2082 | " \n", 2083 | " \n", 2084 | " \n", 2085 | " \n", 2086 | " \n", 2087 | " \n", 2088 | " \n", 2089 | " \n", 2090 | " \n", 2091 | " \n", 2092 | " \n", 2093 | " \n", 2094 | " \n", 2095 | " \n", 2096 | " \n", 2097 | " \n", 2098 | " \n", 2099 | " \n", 2100 | " \n", 2101 | " \n", 2102 | " \n", 2103 | " \n", 2104 | " \n", 2105 | " \n", 2106 | " \n", 2107 | " \n", 2108 | " \n", 2109 
| " \n", 2110 | " \n", 2111 | " \n", 2112 | " \n", 2113 | " \n", 2114 | " \n", 2115 | " \n", 2116 | " \n", 2117 | " \n", 2118 | " \n", 2119 | " \n", 2120 | " \n", 2121 | " \n", 2122 | " \n", 2123 | " \n", 2124 | " \n", 2125 | " \n", 2126 | " \n", 2127 | " \n", 2128 | " \n", 2129 | " \n", 2130 | " \n", 2131 | " \n", 2132 | " \n", 2133 | " \n", 2134 | " \n", 2135 | " \n", 2136 | " \n", 2137 | " \n", 2138 | " \n", 2139 | " \n", 2140 | " \n", 2141 | " \n", 2142 | " \n", 2143 | " \n", 2144 | " \n", 2145 | " \n", 2146 | " \n", 2147 | " \n", 2148 | " \n", 2149 | " \n", 2150 | " \n", 2151 | " \n", 2152 | " \n", 2153 | " \n", 2154 | " \n", 2155 | " \n", 2156 | " \n", 2157 | " \n", 2158 | " \n", 2159 | " \n", 2160 | " \n", 2161 | " \n", 2162 | " \n", 2163 | " \n", 2164 | " \n", 2165 | " \n", 2166 | " \n", 2167 | " \n", 2168 | " \n", 2169 | " \n", 2170 | " \n", 2171 | " \n", 2172 | " \n", 2173 | " \n", 2174 | " \n", 2175 | " \n", 2176 | " \n", 2177 | " \n", 2178 | " \n", 2179 | " \n", 2180 | " \n", 2181 | " \n", 2182 | " \n", 2183 | " \n", 2184 | " \n", 2185 | " \n", 2186 | " \n", 2187 | " \n", 2188 | " \n", 2189 | " \n", 2190 | " \n", 2191 | " \n", 2192 | " \n", 2193 | " \n", 2194 | " \n", 2195 | " \n", 2196 | " \n", 2197 | " \n", 2198 | " \n", 2199 | " \n", 2200 | " \n", 2201 | " \n", 2202 | " \n", 2203 | " \n", 2204 | " \n", 2205 | " \n", 2206 | " \n", 2207 | " \n", 2208 | " \n", 2209 | " \n", 2210 | " \n", 2211 | " \n", 2212 | " \n", 2213 | " \n", 2214 | " \n", 2215 | " \n", 2216 | " \n", 2217 | " \n", 2218 | " \n", 2219 | " \n", 2220 | " \n", 2221 | " \n", 2222 | " \n", 2223 | " \n", 2224 | " \n", 2225 | " \n", 2226 | " \n", 2227 | " \n", 2228 | " \n", 2229 | " \n", 2230 | " \n", 2231 | " \n", 2232 | " \n", 2233 | " \n", 2234 | " \n", 2235 | " \n", 2236 | " \n", 2237 | " \n", 2238 | " \n", 2239 | " \n", 2240 | " \n", 2241 | " \n", 2242 | " \n", 2243 | " \n", 2244 | " \n", 2245 | " \n", 2246 | " \n", 2247 | " \n", 2248 | " \n", 2249 | " \n", 2250 | " \n", 2251 | " \n", 
2252 | " \n", 2253 | " \n", 2254 | " \n", 2255 | " \n", 2256 | " \n", 2257 | " \n", 2258 | " \n", 2259 | " \n", 2260 | " \n", 2261 | " \n", 2262 | " \n", 2263 | " \n", 2264 | " \n", 2265 | " \n", 2266 | " \n", 2267 | " \n", 2268 | " \n", 2269 | " \n", 2270 | " \n", 2271 | " \n", 2272 | " \n", 2273 | " \n", 2274 | " \n", 2275 | " \n", 2276 | " \n", 2277 | " \n", 2278 | " \n", 2279 | " \n", 2280 | " \n", 2281 | " \n", 2282 | " \n", 2283 | " \n", 2284 | " \n", 2285 | " \n", 2286 | " \n", 2287 | " \n", 2288 | " \n", 2289 | " \n", 2290 | " \n", 2291 | " \n", 2292 | " \n", 2293 | " \n", 2294 | " \n", 2295 | " \n", 2296 | " \n", 2297 | " \n", 2298 | " \n", 2299 | " \n", 2300 | " \n", 2301 | " \n", 2302 | " \n", 2303 | " \n", 2304 | " \n", 2305 | " \n", 2306 | " \n", 2307 | " \n", 2308 | " \n", 2309 | " \n", 2310 | " \n", 2311 | " \n", 2312 | " \n", 2313 | " \n", 2314 | " \n", 2315 | " \n", 2316 | " \n", 2317 | " \n", 2318 | " \n", 2319 | " \n", 2320 | " \n", 2321 | " \n", 2322 | " \n", 2323 | " \n", 2324 | " \n", 2325 | " \n", 2326 | " \n", 2327 | " \n", 2328 | " \n", 2329 | " \n", 2330 | " \n", 2331 | " \n", 2332 | " \n", 2333 | " \n", 2334 | " \n", 2335 | " \n", 2336 | " \n", 2337 | " \n", 2338 | " \n", 2339 | " \n", 2340 | " \n", 2341 | " \n", 2342 | " \n", 2343 | " \n", 2344 | " \n", 2345 | " \n", 2346 | " \n", 2347 | " \n", 2348 | " \n", 2349 | " \n", 2350 | " \n", 2351 | " \n", 2352 | " \n", 2353 | " \n", 2354 | " \n", 2355 | " \n", 2356 | " \n", 2357 | " \n", 2358 | " \n", 2359 | " \n", 2360 | " \n", 2361 | " \n", 2362 | " \n", 2363 | " \n", 2364 | " \n", 2365 | " \n", 2366 | " \n", 2367 | " \n", 2368 | " \n", 2369 | " \n", 2370 | " \n", 2371 | " \n", 2372 | " \n", 2373 | " \n", 2374 | " \n", 2375 | " \n", 2376 | " \n", 2377 | " \n", 2378 | " \n", 2379 | " \n", 2380 | " \n", 2381 | " \n", 2382 | " \n", 2383 | " \n", 2384 | " \n", 2385 | " \n", 2386 | " \n", 2387 | " \n", 2388 | " \n", 2389 | " \n", 2390 | " \n", 2391 | " \n", 2392 | " \n", 2393 | " \n", 2394 | " 
\n", 2395 | " \n", 2396 | " \n", 2397 | " \n", 2398 | " \n", 2399 | " \n", 2400 | " \n", 2401 | " \n", 2402 | " \n", 2403 | " \n", 2404 | " \n", 2405 | " \n", 2406 | " \n", 2407 | " \n", 2408 | " \n", 2409 | " \n", 2410 | " \n", 2411 | " \n", 2412 | " \n", 2413 | " \n", 2414 | " \n", 2415 | " \n", 2416 | " \n", 2417 | " \n", 2418 | " \n", 2419 | " \n", 2420 | " \n", 2421 | " \n", 2422 | " \n", 2423 | " \n", 2424 | " \n", 2425 | " \n", 2426 | " \n", 2427 | " \n", 2428 | " \n", 2429 | " \n", 2430 | " \n", 2431 | " \n", 2432 | " \n", 2433 | " \n", 2434 | " \n", 2435 | " \n", 2436 | " \n", 2437 | " \n", 2438 | " \n", 2439 | " \n", 2440 | " \n", 2441 | " \n", 2442 | " \n", 2443 | " \n", 2444 | " \n", 2445 | " \n", 2446 | " \n", 2447 | " \n", 2448 | " \n", 2449 | " \n", 2450 | " \n", 2451 | " \n", 2452 | " \n", 2453 | " \n", 2454 | " \n", 2455 | " \n", 2456 | " \n", 2457 | " \n", 2458 | " \n", 2459 | "
uidcreate_dtoil_actv_dtclass_newbad_inddtnoil_amount_cntoil_amount_numoil_amount_totoil_amount_avg...payment_coupon_amount_maxpayment_coupon_amount_minpayment_coupon_amount_var_xpayment_coupon_amount_var_ypayment_coupon_amount_varchannel_code_dstcoil_code_dstcscene_dstcsource_app_dstccall_source_dstc
0B964363919850357032018-10-082018-10-08B00100.000.00...0.00.00.00.00.011111
1B964363919846933972018-10-112018-10-11E00100.000.00...0.00.00.00.00.011111
2B964363919772174682018-10-172018-10-17B00100.000.00...0.00.00.00.00.011111
3B964363919764808922018-09-282018-09-28B00100.000.00...0.00.00.00.00.011111
4B964363919721060432018-10-192018-10-19A00100.000.00...0.00.00.00.00.011111
5B964363919644893552018-09-292018-09-29C00100.000.00...0.00.00.00.00.011111
6B964363919421997112018-10-182018-10-18E00100.000.00...0.00.00.00.00.011111
7B964363919415306902018-09-292018-09-29B00100.000.00...0.00.00.00.00.011111
8B964363918998810512018-10-102018-10-10A00100.000.00...0.00.00.00.00.011111
9B964363918895632112018-09-252018-09-25E00100.000.00...0.00.00.00.00.011111
10B964363918644234582018-10-272018-10-27D00100.000.00...0.00.00.00.00.011111
11B964363918617837452018-09-272018-09-27E00100.000.00...0.00.00.00.00.011111
12B964363918514652872018-10-192018-10-19D00100.000.00...0.00.00.00.00.011111
13B964363918122227122018-10-122018-10-12B00100.000.00...0.00.00.00.00.011111
14B964363917617781172018-10-112018-10-11C00100.000.00...0.00.00.00.00.011111
15B964363917588361592018-09-292018-09-29E00100.000.00...0.00.00.00.00.011111
16B964363917330148912018-06-262018-06-26E00100.000.00...0.00.00.00.00.011111
17B964363916795803422018-10-102018-10-10D00100.000.00...0.00.00.00.00.011111
18B964363916351233852018-09-192018-09-19D00100.000.00...0.00.00.00.00.011111
19B964363916071453272018-10-202018-10-20B00100.000.00...0.00.00.00.00.011111
20B964363916019434732018-10-252018-10-25D00100.000.00...0.00.00.00.00.011111
21B964363915946956972018-09-242018-09-24F00100.000.00...0.00.00.00.00.011111
22B964363915930551782018-09-282018-09-28D00100.000.00...0.00.00.00.00.011111
23B964363915841196012018-10-102018-10-10D00100.000.00...0.00.00.00.00.011111
24B964363915588209682018-10-162018-10-16D00100.000.00...0.00.00.00.00.011111
25B964363915460064532018-10-262018-10-26B00100.000.00...0.00.00.00.00.011111
26B964363915448189202018-10-062018-10-06C00100.000.00...0.00.00.00.00.011111
27B964363915435114432018-10-242018-10-24C00100.000.00...0.00.00.00.00.011111
28B964363914772006312018-10-052018-10-05B00100.000.00...0.00.00.00.00.011111
29B964363914743107272018-10-192018-10-19F00100.000.00...0.00.00.00.00.011111
..................................................................
70B964363905076025212018-10-292018-10-29C00100.000.00...0.00.00.00.00.011111
71B964363904975970082018-09-242018-09-24E00100.000.00...0.00.00.00.00.011111
72B964363904952359322018-10-032018-10-03D00100.000.00...0.00.00.00.00.011111
73B964363904877778322018-09-222018-09-22D00100.000.00...0.00.00.00.00.011111
74B964363904807773862018-09-162018-09-16F00100.000.00...0.00.00.00.00.011111
75B964363904790741522018-10-152018-10-15C00100.000.00...0.00.00.00.00.011111
76B964363904753334492018-10-162018-10-16C00100.000.00...0.00.00.00.00.011111
77B964363904545805562018-09-262018-09-26D00100.000.00...0.00.00.00.00.011111
78B964363904278923162018-10-112018-10-11D00100.000.00...0.00.00.00.00.011111
79B964363904256848332018-09-272018-09-27D00100.000.00...0.00.00.00.00.011111
80B964363904158601382018-10-282018-10-28F00100.000.00...0.00.00.00.00.011111
81B964363904146972172018-10-112018-10-11D00100.000.00...0.00.00.00.00.011111
82B964363904137161572018-10-302018-10-30A00100.000.00...0.00.00.00.00.011111
83B963805880675882032018-07-202018-07-20E00100.000.00...0.00.00.00.00.011111
84B963805880548696252018-10-292018-10-29C00100.000.00...0.00.00.00.00.011111
85B963805880507984442018-10-212018-10-21F00100.000.00...0.00.00.00.00.011111
86B963805880296328822018-10-262018-10-26B00100.000.00...0.00.00.00.00.011111
87B963152737592942632018-10-312018-10-31B00100.000.00...0.00.00.00.00.011111
88B963152737481632472018-10-142018-10-14E00100.000.00...0.00.00.00.00.011111
89B963152737474686402018-09-132018-10-18D03511201.68201.68...300.0300.00.00.0300.011111
90B963152737371579512018-09-242018-09-24B00100.000.00...0.00.00.00.00.011111
91B963152737325817022018-10-162018-10-16B00100.000.00...0.00.00.00.00.011111
92B962681917493707312018-09-282018-09-28B00100.000.00...0.00.00.00.00.011111
93B962681917461042922018-09-272018-09-27C00100.000.00...0.00.00.00.00.011111
94B962681917350403742018-10-242018-10-24E00100.000.00...0.00.00.00.00.011111
95B961173703323551902018-10-192018-10-19B00100.000.00...0.00.00.00.00.011111
96B961173703301016582018-10-122018-10-12B00100.000.00...0.00.00.00.00.011111
97B961173703300663472018-10-012018-10-01D00100.000.00...0.00.00.00.00.011111
98B961173703287243502018-09-202018-09-20C00100.000.00...0.00.00.00.00.011111
99B961173703211590332018-10-082018-10-08D00100.000.00...0.00.00.00.00.011111
\n", 2460 | "

100 rows × 74 columns

\n", 2461 | "
" 2462 | ], 2463 | "text/plain": [ 2464 | " uid create_dt oil_actv_dt class_new bad_ind dtn \\\n", 2465 | "0 B96436391985035703 2018-10-08 2018-10-08 B 0 0 \n", 2466 | "1 B96436391984693397 2018-10-11 2018-10-11 E 0 0 \n", 2467 | "2 B96436391977217468 2018-10-17 2018-10-17 B 0 0 \n", 2468 | "3 B96436391976480892 2018-09-28 2018-09-28 B 0 0 \n", 2469 | "4 B96436391972106043 2018-10-19 2018-10-19 A 0 0 \n", 2470 | "5 B96436391964489355 2018-09-29 2018-09-29 C 0 0 \n", 2471 | "6 B96436391942199711 2018-10-18 2018-10-18 E 0 0 \n", 2472 | "7 B96436391941530690 2018-09-29 2018-09-29 B 0 0 \n", 2473 | "8 B96436391899881051 2018-10-10 2018-10-10 A 0 0 \n", 2474 | "9 B96436391889563211 2018-09-25 2018-09-25 E 0 0 \n", 2475 | "10 B96436391864423458 2018-10-27 2018-10-27 D 0 0 \n", 2476 | "11 B96436391861783745 2018-09-27 2018-09-27 E 0 0 \n", 2477 | "12 B96436391851465287 2018-10-19 2018-10-19 D 0 0 \n", 2478 | "13 B96436391812222712 2018-10-12 2018-10-12 B 0 0 \n", 2479 | "14 B96436391761778117 2018-10-11 2018-10-11 C 0 0 \n", 2480 | "15 B96436391758836159 2018-09-29 2018-09-29 E 0 0 \n", 2481 | "16 B96436391733014891 2018-06-26 2018-06-26 E 0 0 \n", 2482 | "17 B96436391679580342 2018-10-10 2018-10-10 D 0 0 \n", 2483 | "18 B96436391635123385 2018-09-19 2018-09-19 D 0 0 \n", 2484 | "19 B96436391607145327 2018-10-20 2018-10-20 B 0 0 \n", 2485 | "20 B96436391601943473 2018-10-25 2018-10-25 D 0 0 \n", 2486 | "21 B96436391594695697 2018-09-24 2018-09-24 F 0 0 \n", 2487 | "22 B96436391593055178 2018-09-28 2018-09-28 D 0 0 \n", 2488 | "23 B96436391584119601 2018-10-10 2018-10-10 D 0 0 \n", 2489 | "24 B96436391558820968 2018-10-16 2018-10-16 D 0 0 \n", 2490 | "25 B96436391546006453 2018-10-26 2018-10-26 B 0 0 \n", 2491 | "26 B96436391544818920 2018-10-06 2018-10-06 C 0 0 \n", 2492 | "27 B96436391543511443 2018-10-24 2018-10-24 C 0 0 \n", 2493 | "28 B96436391477200631 2018-10-05 2018-10-05 B 0 0 \n", 2494 | "29 B96436391474310727 2018-10-19 2018-10-19 F 0 0 \n", 2495 | ".. ... ... 
... ... ... ... \n", 2496 | "70 B96436390507602521 2018-10-29 2018-10-29 C 0 0 \n", 2497 | "71 B96436390497597008 2018-09-24 2018-09-24 E 0 0 \n", 2498 | "72 B96436390495235932 2018-10-03 2018-10-03 D 0 0 \n", 2499 | "73 B96436390487777832 2018-09-22 2018-09-22 D 0 0 \n", 2500 | "74 B96436390480777386 2018-09-16 2018-09-16 F 0 0 \n", 2501 | "75 B96436390479074152 2018-10-15 2018-10-15 C 0 0 \n", 2502 | "76 B96436390475333449 2018-10-16 2018-10-16 C 0 0 \n", 2503 | "77 B96436390454580556 2018-09-26 2018-09-26 D 0 0 \n", 2504 | "78 B96436390427892316 2018-10-11 2018-10-11 D 0 0 \n", 2505 | "79 B96436390425684833 2018-09-27 2018-09-27 D 0 0 \n", 2506 | "80 B96436390415860138 2018-10-28 2018-10-28 F 0 0 \n", 2507 | "81 B96436390414697217 2018-10-11 2018-10-11 D 0 0 \n", 2508 | "82 B96436390413716157 2018-10-30 2018-10-30 A 0 0 \n", 2509 | "83 B96380588067588203 2018-07-20 2018-07-20 E 0 0 \n", 2510 | "84 B96380588054869625 2018-10-29 2018-10-29 C 0 0 \n", 2511 | "85 B96380588050798444 2018-10-21 2018-10-21 F 0 0 \n", 2512 | "86 B96380588029632882 2018-10-26 2018-10-26 B 0 0 \n", 2513 | "87 B96315273759294263 2018-10-31 2018-10-31 B 0 0 \n", 2514 | "88 B96315273748163247 2018-10-14 2018-10-14 E 0 0 \n", 2515 | "89 B96315273747468640 2018-09-13 2018-10-18 D 0 35 \n", 2516 | "90 B96315273737157951 2018-09-24 2018-09-24 B 0 0 \n", 2517 | "91 B96315273732581702 2018-10-16 2018-10-16 B 0 0 \n", 2518 | "92 B96268191749370731 2018-09-28 2018-09-28 B 0 0 \n", 2519 | "93 B96268191746104292 2018-09-27 2018-09-27 C 0 0 \n", 2520 | "94 B96268191735040374 2018-10-24 2018-10-24 E 0 0 \n", 2521 | "95 B96117370332355190 2018-10-19 2018-10-19 B 0 0 \n", 2522 | "96 B96117370330101658 2018-10-12 2018-10-12 B 0 0 \n", 2523 | "97 B96117370330066347 2018-10-01 2018-10-01 D 0 0 \n", 2524 | "98 B96117370328724350 2018-09-20 2018-09-20 C 0 0 \n", 2525 | "99 B96117370321159033 2018-10-08 2018-10-08 D 0 0 \n", 2526 | "\n", 2527 | " oil_amount_cnt oil_amount_num oil_amount_tot oil_amount_avg 
\\\n", 2528 | "0 1 0 0.00 0.00 \n", 2529 | "1 1 0 0.00 0.00 \n", 2530 | "2 1 0 0.00 0.00 \n", 2531 | "3 1 0 0.00 0.00 \n", 2532 | "4 1 0 0.00 0.00 \n", 2533 | "5 1 0 0.00 0.00 \n", 2534 | "6 1 0 0.00 0.00 \n", 2535 | "7 1 0 0.00 0.00 \n", 2536 | "8 1 0 0.00 0.00 \n", 2537 | "9 1 0 0.00 0.00 \n", 2538 | "10 1 0 0.00 0.00 \n", 2539 | "11 1 0 0.00 0.00 \n", 2540 | "12 1 0 0.00 0.00 \n", 2541 | "13 1 0 0.00 0.00 \n", 2542 | "14 1 0 0.00 0.00 \n", 2543 | "15 1 0 0.00 0.00 \n", 2544 | "16 1 0 0.00 0.00 \n", 2545 | "17 1 0 0.00 0.00 \n", 2546 | "18 1 0 0.00 0.00 \n", 2547 | "19 1 0 0.00 0.00 \n", 2548 | "20 1 0 0.00 0.00 \n", 2549 | "21 1 0 0.00 0.00 \n", 2550 | "22 1 0 0.00 0.00 \n", 2551 | "23 1 0 0.00 0.00 \n", 2552 | "24 1 0 0.00 0.00 \n", 2553 | "25 1 0 0.00 0.00 \n", 2554 | "26 1 0 0.00 0.00 \n", 2555 | "27 1 0 0.00 0.00 \n", 2556 | "28 1 0 0.00 0.00 \n", 2557 | "29 1 0 0.00 0.00 \n", 2558 | ".. ... ... ... ... \n", 2559 | "70 1 0 0.00 0.00 \n", 2560 | "71 1 0 0.00 0.00 \n", 2561 | "72 1 0 0.00 0.00 \n", 2562 | "73 1 0 0.00 0.00 \n", 2563 | "74 1 0 0.00 0.00 \n", 2564 | "75 1 0 0.00 0.00 \n", 2565 | "76 1 0 0.00 0.00 \n", 2566 | "77 1 0 0.00 0.00 \n", 2567 | "78 1 0 0.00 0.00 \n", 2568 | "79 1 0 0.00 0.00 \n", 2569 | "80 1 0 0.00 0.00 \n", 2570 | "81 1 0 0.00 0.00 \n", 2571 | "82 1 0 0.00 0.00 \n", 2572 | "83 1 0 0.00 0.00 \n", 2573 | "84 1 0 0.00 0.00 \n", 2574 | "85 1 0 0.00 0.00 \n", 2575 | "86 1 0 0.00 0.00 \n", 2576 | "87 1 0 0.00 0.00 \n", 2577 | "88 1 0 0.00 0.00 \n", 2578 | "89 1 1 201.68 201.68 \n", 2579 | "90 1 0 0.00 0.00 \n", 2580 | "91 1 0 0.00 0.00 \n", 2581 | "92 1 0 0.00 0.00 \n", 2582 | "93 1 0 0.00 0.00 \n", 2583 | "94 1 0 0.00 0.00 \n", 2584 | "95 1 0 0.00 0.00 \n", 2585 | "96 1 0 0.00 0.00 \n", 2586 | "97 1 0 0.00 0.00 \n", 2587 | "98 1 0 0.00 0.00 \n", 2588 | "99 1 0 0.00 0.00 \n", 2589 | "\n", 2590 | " ... payment_coupon_amount_max payment_coupon_amount_min \\\n", 2591 | "0 ... 0.0 0.0 \n", 2592 | "1 ... 0.0 0.0 \n", 2593 | "2 ... 
0.0 0.0 \n", 2594 | "3 ... 0.0 0.0 \n", 2595 | "4 ... 0.0 0.0 \n", 2596 | "5 ... 0.0 0.0 \n", 2597 | "6 ... 0.0 0.0 \n", 2598 | "7 ... 0.0 0.0 \n", 2599 | "8 ... 0.0 0.0 \n", 2600 | "9 ... 0.0 0.0 \n", 2601 | "10 ... 0.0 0.0 \n", 2602 | "11 ... 0.0 0.0 \n", 2603 | "12 ... 0.0 0.0 \n", 2604 | "13 ... 0.0 0.0 \n", 2605 | "14 ... 0.0 0.0 \n", 2606 | "15 ... 0.0 0.0 \n", 2607 | "16 ... 0.0 0.0 \n", 2608 | "17 ... 0.0 0.0 \n", 2609 | "18 ... 0.0 0.0 \n", 2610 | "19 ... 0.0 0.0 \n", 2611 | "20 ... 0.0 0.0 \n", 2612 | "21 ... 0.0 0.0 \n", 2613 | "22 ... 0.0 0.0 \n", 2614 | "23 ... 0.0 0.0 \n", 2615 | "24 ... 0.0 0.0 \n", 2616 | "25 ... 0.0 0.0 \n", 2617 | "26 ... 0.0 0.0 \n", 2618 | "27 ... 0.0 0.0 \n", 2619 | "28 ... 0.0 0.0 \n", 2620 | "29 ... 0.0 0.0 \n", 2621 | ".. ... ... ... \n", 2622 | "70 ... 0.0 0.0 \n", 2623 | "71 ... 0.0 0.0 \n", 2624 | "72 ... 0.0 0.0 \n", 2625 | "73 ... 0.0 0.0 \n", 2626 | "74 ... 0.0 0.0 \n", 2627 | "75 ... 0.0 0.0 \n", 2628 | "76 ... 0.0 0.0 \n", 2629 | "77 ... 0.0 0.0 \n", 2630 | "78 ... 0.0 0.0 \n", 2631 | "79 ... 0.0 0.0 \n", 2632 | "80 ... 0.0 0.0 \n", 2633 | "81 ... 0.0 0.0 \n", 2634 | "82 ... 0.0 0.0 \n", 2635 | "83 ... 0.0 0.0 \n", 2636 | "84 ... 0.0 0.0 \n", 2637 | "85 ... 0.0 0.0 \n", 2638 | "86 ... 0.0 0.0 \n", 2639 | "87 ... 0.0 0.0 \n", 2640 | "88 ... 0.0 0.0 \n", 2641 | "89 ... 300.0 300.0 \n", 2642 | "90 ... 0.0 0.0 \n", 2643 | "91 ... 0.0 0.0 \n", 2644 | "92 ... 0.0 0.0 \n", 2645 | "93 ... 0.0 0.0 \n", 2646 | "94 ... 0.0 0.0 \n", 2647 | "95 ... 0.0 0.0 \n", 2648 | "96 ... 0.0 0.0 \n", 2649 | "97 ... 0.0 0.0 \n", 2650 | "98 ... 0.0 0.0 \n", 2651 | "99 ... 
0.0 0.0 \n", 2652 | "\n", 2653 | " payment_coupon_amount_var_x payment_coupon_amount_var_y \\\n", 2654 | "0 0.0 0.0 \n", 2655 | "1 0.0 0.0 \n", 2656 | "2 0.0 0.0 \n", 2657 | "3 0.0 0.0 \n", 2658 | "4 0.0 0.0 \n", 2659 | "5 0.0 0.0 \n", 2660 | "6 0.0 0.0 \n", 2661 | "7 0.0 0.0 \n", 2662 | "8 0.0 0.0 \n", 2663 | "9 0.0 0.0 \n", 2664 | "10 0.0 0.0 \n", 2665 | "11 0.0 0.0 \n", 2666 | "12 0.0 0.0 \n", 2667 | "13 0.0 0.0 \n", 2668 | "14 0.0 0.0 \n", 2669 | "15 0.0 0.0 \n", 2670 | "16 0.0 0.0 \n", 2671 | "17 0.0 0.0 \n", 2672 | "18 0.0 0.0 \n", 2673 | "19 0.0 0.0 \n", 2674 | "20 0.0 0.0 \n", 2675 | "21 0.0 0.0 \n", 2676 | "22 0.0 0.0 \n", 2677 | "23 0.0 0.0 \n", 2678 | "24 0.0 0.0 \n", 2679 | "25 0.0 0.0 \n", 2680 | "26 0.0 0.0 \n", 2681 | "27 0.0 0.0 \n", 2682 | "28 0.0 0.0 \n", 2683 | "29 0.0 0.0 \n", 2684 | ".. ... ... \n", 2685 | "70 0.0 0.0 \n", 2686 | "71 0.0 0.0 \n", 2687 | "72 0.0 0.0 \n", 2688 | "73 0.0 0.0 \n", 2689 | "74 0.0 0.0 \n", 2690 | "75 0.0 0.0 \n", 2691 | "76 0.0 0.0 \n", 2692 | "77 0.0 0.0 \n", 2693 | "78 0.0 0.0 \n", 2694 | "79 0.0 0.0 \n", 2695 | "80 0.0 0.0 \n", 2696 | "81 0.0 0.0 \n", 2697 | "82 0.0 0.0 \n", 2698 | "83 0.0 0.0 \n", 2699 | "84 0.0 0.0 \n", 2700 | "85 0.0 0.0 \n", 2701 | "86 0.0 0.0 \n", 2702 | "87 0.0 0.0 \n", 2703 | "88 0.0 0.0 \n", 2704 | "89 0.0 0.0 \n", 2705 | "90 0.0 0.0 \n", 2706 | "91 0.0 0.0 \n", 2707 | "92 0.0 0.0 \n", 2708 | "93 0.0 0.0 \n", 2709 | "94 0.0 0.0 \n", 2710 | "95 0.0 0.0 \n", 2711 | "96 0.0 0.0 \n", 2712 | "97 0.0 0.0 \n", 2713 | "98 0.0 0.0 \n", 2714 | "99 0.0 0.0 \n", 2715 | "\n", 2716 | " payment_coupon_amount_var channel_code_dstc oil_code_dstc scene_dstc \\\n", 2717 | "0 0.0 1 1 1 \n", 2718 | "1 0.0 1 1 1 \n", 2719 | "2 0.0 1 1 1 \n", 2720 | "3 0.0 1 1 1 \n", 2721 | "4 0.0 1 1 1 \n", 2722 | "5 0.0 1 1 1 \n", 2723 | "6 0.0 1 1 1 \n", 2724 | "7 0.0 1 1 1 \n", 2725 | "8 0.0 1 1 1 \n", 2726 | "9 0.0 1 1 1 \n", 2727 | "10 0.0 1 1 1 \n", 2728 | "11 0.0 1 1 1 \n", 2729 | "12 0.0 1 1 1 \n", 2730 | "13 0.0 1 1 1 
\n", 2731 | "14 0.0 1 1 1 \n", 2732 | "15 0.0 1 1 1 \n", 2733 | "16 0.0 1 1 1 \n", 2734 | "17 0.0 1 1 1 \n", 2735 | "18 0.0 1 1 1 \n", 2736 | "19 0.0 1 1 1 \n", 2737 | "20 0.0 1 1 1 \n", 2738 | "21 0.0 1 1 1 \n", 2739 | "22 0.0 1 1 1 \n", 2740 | "23 0.0 1 1 1 \n", 2741 | "24 0.0 1 1 1 \n", 2742 | "25 0.0 1 1 1 \n", 2743 | "26 0.0 1 1 1 \n", 2744 | "27 0.0 1 1 1 \n", 2745 | "28 0.0 1 1 1 \n", 2746 | "29 0.0 1 1 1 \n", 2747 | ".. ... ... ... ... \n", 2748 | "70 0.0 1 1 1 \n", 2749 | "71 0.0 1 1 1 \n", 2750 | "72 0.0 1 1 1 \n", 2751 | "73 0.0 1 1 1 \n", 2752 | "74 0.0 1 1 1 \n", 2753 | "75 0.0 1 1 1 \n", 2754 | "76 0.0 1 1 1 \n", 2755 | "77 0.0 1 1 1 \n", 2756 | "78 0.0 1 1 1 \n", 2757 | "79 0.0 1 1 1 \n", 2758 | "80 0.0 1 1 1 \n", 2759 | "81 0.0 1 1 1 \n", 2760 | "82 0.0 1 1 1 \n", 2761 | "83 0.0 1 1 1 \n", 2762 | "84 0.0 1 1 1 \n", 2763 | "85 0.0 1 1 1 \n", 2764 | "86 0.0 1 1 1 \n", 2765 | "87 0.0 1 1 1 \n", 2766 | "88 0.0 1 1 1 \n", 2767 | "89 300.0 1 1 1 \n", 2768 | "90 0.0 1 1 1 \n", 2769 | "91 0.0 1 1 1 \n", 2770 | "92 0.0 1 1 1 \n", 2771 | "93 0.0 1 1 1 \n", 2772 | "94 0.0 1 1 1 \n", 2773 | "95 0.0 1 1 1 \n", 2774 | "96 0.0 1 1 1 \n", 2775 | "97 0.0 1 1 1 \n", 2776 | "98 0.0 1 1 1 \n", 2777 | "99 0.0 1 1 1 \n", 2778 | "\n", 2779 | " source_app_dstc call_source_dstc \n", 2780 | "0 1 1 \n", 2781 | "1 1 1 \n", 2782 | "2 1 1 \n", 2783 | "3 1 1 \n", 2784 | "4 1 1 \n", 2785 | "5 1 1 \n", 2786 | "6 1 1 \n", 2787 | "7 1 1 \n", 2788 | "8 1 1 \n", 2789 | "9 1 1 \n", 2790 | "10 1 1 \n", 2791 | "11 1 1 \n", 2792 | "12 1 1 \n", 2793 | "13 1 1 \n", 2794 | "14 1 1 \n", 2795 | "15 1 1 \n", 2796 | "16 1 1 \n", 2797 | "17 1 1 \n", 2798 | "18 1 1 \n", 2799 | "19 1 1 \n", 2800 | "20 1 1 \n", 2801 | "21 1 1 \n", 2802 | "22 1 1 \n", 2803 | "23 1 1 \n", 2804 | "24 1 1 \n", 2805 | "25 1 1 \n", 2806 | "26 1 1 \n", 2807 | "27 1 1 \n", 2808 | "28 1 1 \n", 2809 | "29 1 1 \n", 2810 | ".. ... ... 
\n", 2811 | "70 1 1 \n", 2812 | "71 1 1 \n", 2813 | "72 1 1 \n", 2814 | "73 1 1 \n", 2815 | "74 1 1 \n", 2816 | "75 1 1 \n", 2817 | "76 1 1 \n", 2818 | "77 1 1 \n", 2819 | "78 1 1 \n", 2820 | "79 1 1 \n", 2821 | "80 1 1 \n", 2822 | "81 1 1 \n", 2823 | "82 1 1 \n", 2824 | "83 1 1 \n", 2825 | "84 1 1 \n", 2826 | "85 1 1 \n", 2827 | "86 1 1 \n", 2828 | "87 1 1 \n", 2829 | "88 1 1 \n", 2830 | "89 1 1 \n", 2831 | "90 1 1 \n", 2832 | "91 1 1 \n", 2833 | "92 1 1 \n", 2834 | "93 1 1 \n", 2835 | "94 1 1 \n", 2836 | "95 1 1 \n", 2837 | "96 1 1 \n", 2838 | "97 1 1 \n", 2839 | "98 1 1 \n", 2840 | "99 1 1 \n", 2841 | "\n", 2842 | "[100 rows x 74 columns]" 2843 | ] 2844 | }, 2845 | "execution_count": 36, 2846 | "metadata": {}, 2847 | "output_type": "execute_result" 2848 | } 2849 | ], 2850 | "source": [ 2851 | "fn.head(100)" 2852 | ] 2853 | }, 2854 | { 2855 | "cell_type": "markdown", 2856 | "metadata": {}, 2857 | "source": [ 2858 | "训练决策树模型" 2859 | ] 2860 | }, 2861 | { 2862 | "cell_type": "code", 2863 | "execution_count": 37, 2864 | "metadata": {}, 2865 | "outputs": [], 2866 | "source": [ 2867 | "x = fn.drop(['uid','oil_actv_dt','create_dt','bad_ind','class_new'],axis = 1)\n", 2868 | "y = fn.bad_ind.copy()\n", 2869 | "from sklearn import tree\n", 2870 | "\n", 2871 | "dtree = tree.DecisionTreeRegressor(max_depth = 2,min_samples_leaf = 500,min_samples_split = 5000)\n", 2872 | "dtree = dtree.fit(x,y)" 2873 | ] 2874 | }, 2875 | { 2876 | "cell_type": "markdown", 2877 | "metadata": {}, 2878 | "source": [ 2879 | "输出决策树图像,并作出决策" 2880 | ] 2881 | }, 2882 | { 2883 | "cell_type": "code", 2884 | "execution_count": 49, 2885 | "metadata": {}, 2886 | "outputs": [ 2887 | { 2888 | "data": { 2889 | "image/png": "\n", 2890 | "text/plain": [ 2891 | "" 2892 | ] 2893 | }, 2894 | "execution_count": 49, 2895 | "metadata": {}, 2896 | "output_type": "execute_result" 2897 | } 2898 | ], 2899 | "source": [ 2900 | "import pydotplus \n", 2901 | "from IPython.display import Image\n", 2902 | "from 
sklearn.externals.six import StringIO\n", 2903 | "import os\n", 2904 | "os.environ[\"PATH\"] += os.pathsep + 'C:/Program Files (x86)/Graphviz2.38/bin/'\n", 2905 | "with open(path + \"dt.dot\", \"w\") as f:\n", 2906 | " tree.export_graphviz(dtree, out_file=f)\n", 2907 | "dot_data = StringIO()\n", 2908 | "tree.export_graphviz(dtree, out_file=dot_data,\n", 2909 | " feature_names=x.columns,\n", 2910 | " class_names=['bad_ind'],\n", 2911 | " filled=True, rounded=True,\n", 2912 | " special_characters=True)\n", 2913 | "graph = pydotplus.graph_from_dot_data(dot_data.getvalue()) \n", 2914 | "Image(graph.create_png())" 2915 | ] 2916 | }, 2917 | { 2918 | "cell_type": "markdown", 2919 | "metadata": {}, 2920 | "source": [ 2921 | "value = badrate" 2922 | ] 2923 | }, 2924 | { 2925 | "cell_type": "code", 2926 | "execution_count": 53, 2927 | "metadata": {}, 2928 | "outputs": [ 2929 | { 2930 | "data": { 2931 | "text/plain": [ 2932 | "0.04658077304261645" 2933 | ] 2934 | }, 2935 | "execution_count": 53, 2936 | "metadata": {}, 2937 | "output_type": "execute_result" 2938 | } 2939 | ], 2940 | "source": [ 2941 | "sum(fn.bad_ind)/len(fn.bad_ind)" 2942 | ] 2943 | } 2944 | ], 2945 | "metadata": { 2946 | "kernelspec": { 2947 | "display_name": "Python 3", 2948 | "language": "python", 2949 | "name": "python3" 2950 | }, 2951 | "language_info": { 2952 | "codemirror_mode": { 2953 | "name": "ipython", 2954 | "version": 3 2955 | }, 2956 | "file_extension": ".py", 2957 | "mimetype": "text/x-python", 2958 | "name": "python", 2959 | "nbconvert_exporter": "python", 2960 | "pygments_lexer": "ipython3", 2961 | "version": "3.7.3" 2962 | } 2963 | }, 2964 | "nbformat": 4, 2965 | "nbformat_minor": 2 2966 | } 2967 | -------------------------------------------------------------------------------- /常用反欺诈特征.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "### 金融反欺诈 常用特征处理方法 " 8 
| ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "#### 用户基本属性\n", 15 | "\n", 16 | "phone_number \n", 17 | "\n", 18 | "- 手机号前缀是否相同\n", 19 | "- 手机号归属地是否相同\n", 20 | "- 是否是虚拟运营商\n", 21 | "- 流量卡还是通话卡\n", 22 | "\n", 23 | "nickname\n", 24 | "\n", 25 | "- 昵称符合固定的规律(中文+数字)\n", 26 | "- 备注是否符合某种亲密的称呼\n", 27 | "\n", 28 | "birthday \n", 29 | "\n", 30 | "- 年纪\n", 31 | "- 星座\n", 32 | "- 生肖\n", 33 | "\n", 34 | "sex \n", 35 | "\n", 36 | "- 性别是否失衡\n", 37 | "\n", 38 | "password \n", 39 | "\n", 40 | "- 是否都相同\n", 41 | "\n", 42 | "\n", 43 | "身份证号码\n", 44 | "\n", 45 | "- 年龄 核对\n", 46 | "- 性别 核对\n", 47 | "- 城市\n", 48 | "\n", 49 | "邮箱\n", 50 | "- 是否是一次性邮箱\n", 51 | "- username 满足规律\n", 52 | "- 是否同一邮箱服务商\n", 53 | "- 邮箱里面的数据(账单)\n", 54 | "\n", 55 | "\n", 56 | "学历\n", 57 | "- 相似性\n", 58 | "\n", 59 | "\n", 60 | "住房\n", 61 | "- 租房情况是否雷同\n", 62 | "\n", 63 | "\n", 64 | "积分 \n", 65 | "- 是不是超过某个阈值\n", 66 | "\n", 67 | "\n", 68 | "签到 \n", 69 | "- 相似性\n", 70 | "\n", 71 | "\n", 72 | "ip \n", 73 | "\n", 74 | "- 是否是同一个号段\n", 75 | "- 每次登录ip地址是否相同\n", 76 | "- 是不是临时ip 和 gps\n", 77 | "- ip 和 gps 是否能对的上\n", 78 | "\n", 79 | "gps\n", 80 | "\n", 81 | "- 经纬度相似性分析\n", 82 | "- 国家 省份 城市 相似性\n", 83 | "- ip 和 gps 是否能对的上\n", 84 | "\n", 85 | "\n", 86 | "wifi\n", 87 | "\n", 88 | "- ssid\n", 89 | "- wifi list\n", 90 | "- 贷款前的几分钟有没有切换过wifi\n", 91 | "\n", 92 | "\n", 93 | "application time\n", 94 | "\n", 95 | "- 时间切片\n", 96 | "- 注册用了多长时间(太快太慢都有问题)\n", 97 | "- 一共申请了几次\n", 98 | "\n", 99 | "login time \n", 100 | "\n", 101 | "- 时间切片\n", 102 | "- 登录了几次、频率\n", 103 | "- 最后一次登录时间距贷款时间的间隔\n", 104 | "- 同一时间登录做一个校验(同一时间多人登录)\n", 105 | "\n", 106 | "\n", 107 | "ua(user agent)\n", 108 | "\n", 109 | "- 每次打开是否是同一个ua\n", 110 | "\n", 111 | "\n", 112 | "渠道\n", 113 | "\n", 114 | "- app/H5/微信\n", 115 | "- 渠道ID属于违规渠道\n", 116 | "\n", 117 | "app version\n", 118 | "\n", 119 | "- 每次app的版本号是否相同\n", 120 | "- app版本会不会太老了(老版本的app有bug,可能会被黑中介用来攻击我们) \n", 121 | "\n", 122 | "推荐人/联系人 \n", 123 | "\n", 124 | "- 名字匹配\n", 
125 | "- 手机号匹配\n" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "metadata": {}, 131 | "source": [ 132 | "#### 设备指纹 \n", 133 | "\n", 134 | "imei \n", 135 | "\n", 136 | "- 是否都相同\n", 137 | "- 每次登录imei号是否都相同\n", 138 | " \n", 139 | " \n", 140 | "device id\n", 141 | "\n", 142 | "- 是否都相同\n", 143 | "- 每次登录device id号是否都相同\n", 144 | "\n", 145 | "\n", 146 | "分辨率 \n", 147 | "\n", 148 | "- 手机型号和屏幕分辨率是否一致\n", 149 | "\n", 150 | "mobile type\n", 151 | "\n", 152 | "- 手机品牌\n", 153 | "- 手机型号\n", 154 | "\n", 155 | "os(operating system)\n", 156 | "\n", 157 | "- 每次打开操作系统是否都相同\n", 158 | "- 来申请的人是否os都相同\n", 159 | "- os的版本是否太旧" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "#### 中文错别字可以考虑转换成拼音做相似度匹配 \n", 167 | "\n", 168 | "address \n", 169 | "\n", 170 | "- 地址要标准化\n", 171 | "- 模糊匹配\n", 172 | "- 相似度计算(cos距离,词向量)\n", 173 | "\n", 174 | "company \n", 175 | "\n", 176 | "- 正则\n", 177 | "- 字节拆分\n", 178 | "- 关键字提取\n", 179 | "- 相似度计算\n", 180 | "- 错别字/同音字识别\n", 181 | "\n" 182 | ] 183 | }, 184 | { 185 | "cell_type": "markdown", 186 | "metadata": {}, 187 | "source": [ 188 | "#### 第三方数据 \n", 189 | "\n", 190 | "人行征信 \n", 191 | "\n", 192 | "- 公司信息是否一致\n", 193 | "- 学历是否一致\n", 194 | "- 居住地址是否一致\n", 195 | "- 手机号码是否一致\n", 196 | "- 逾期数据\n", 197 | "\n", 198 | "运营商 \n", 199 | "\n", 200 | "- 是否有相同的联系人\n", 201 | "- 是否有黑名单客户在通讯录中\n", 202 | "- 通话最频繁的几个人(所在地是否和他相同)\n", 203 | "\n", 204 | "社保公积金 \n", 205 | "\n", 206 | "- 工资\n", 207 | "- 社保\n", 208 | "- 公积金\n" 209 | ] 210 | } 211 | ], 212 | "metadata": { 213 | "kernelspec": { 214 | "display_name": "Python 3", 215 | "language": "python", 216 | "name": "python3" 217 | }, 218 | "language_info": { 219 | "codemirror_mode": { 220 | "name": "ipython", 221 | "version": 3 222 | }, 223 | "file_extension": ".py", 224 | "mimetype": "text/x-python", 225 | "name": "python", 226 | "nbconvert_exporter": "python", 227 | "pygments_lexer": "ipython3", 228 | "version": "3.7.1" 229 | } 230 | }, 231 | "nbformat": 4, 232 
| "nbformat_minor": 2 233 | } 234 | --------------------------------------------------------------------------------