├── README.md
├── python操作
    ├── python操作.ipynb
    └── read_split_csv.py
├── 数据分析实践
    ├── task1_数据探索分析.ipynb
    ├── task2_特征衍生和选择.ipynb
    ├── task3_建模和评分.ipynb
    ├── task4_模型评估.ipynb
    ├── task4_模型评估（整理版）.ipynb
    ├── task5_模型调优.ipynb
    ├── task6_模型融合.ipynb
    └── 多分类的评估问题.ipynb
└── 爬虫与网页分析
    ├── data
        ├── 1396354
        ├── 1404079
        └── actor_info1
    └── 网页解析.py


/README.md:
--------------------------------------------------------------------------------
 1 | # python_data_analyse
 2 | python常用数据分析代码和技巧
 3 | 
 4 | 1.python操作:
 5 | 目的：积累python操作
 6 | 内容：
 7 | 
 8 | 2.爬虫与网页分析:
 9 | 目的：爬虫与网页分析
10 | 内容：（1）网页解析.py:把爬取的人物豆瓣网页和数据库的人物百度百科信息合并起来。
11 |                       用BeautifulSoup做豆瓣网页的解析和信息提取。
12 |                       数据在data/下面
13 | 
14 | 3.数据分析实践：
15 | 目的：数据分析组队学习内容和数据竞赛代码积累
16 | 
17 | 


--------------------------------------------------------------------------------
/python操作/python操作.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "### bz2压缩文件的操作"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": null,
 13 |    "metadata": {
 14 |     "collapsed": true
 15 |    },
 16 |    "outputs": [],
 17 |    "source": [
 18 |     "# 对于bz2压缩文件的操作\n",
 19 |     "\n",
 20 |     "import bz2\n",
 21 |     "with bz2.BZ2File(txtpath,'r') as f:\n",
 22 |     "        for line in f:\n",
 23 |     "            line = str(line, encoding = \"utf8\") #必须要加，否则line是bytes类型而不是str\n",
 24 |     "            \n"
 25 |    ]
 26 |   },
 27 |   {
 28 |    "cell_type": "markdown",
 29 |    "metadata": {},
 30 |    "source": [
 31 |     "## 对于json的操作\n",
 32 |     "\n",
 33 |     "一、json是一种编码格式，并不是dict\n",
 34 |     "1. json.dump() 和 json.load() 来编码和解码JSON数据,用于处理文件   \n",
 35 |     "2. json.dumps将一个Python数据结构转换为JSON，json.loads将一个JSON编码的字符串转换回一个Python数据结构：\n",
 36 |     "3. ensure_ascii=False  用在dump/dumps的时候，中文按原样写出，而不是被转义成`\\uXXXX`\n",
 37 |     "\n",
 38 |     "\n"
 39 |    ]
 40 |   },
 41 |   {
 42 |    "cell_type": "code",
 43 |    "execution_count": null,
 44 |    "metadata": {
 45 |     "collapsed": true
 46 |    },
 47 |    "outputs": [],
 48 |    "source": [
 49 |     "import json\n",
 50 |     "\n",
 51 |     "# 1. 将整个数据结构写入json文件或者从json文件读出：\n",
 52 |     "\n",
 53 |     "with open('test.json','w',encoding = 'utf-8') as fw:\n",
 54 |     "    json.dump(data_dict, fw, ensure_ascii=False)\n",
 55 |     "with open('test.json','r',encoding = 'utf-8') as fr:\n",
 56 |     "    date = json.load(fr)\n",
 57 |     "    \n",
 58 |     "# 2 . 将数据结构一行行json编码写入文件，或者从文件读出来\n",
 59 |     " #一行行写入\n",
 60 |     "tmp_dict = {'question':'fasdfa','head':'fadf'}\n",
 61 |     "result_f.write(json.dumps(tmp_dict,ensure_ascii=False) + '\\n')\n",
 62 |     " # 一行行读出来\n",
 63 |     "    with open('ner_result.txt','r',encoding='utf-8') as fr:\n",
 64 |     "        for line in fr:\n",
 65 |     "            line = line.strip()\n",
 66 |     "            tmp_dict = json.loads(line)\n",
 67 |     "\n",
 68 |     "\n",
 69 |     "\n"
 70 |    ]
 71 |   },
 72 |   {
 73 |    "cell_type": "markdown",
 74 |    "metadata": {},
 75 |    "source": [
 76 |     "### 文件操作"
 77 |    ]
 78 |   },
 79 |   {
 80 |    "cell_type": "code",
 81 |    "execution_count": null,
 82 |    "metadata": {
 83 |     "collapsed": true
 84 |    },
 85 |    "outputs": [],
 86 |    "source": [
 87 |     "### 1 . 同时对两个文件操作\n",
 88 |     "with open('a1.txt','r',encoding='utf-8') as fr, open('a2.txt','a',encoding='utf-8') as fw:\n",
 89 |     "    for line in fr: # 读文件\n",
 90 |     "        line = line.strip() # 惯用处理，可以去掉头尾空格和换行符\n",
 91 |     "         # strip() 方法用于移除字符串头尾指定的字符（默认为空格或换行符,包括'\\n', '\\r',  '\\t',  ' ')\n",
 92 |     "        part = line.split('\\t') #  split \n",
 93 |     "        if len(part) == 2:\n",
 94 |     "            key = part[0]\n",
 95 |     "            value = part[1]\n",
 96 |     "            \n",
 97 |     "    \n",
 98 |     "# 2 . 把一个数组遍历写入文件 writelines: 可以写迭代结构， write只能写入str\n",
 99 |     "\"\"\"读+写\"\"\"\n",
100 |     "#读文件，做处理，再写入文件\n",
101 |     "li = []\n",
102 |     "with open(\"./data/test\",encoding='utf-8') as fr:\n",
103 |     "    for line in fr:\n",
104 |     "        \n",
105 |     "        line =line.strip()\n",
106 |     "        li.append('1'+'\\t'+ line)#做处理 ，放到列表中\n",
107 |     "        \n",
108 |     "with open('data/test_xkj1','a',encoding='utf-8') as fw:\n",
109 |     "    fw.writelines(e + '\\n' for e in li) "
110 |    ]
111 |   },
112 |   {
113 |    "cell_type": "markdown",
114 |    "metadata": {},
115 |    "source": [
116 |     "### 字符串操作"
117 |    ]
118 |   },
119 |   {
120 |    "cell_type": "code",
121 |    "execution_count": 20,
122 |    "metadata": {},
123 |    "outputs": [
124 |     {
125 |      "ename": "TypeError",
126 |      "evalue": "write() argument must be str, not generator",
127 |      "output_type": "error",
128 |      "traceback": [
129 |       "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
130 |       "\u001b[1;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
131 |       "\u001b[1;32m<ipython-input-20-1b5767b706dc>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m      2\u001b[0m \u001b[0mli\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;34m'a'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m'b'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m'c'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      3\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0mopen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'data/test_xkj2'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m'a'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mencoding\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'utf-8'\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mfw\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 4\u001b[1;33m     \u001b[0mfw\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mwrite\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0me\u001b[0m \u001b[1;33m+\u001b[0m \u001b[1;34m'\\n'\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0me\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mli\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
132 |       "\u001b[1;31mTypeError\u001b[0m: write() argument must be str, not generator"
133 |      ]
134 |     }
135 |    ],
136 |    "source": [
137 |     "# \n"
138 |    ]
139 |   },
140 |   {
141 |    "cell_type": "markdown",
142 |    "metadata": {},
143 |    "source": [
144 |     "### 字典 dict操作"
145 |    ]
146 |   },
147 |   {
148 |    "cell_type": "code",
149 |    "execution_count": 22,
150 |    "metadata": {},
151 |    "outputs": [
152 |     {
153 |      "name": "stdout",
154 |      "output_type": "stream",
155 |      "text": [
156 |       "dict_keys(['b', 'c'])\n",
157 |       "['b', 'c']\n",
158 |       "dict_values([2, 3])\n",
159 |       "dict_items([('b', 2), ('c', 3)])\n"
160 |      ]
161 |     },
162 |     {
163 |      "ename": "NameError",
164 |      "evalue": "name 'cmp' is not defined",
165 |      "output_type": "error",
166 |      "traceback": [
167 |       "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
168 |       "\u001b[1;31mNameError\u001b[0m                                 Traceback (most recent call last)",
169 |       "\u001b[1;32m<ipython-input-22-1c3739c3a2b7>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m     21\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdata_dict\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     22\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 23\u001b[1;33m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcmp\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdata_dict\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mdata_dict2\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
170 |       "\u001b[1;31mNameError\u001b[0m: name 'cmp' is not defined"
171 |      ]
172 |     }
173 |    ],
174 |    "source": [
175 |     "data_dict = {}\n",
176 |     "data_dict2 = {\"a\":1,\"b\":2, \"c\":3}\n",
177 |     "# 增加\n",
178 |     "data_dict.update({\"a\":1,\"b\":2, \"c\":3})\n",
179 |     "\n",
180 |     "# 删除某个键值\n",
181 |     "del data_dict[\"a\"]\n",
182 |     "\n",
183 |     "# 判断键值在不在\n",
184 |     "if \"a\" in data_dict:\n",
185 |     "    print(\"a\")\n",
186 |     "\n",
187 |     "# 返回key值\n",
188 |     "print(data_dict.keys()) #不是list类型要强制转换，\n",
189 |     "print(list(data_dict.keys()))\n",
190 |     "\n",
191 |     "# 返回value值\n",
192 |     "print(data_dict.values())\n",
193 |     "\n",
194 |     "#  返回可遍历的元组形式\n",
195 |     "print(data_dict.items())\n",
196 |     "\n"
197 |    ]
198 |   },
199 |   {
200 |    "cell_type": "markdown",
201 |    "metadata": {},
202 |    "source": [
203 |     "### 条件判断"
204 |    ]
205 |   },
206 |   {
207 |    "cell_type": "code",
208 |    "execution_count": null,
209 |    "metadata": {
210 |     "collapsed": true
211 |    },
212 |    "outputs": [],
213 |    "source": [
214 |     "# if not array\n",
215 |     "\n",
216 |     "# 哪些东西相当于空"
217 |    ]
218 |   }
219 |  ],
220 |  "metadata": {
221 |   "kernelspec": {
222 |    "display_name": "Python 3",
223 |    "language": "python",
224 |    "name": "python3"
225 |   },
226 |   "language_info": {
227 |    "codemirror_mode": {
228 |     "name": "ipython",
229 |     "version": 3
230 |    },
231 |    "file_extension": ".py",
232 |    "mimetype": "text/x-python",
233 |    "name": "python",
234 |    "nbconvert_exporter": "python",
235 |    "pygments_lexer": "ipython3",
236 |    "version": "3.6.9"
237 |   }
238 |  },
239 |  "nbformat": 4,
240 |  "nbformat_minor": 2
241 | }
242 | 


--------------------------------------------------------------------------------
/python操作/read_split_csv.py:
--------------------------------------------------------------------------------
import csv
import json
import os
import pickle

import numpy as np
import pandas as pd

try:
    # sklearn.cross_validation was removed in scikit-learn 0.20;
    # train_test_split lives in sklearn.model_selection since 0.18.
    from sklearn.model_selection import train_test_split
except ImportError:
    # Fallback for very old scikit-learn installs that predate model_selection.
    from sklearn.cross_validation import train_test_split
 7 | 
 8 | """
 9 | python将大csv文件划分成小csv文件做训练集和测试集及对应label文件
10 | """
11 | 
def read_data(test_data='input/train.csv', n=0, label=1):
    '''
    Load samples from a CSV file whose first row is a header.

    test_data: path of the CSV file to read
    n: index of the first feature column to keep
    label: 1 if the file is labelled (the last column is an integer label);
           any other value if it is unlabelled

    Returns (x_list, y_list). x_list holds one list of string fields per data
    row; y_list holds the integer labels and stays empty when label != 1.
    Raises ValueError if label == 1 and a row's last field is not an integer.
    '''
    x_list = []
    y_list = []
    # The context manager guarantees the handle is closed; newline="" lets the
    # csv module see raw line endings, so newlines inside quoted fields survive.
    with open(test_data, encoding="utf8", errors="ignore", newline="") as csv_file:
        csv_reader = csv.reader(csv_file)
        next(csv_reader, None)  # skip the header row (no-op on an empty file)
        for one_line in csv_reader:
            if not one_line:
                # Blank lines parse as []; they are not samples and would
                # otherwise fail on one_line[-1].
                continue
            if label == 1:  # labelled data: the last column is the label
                y_list.append(int(one_line[-1]))
                x_list.append(one_line[n:-1])
            else:
                x_list.append(one_line[n:])
    return x_list, y_list
33 | 
def split_data(data_list, y_list, ratio=0.30, out_dir='input', random_state=50):
    '''
    Split the samples into train/test subsets and write them to disk.

    data_list: feature rows; each row is read as row[0] (qid) and
               row[1] (question_text)
    y_list: labels aligned with data_list
    ratio: fraction of samples placed in the test set
           (default 0.30, i.e. 70% train / 30% test)
    out_dir: directory that receives the output files; created if missing
    random_state: seed forwarded to train_test_split

    Files written under out_dir:
      sub_train.csv  - columns qid, question_text, target
      sub_test_x.csv - columns qid, question_text
      sub_test_y     - JSON list of the test labels

    Returns (X_train, X_test, y_train, y_test) as produced by train_test_split.
    '''
    X_train, X_test, y_train, y_test = train_test_split(
        data_list, y_list, test_size=ratio, random_state=random_state)

    os.makedirs(out_dir, exist_ok=True)

    # Training set: features and label together in one CSV.
    # newline="" keeps the csv module from emitting an extra blank line per row.
    train_path = os.path.join(out_dir, 'sub_train.csv')
    with open(train_path, 'w', encoding="utf8", newline="", errors="ignore") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=['qid', 'question_text', 'target'])
        writer.writeheader()
        writer.writerows(
            {'qid': row[0], 'question_text': row[1], 'target': target}
            for row, target in zip(X_train, y_train))

    # Test set, part 1: the labels go to their own JSON file.
    with open(os.path.join(out_dir, 'sub_test_y'), 'w') as fp:
        json.dump(y_test, fp)

    # Test set, part 2: the features without the label column.
    test_path = os.path.join(out_dir, 'sub_test_x.csv')
    with open(test_path, 'w', encoding="utf8", newline="", errors="ignore") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=['qid', 'question_text'])
        writer.writeheader()
        writer.writerows(
            {'qid': row[0], 'question_text': row[1]} for row in X_test)

    return X_train, X_test, y_train, y_test
61 | 
if __name__ == '__main__':
    # Step 1: load every sample from the large source CSV.
    features, labels = read_data()
    # Step 2: write the train set, the test set and the test-label file.
    split_data(features, labels)
67 | 


--------------------------------------------------------------------------------
/数据分析实践/task1_数据探索分析.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |  "cells": [
   3 |   {
   4 |    "cell_type": "markdown",
   5 |    "metadata": {},
   6 |    "source": [
   7 |     "### 题目：  \n",
   8 |     "这份数据集是金融数据（非原始数据，已经处理过了），我们要做的是预测贷款用户是否会逾期。表格中 \"status\" 是结果标签：0表示未逾期，1表示逾期。\n",
   9 |     "### 要求：  \n",
  10 |     "数据切分方式 - 三七分，其中测试集30%，训练集70%，随机种子设置为2018\n",
  11 |     "### 任务1：  \n",
  12 |     "对数据进行探索和分析。  \n",
  13 |     "\n",
  14 |     "数据类型的分析  \n",
  15 |     "无关特征删除  \n",
  16 |     "数据类型转换  \n",
  17 |     "缺失值处理  \n",
  18 |     "……以及你能想到和借鉴的数据分析处理"
  19 |    ]
  20 |   },
  21 |   {
  22 |    "cell_type": "markdown",
  23 |    "metadata": {},
  24 |    "source": [
  25 |     "### 一、 观察数据"
  26 |    ]
  27 |   },
  28 |   {
  29 |    "cell_type": "code",
  30 |    "execution_count": 1,
  31 |    "metadata": {
  32 |     "collapsed": false
  33 |    },
  34 |    "outputs": [
  35 |     {
  36 |      "data": {
  37 |       "text/html": [
  38 |        "<div>\n",
  39 |        "<style scoped>\n",
  40 |        "    .dataframe tbody tr th:only-of-type {\n",
  41 |        "        vertical-align: middle;\n",
  42 |        "    }\n",
  43 |        "\n",
  44 |        "    .dataframe tbody tr th {\n",
  45 |        "        vertical-align: top;\n",
  46 |        "    }\n",
  47 |        "\n",
  48 |        "    .dataframe thead th {\n",
  49 |        "        text-align: right;\n",
  50 |        "    }\n",
  51 |        "</style>\n",
  52 |        "<table border=\"1\" class=\"dataframe\">\n",
  53 |        "  <thead>\n",
  54 |        "    <tr style=\"text-align: right;\">\n",
  55 |        "      <th></th>\n",
  56 |        "      <th>Unnamed: 0</th>\n",
  57 |        "      <th>custid</th>\n",
  58 |        "      <th>trade_no</th>\n",
  59 |        "      <th>bank_card_no</th>\n",
  60 |        "      <th>low_volume_percent</th>\n",
  61 |        "      <th>middle_volume_percent</th>\n",
  62 |        "      <th>take_amount_in_later_12_month_highest</th>\n",
  63 |        "      <th>trans_amount_increase_rate_lately</th>\n",
  64 |        "      <th>trans_activity_month</th>\n",
  65 |        "      <th>trans_activity_day</th>\n",
  66 |        "      <th>...</th>\n",
  67 |        "      <th>loans_max_limit</th>\n",
  68 |        "      <th>loans_avg_limit</th>\n",
  69 |        "      <th>consfin_credit_limit</th>\n",
  70 |        "      <th>consfin_credibility</th>\n",
  71 |        "      <th>consfin_org_count_current</th>\n",
  72 |        "      <th>consfin_product_count</th>\n",
  73 |        "      <th>consfin_max_limit</th>\n",
  74 |        "      <th>consfin_avg_limit</th>\n",
  75 |        "      <th>latest_query_day</th>\n",
  76 |        "      <th>loans_latest_day</th>\n",
  77 |        "    </tr>\n",
  78 |        "  </thead>\n",
  79 |        "  <tbody>\n",
  80 |        "    <tr>\n",
  81 |        "      <th>0</th>\n",
  82 |        "      <td>5</td>\n",
  83 |        "      <td>2791858</td>\n",
  84 |        "      <td>20180507115231274000000023057383</td>\n",
  85 |        "      <td>卡号1</td>\n",
  86 |        "      <td>0.01</td>\n",
  87 |        "      <td>0.99</td>\n",
  88 |        "      <td>0</td>\n",
  89 |        "      <td>0.90</td>\n",
  90 |        "      <td>0.55</td>\n",
  91 |        "      <td>0.313</td>\n",
  92 |        "      <td>...</td>\n",
  93 |        "      <td>2900.0</td>\n",
  94 |        "      <td>1688.0</td>\n",
  95 |        "      <td>1200.0</td>\n",
  96 |        "      <td>75.0</td>\n",
  97 |        "      <td>1.0</td>\n",
  98 |        "      <td>2.0</td>\n",
  99 |        "      <td>1200.0</td>\n",
 100 |        "      <td>1200.0</td>\n",
 101 |        "      <td>12.0</td>\n",
 102 |        "      <td>18.0</td>\n",
 103 |        "    </tr>\n",
 104 |        "    <tr>\n",
 105 |        "      <th>1</th>\n",
 106 |        "      <td>10</td>\n",
 107 |        "      <td>534047</td>\n",
 108 |        "      <td>20180507121002192000000023073000</td>\n",
 109 |        "      <td>卡号1</td>\n",
 110 |        "      <td>0.02</td>\n",
 111 |        "      <td>0.94</td>\n",
 112 |        "      <td>2000</td>\n",
 113 |        "      <td>1.28</td>\n",
 114 |        "      <td>1.00</td>\n",
 115 |        "      <td>0.458</td>\n",
 116 |        "      <td>...</td>\n",
 117 |        "      <td>3500.0</td>\n",
 118 |        "      <td>1758.0</td>\n",
 119 |        "      <td>15100.0</td>\n",
 120 |        "      <td>80.0</td>\n",
 121 |        "      <td>5.0</td>\n",
 122 |        "      <td>6.0</td>\n",
 123 |        "      <td>22800.0</td>\n",
 124 |        "      <td>9360.0</td>\n",
 125 |        "      <td>4.0</td>\n",
 126 |        "      <td>2.0</td>\n",
 127 |        "    </tr>\n",
 128 |        "    <tr>\n",
 129 |        "      <th>2</th>\n",
 130 |        "      <td>12</td>\n",
 131 |        "      <td>2849787</td>\n",
 132 |        "      <td>20180507125159718000000023114911</td>\n",
 133 |        "      <td>卡号1</td>\n",
 134 |        "      <td>0.04</td>\n",
 135 |        "      <td>0.96</td>\n",
 136 |        "      <td>0</td>\n",
 137 |        "      <td>1.00</td>\n",
 138 |        "      <td>1.00</td>\n",
 139 |        "      <td>0.114</td>\n",
 140 |        "      <td>...</td>\n",
 141 |        "      <td>1600.0</td>\n",
 142 |        "      <td>1250.0</td>\n",
 143 |        "      <td>4200.0</td>\n",
 144 |        "      <td>87.0</td>\n",
 145 |        "      <td>1.0</td>\n",
 146 |        "      <td>1.0</td>\n",
 147 |        "      <td>4200.0</td>\n",
 148 |        "      <td>4200.0</td>\n",
 149 |        "      <td>2.0</td>\n",
 150 |        "      <td>6.0</td>\n",
 151 |        "    </tr>\n",
 152 |        "    <tr>\n",
 153 |        "      <th>3</th>\n",
 154 |        "      <td>13</td>\n",
 155 |        "      <td>1809708</td>\n",
 156 |        "      <td>20180507121358683000000388283484</td>\n",
 157 |        "      <td>卡号1</td>\n",
 158 |        "      <td>0.00</td>\n",
 159 |        "      <td>0.96</td>\n",
 160 |        "      <td>2000</td>\n",
 161 |        "      <td>0.13</td>\n",
 162 |        "      <td>0.57</td>\n",
 163 |        "      <td>0.777</td>\n",
 164 |        "      <td>...</td>\n",
 165 |        "      <td>3200.0</td>\n",
 166 |        "      <td>1541.0</td>\n",
 167 |        "      <td>16300.0</td>\n",
 168 |        "      <td>80.0</td>\n",
 169 |        "      <td>5.0</td>\n",
 170 |        "      <td>5.0</td>\n",
 171 |        "      <td>30000.0</td>\n",
 172 |        "      <td>12180.0</td>\n",
 173 |        "      <td>2.0</td>\n",
 174 |        "      <td>4.0</td>\n",
 175 |        "    </tr>\n",
 176 |        "    <tr>\n",
 177 |        "      <th>4</th>\n",
 178 |        "      <td>14</td>\n",
 179 |        "      <td>2499829</td>\n",
 180 |        "      <td>20180507115448545000000388205844</td>\n",
 181 |        "      <td>卡号1</td>\n",
 182 |        "      <td>0.01</td>\n",
 183 |        "      <td>0.99</td>\n",
 184 |        "      <td>0</td>\n",
 185 |        "      <td>0.46</td>\n",
 186 |        "      <td>1.00</td>\n",
 187 |        "      <td>0.175</td>\n",
 188 |        "      <td>...</td>\n",
 189 |        "      <td>2300.0</td>\n",
 190 |        "      <td>1630.0</td>\n",
 191 |        "      <td>8300.0</td>\n",
 192 |        "      <td>79.0</td>\n",
 193 |        "      <td>2.0</td>\n",
 194 |        "      <td>2.0</td>\n",
 195 |        "      <td>8400.0</td>\n",
 196 |        "      <td>8250.0</td>\n",
 197 |        "      <td>22.0</td>\n",
 198 |        "      <td>120.0</td>\n",
 199 |        "    </tr>\n",
 200 |        "  </tbody>\n",
 201 |        "</table>\n",
 202 |        "<p>5 rows × 90 columns</p>\n",
 203 |        "</div>"
 204 |       ],
 205 |       "text/plain": [
 206 |        "   Unnamed: 0   custid                          trade_no bank_card_no  \\\n",
 207 |        "0           5  2791858  20180507115231274000000023057383          卡号1   \n",
 208 |        "1          10   534047  20180507121002192000000023073000          卡号1   \n",
 209 |        "2          12  2849787  20180507125159718000000023114911          卡号1   \n",
 210 |        "3          13  1809708  20180507121358683000000388283484          卡号1   \n",
 211 |        "4          14  2499829  20180507115448545000000388205844          卡号1   \n",
 212 |        "\n",
 213 |        "   low_volume_percent  middle_volume_percent  \\\n",
 214 |        "0                0.01                   0.99   \n",
 215 |        "1                0.02                   0.94   \n",
 216 |        "2                0.04                   0.96   \n",
 217 |        "3                0.00                   0.96   \n",
 218 |        "4                0.01                   0.99   \n",
 219 |        "\n",
 220 |        "   take_amount_in_later_12_month_highest  trans_amount_increase_rate_lately  \\\n",
 221 |        "0                                      0                               0.90   \n",
 222 |        "1                                   2000                               1.28   \n",
 223 |        "2                                      0                               1.00   \n",
 224 |        "3                                   2000                               0.13   \n",
 225 |        "4                                      0                               0.46   \n",
 226 |        "\n",
 227 |        "   trans_activity_month  trans_activity_day        ...         \\\n",
 228 |        "0                  0.55               0.313        ...          \n",
 229 |        "1                  1.00               0.458        ...          \n",
 230 |        "2                  1.00               0.114        ...          \n",
 231 |        "3                  0.57               0.777        ...          \n",
 232 |        "4                  1.00               0.175        ...          \n",
 233 |        "\n",
 234 |        "   loans_max_limit  loans_avg_limit  consfin_credit_limit  \\\n",
 235 |        "0           2900.0           1688.0                1200.0   \n",
 236 |        "1           3500.0           1758.0               15100.0   \n",
 237 |        "2           1600.0           1250.0                4200.0   \n",
 238 |        "3           3200.0           1541.0               16300.0   \n",
 239 |        "4           2300.0           1630.0                8300.0   \n",
 240 |        "\n",
 241 |        "   consfin_credibility  consfin_org_count_current  consfin_product_count  \\\n",
 242 |        "0                 75.0                        1.0                    2.0   \n",
 243 |        "1                 80.0                        5.0                    6.0   \n",
 244 |        "2                 87.0                        1.0                    1.0   \n",
 245 |        "3                 80.0                        5.0                    5.0   \n",
 246 |        "4                 79.0                        2.0                    2.0   \n",
 247 |        "\n",
 248 |        "   consfin_max_limit  consfin_avg_limit  latest_query_day  loans_latest_day  \n",
 249 |        "0             1200.0             1200.0              12.0              18.0  \n",
 250 |        "1            22800.0             9360.0               4.0               2.0  \n",
 251 |        "2             4200.0             4200.0               2.0               6.0  \n",
 252 |        "3            30000.0            12180.0               2.0               4.0  \n",
 253 |        "4             8400.0             8250.0              22.0             120.0  \n",
 254 |        "\n",
 255 |        "[5 rows x 90 columns]"
 256 |       ]
 257 |      },
 258 |      "execution_count": 1,
 259 |      "metadata": {},
 260 |      "output_type": "execute_result"
 261 |     }
 262 |    ],
 263 |    "source": [
 264 |     "import pandas as pd \n",
 265 |     "\n",
 266 |     "df = pd.read_csv('data.csv',encoding = 'gbk')\n",
 267 |     "df.head(5) "
 268 |    ]
 269 |   },
 270 |   {
 271 |    "cell_type": "code",
 272 |    "execution_count": 2,
 273 |    "metadata": {
 274 |     "collapsed": false,
 275 |     "scrolled": true
 276 |    },
 277 |    "outputs": [
 278 |     {
 279 |      "name": "stdout",
 280 |      "output_type": "stream",
 281 |      "text": [
 282 |       "<class 'pandas.core.frame.DataFrame'>\n",
 283 |       "RangeIndex: 4754 entries, 0 to 4753\n",
 284 |       "Data columns (total 90 columns):\n",
 285 |       "Unnamed: 0                                    4754 non-null int64\n",
 286 |       "custid                                        4754 non-null int64\n",
 287 |       "trade_no                                      4754 non-null object\n",
 288 |       "bank_card_no                                  4754 non-null object\n",
 289 |       "low_volume_percent                            4752 non-null float64\n",
 290 |       "middle_volume_percent                         4752 non-null float64\n",
 291 |       "take_amount_in_later_12_month_highest         4754 non-null int64\n",
 292 |       "trans_amount_increase_rate_lately             4751 non-null float64\n",
 293 |       "trans_activity_month                          4752 non-null float64\n",
 294 |       "trans_activity_day                            4752 non-null float64\n",
 295 |       "transd_mcc                                    4752 non-null float64\n",
 296 |       "trans_days_interval_filter                    4746 non-null float64\n",
 297 |       "trans_days_interval                           4752 non-null float64\n",
 298 |       "regional_mobility                             4752 non-null float64\n",
 299 |       "student_feature                               1756 non-null float64\n",
 300 |       "repayment_capability                          4754 non-null int64\n",
 301 |       "is_high_user                                  4754 non-null int64\n",
 302 |       "number_of_trans_from_2011                     4752 non-null float64\n",
 303 |       "first_transaction_time                        4752 non-null float64\n",
 304 |       "historical_trans_amount                       4754 non-null int64\n",
 305 |       "historical_trans_day                          4752 non-null float64\n",
 306 |       "rank_trad_1_month                             4752 non-null float64\n",
 307 |       "trans_amount_3_month                          4754 non-null int64\n",
 308 |       "avg_consume_less_12_valid_month               4752 non-null float64\n",
 309 |       "abs                                           4754 non-null int64\n",
 310 |       "top_trans_count_last_1_month                  4752 non-null float64\n",
 311 |       "avg_price_last_12_month                       4754 non-null int64\n",
 312 |       "avg_price_top_last_12_valid_month             4650 non-null float64\n",
 313 |       "reg_preference_for_trad                       4752 non-null object\n",
 314 |       "trans_top_time_last_1_month                   4746 non-null float64\n",
 315 |       "trans_top_time_last_6_month                   4746 non-null float64\n",
 316 |       "consume_top_time_last_1_month                 4746 non-null float64\n",
 317 |       "consume_top_time_last_6_month                 4746 non-null float64\n",
 318 |       "cross_consume_count_last_1_month              4328 non-null float64\n",
 319 |       "trans_fail_top_count_enum_last_1_month        4738 non-null float64\n",
 320 |       "trans_fail_top_count_enum_last_6_month        4738 non-null float64\n",
 321 |       "trans_fail_top_count_enum_last_12_month       4738 non-null float64\n",
 322 |       "consume_mini_time_last_1_month                4728 non-null float64\n",
 323 |       "max_cumulative_consume_later_1_month          4754 non-null int64\n",
 324 |       "max_consume_count_later_6_month               4746 non-null float64\n",
 325 |       "railway_consume_count_last_12_month           4742 non-null float64\n",
 326 |       "pawns_auctions_trusts_consume_last_1_month    4754 non-null int64\n",
 327 |       "pawns_auctions_trusts_consume_last_6_month    4754 non-null int64\n",
 328 |       "jewelry_consume_count_last_6_month            4742 non-null float64\n",
 329 |       "status                                        4754 non-null int64\n",
 330 |       "source                                        4754 non-null object\n",
 331 |       "first_transaction_day                         4752 non-null float64\n",
 332 |       "trans_day_last_12_month                       4752 non-null float64\n",
 333 |       "id_name                                       4478 non-null object\n",
 334 |       "apply_score                                   4450 non-null float64\n",
 335 |       "apply_credibility                             4450 non-null float64\n",
 336 |       "query_org_count                               4450 non-null float64\n",
 337 |       "query_finance_count                           4450 non-null float64\n",
 338 |       "query_cash_count                              4450 non-null float64\n",
 339 |       "query_sum_count                               4450 non-null float64\n",
 340 |       "latest_query_time                             4450 non-null object\n",
 341 |       "latest_one_month_apply                        4450 non-null float64\n",
 342 |       "latest_three_month_apply                      4450 non-null float64\n",
 343 |       "latest_six_month_apply                        4450 non-null float64\n",
 344 |       "loans_score                                   4457 non-null float64\n",
 345 |       "loans_credibility_behavior                    4457 non-null float64\n",
 346 |       "loans_count                                   4457 non-null float64\n",
 347 |       "loans_settle_count                            4457 non-null float64\n",
 348 |       "loans_overdue_count                           4457 non-null float64\n",
 349 |       "loans_org_count_behavior                      4457 non-null float64\n",
 350 |       "consfin_org_count_behavior                    4457 non-null float64\n",
 351 |       "loans_cash_count                              4457 non-null float64\n",
 352 |       "latest_one_month_loan                         4457 non-null float64\n",
 353 |       "latest_three_month_loan                       4457 non-null float64\n",
 354 |       "latest_six_month_loan                         4457 non-null float64\n",
 355 |       "history_suc_fee                               4457 non-null float64\n",
 356 |       "history_fail_fee                              4457 non-null float64\n",
 357 |       "latest_one_month_suc                          4457 non-null float64\n",
 358 |       "latest_one_month_fail                         4457 non-null float64\n",
 359 |       "loans_long_time                               4457 non-null float64\n",
 360 |       "loans_latest_time                             4457 non-null object\n",
 361 |       "loans_credit_limit                            4457 non-null float64\n",
 362 |       "loans_credibility_limit                       4457 non-null float64\n",
 363 |       "loans_org_count_current                       4457 non-null float64\n",
 364 |       "loans_product_count                           4457 non-null float64\n",
 365 |       "loans_max_limit                               4457 non-null float64\n",
 366 |       "loans_avg_limit                               4457 non-null float64\n",
 367 |       "consfin_credit_limit                          4457 non-null float64\n",
 368 |       "consfin_credibility                           4457 non-null float64\n",
 369 |       "consfin_org_count_current                     4457 non-null float64\n",
 370 |       "consfin_product_count                         4457 non-null float64\n",
 371 |       "consfin_max_limit                             4457 non-null float64\n",
 372 |       "consfin_avg_limit                             4457 non-null float64\n",
 373 |       "latest_query_day                              4450 non-null float64\n",
 374 |       "loans_latest_day                              4457 non-null float64\n",
 375 |       "dtypes: float64(70), int64(13), object(7)\n",
 376 |       "memory usage: 3.3+ MB\n"
 377 |      ]
 378 |     }
 379 |    ],
 380 |    "source": [
 381 |     "df.info() #返回df的所有信息\n",
 382 |     "#可以看到共有4754条数据、90个特征（列）及各自的数据类型；non-null 计数小于4754的特征存在缺失值（NaN），后面需要做缺失值处理\n"
 383 |    ]
 384 |   },
 385 |   {
 386 |    "cell_type": "code",
 387 |    "execution_count": 3,
 388 |    "metadata": {
 389 |     "collapsed": false
 390 |    },
 391 |    "outputs": [
 392 |     {
 393 |      "data": {
 394 |       "text/html": [
 395 |        "<div>\n",
 396 |        "<style scoped>\n",
 397 |        "    .dataframe tbody tr th:only-of-type {\n",
 398 |        "        vertical-align: middle;\n",
 399 |        "    }\n",
 400 |        "\n",
 401 |        "    .dataframe tbody tr th {\n",
 402 |        "        vertical-align: top;\n",
 403 |        "    }\n",
 404 |        "\n",
 405 |        "    .dataframe thead th {\n",
 406 |        "        text-align: right;\n",
 407 |        "    }\n",
 408 |        "</style>\n",
 409 |        "<table border=\"1\" class=\"dataframe\">\n",
 410 |        "  <thead>\n",
 411 |        "    <tr style=\"text-align: right;\">\n",
 412 |        "      <th></th>\n",
 413 |        "      <th>Unnamed: 0</th>\n",
 414 |        "      <th>custid</th>\n",
 415 |        "      <th>low_volume_percent</th>\n",
 416 |        "      <th>middle_volume_percent</th>\n",
 417 |        "      <th>take_amount_in_later_12_month_highest</th>\n",
 418 |        "      <th>trans_amount_increase_rate_lately</th>\n",
 419 |        "      <th>trans_activity_month</th>\n",
 420 |        "      <th>trans_activity_day</th>\n",
 421 |        "      <th>transd_mcc</th>\n",
 422 |        "      <th>trans_days_interval_filter</th>\n",
 423 |        "      <th>...</th>\n",
 424 |        "      <th>loans_max_limit</th>\n",
 425 |        "      <th>loans_avg_limit</th>\n",
 426 |        "      <th>consfin_credit_limit</th>\n",
 427 |        "      <th>consfin_credibility</th>\n",
 428 |        "      <th>consfin_org_count_current</th>\n",
 429 |        "      <th>consfin_product_count</th>\n",
 430 |        "      <th>consfin_max_limit</th>\n",
 431 |        "      <th>consfin_avg_limit</th>\n",
 432 |        "      <th>latest_query_day</th>\n",
 433 |        "      <th>loans_latest_day</th>\n",
 434 |        "    </tr>\n",
 435 |        "  </thead>\n",
 436 |        "  <tbody>\n",
 437 |        "    <tr>\n",
 438 |        "      <th>count</th>\n",
 439 |        "      <td>4754.000000</td>\n",
 440 |        "      <td>4.754000e+03</td>\n",
 441 |        "      <td>4752.000000</td>\n",
 442 |        "      <td>4752.000000</td>\n",
 443 |        "      <td>4754.000000</td>\n",
 444 |        "      <td>4751.000000</td>\n",
 445 |        "      <td>4752.000000</td>\n",
 446 |        "      <td>4752.000000</td>\n",
 447 |        "      <td>4752.000000</td>\n",
 448 |        "      <td>4746.000000</td>\n",
 449 |        "      <td>...</td>\n",
 450 |        "      <td>4457.000000</td>\n",
 451 |        "      <td>4457.000000</td>\n",
 452 |        "      <td>4457.000000</td>\n",
 453 |        "      <td>4457.000000</td>\n",
 454 |        "      <td>4457.000000</td>\n",
 455 |        "      <td>4457.000000</td>\n",
 456 |        "      <td>4457.000000</td>\n",
 457 |        "      <td>4457.000000</td>\n",
 458 |        "      <td>4450.000000</td>\n",
 459 |        "      <td>4457.000000</td>\n",
 460 |        "    </tr>\n",
 461 |        "    <tr>\n",
 462 |        "      <th>mean</th>\n",
 463 |        "      <td>6008.414178</td>\n",
 464 |        "      <td>1.690993e+06</td>\n",
 465 |        "      <td>0.021806</td>\n",
 466 |        "      <td>0.901294</td>\n",
 467 |        "      <td>1940.197728</td>\n",
 468 |        "      <td>14.160674</td>\n",
 469 |        "      <td>0.804411</td>\n",
 470 |        "      <td>0.365425</td>\n",
 471 |        "      <td>17.502946</td>\n",
 472 |        "      <td>29.029920</td>\n",
 473 |        "      <td>...</td>\n",
 474 |        "      <td>3390.038142</td>\n",
 475 |        "      <td>1820.357864</td>\n",
 476 |        "      <td>9187.009199</td>\n",
 477 |        "      <td>76.042630</td>\n",
 478 |        "      <td>4.732331</td>\n",
 479 |        "      <td>5.227507</td>\n",
 480 |        "      <td>16153.690823</td>\n",
 481 |        "      <td>8007.696881</td>\n",
 482 |        "      <td>24.112809</td>\n",
 483 |        "      <td>55.181512</td>\n",
 484 |        "    </tr>\n",
 485 |        "    <tr>\n",
 486 |        "      <th>std</th>\n",
 487 |        "      <td>3452.071428</td>\n",
 488 |        "      <td>1.034235e+06</td>\n",
 489 |        "      <td>0.041527</td>\n",
 490 |        "      <td>0.144856</td>\n",
 491 |        "      <td>3923.971494</td>\n",
 492 |        "      <td>694.180473</td>\n",
 493 |        "      <td>0.196920</td>\n",
 494 |        "      <td>0.170196</td>\n",
 495 |        "      <td>4.475616</td>\n",
 496 |        "      <td>22.722432</td>\n",
 497 |        "      <td>...</td>\n",
 498 |        "      <td>1474.206546</td>\n",
 499 |        "      <td>583.418291</td>\n",
 500 |        "      <td>7371.257043</td>\n",
 501 |        "      <td>14.536819</td>\n",
 502 |        "      <td>2.974596</td>\n",
 503 |        "      <td>3.409292</td>\n",
 504 |        "      <td>14301.037628</td>\n",
 505 |        "      <td>5679.418585</td>\n",
 506 |        "      <td>37.725724</td>\n",
 507 |        "      <td>53.486408</td>\n",
 508 |        "    </tr>\n",
 509 |        "    <tr>\n",
 510 |        "      <th>min</th>\n",
 511 |        "      <td>5.000000</td>\n",
 512 |        "      <td>1.140000e+02</td>\n",
 513 |        "      <td>0.000000</td>\n",
 514 |        "      <td>0.000000</td>\n",
 515 |        "      <td>0.000000</td>\n",
 516 |        "      <td>0.000000</td>\n",
 517 |        "      <td>0.120000</td>\n",
 518 |        "      <td>0.033000</td>\n",
 519 |        "      <td>2.000000</td>\n",
 520 |        "      <td>0.000000</td>\n",
 521 |        "      <td>...</td>\n",
 522 |        "      <td>0.000000</td>\n",
 523 |        "      <td>0.000000</td>\n",
 524 |        "      <td>0.000000</td>\n",
 525 |        "      <td>0.000000</td>\n",
 526 |        "      <td>0.000000</td>\n",
 527 |        "      <td>0.000000</td>\n",
 528 |        "      <td>0.000000</td>\n",
 529 |        "      <td>0.000000</td>\n",
 530 |        "      <td>-2.000000</td>\n",
 531 |        "      <td>-2.000000</td>\n",
 532 |        "    </tr>\n",
 533 |        "    <tr>\n",
 534 |        "      <th>25%</th>\n",
 535 |        "      <td>3106.000000</td>\n",
 536 |        "      <td>7.593358e+05</td>\n",
 537 |        "      <td>0.010000</td>\n",
 538 |        "      <td>0.880000</td>\n",
 539 |        "      <td>0.000000</td>\n",
 540 |        "      <td>0.615000</td>\n",
 541 |        "      <td>0.670000</td>\n",
 542 |        "      <td>0.233000</td>\n",
 543 |        "      <td>15.000000</td>\n",
 544 |        "      <td>16.000000</td>\n",
 545 |        "      <td>...</td>\n",
 546 |        "      <td>2300.000000</td>\n",
 547 |        "      <td>1535.000000</td>\n",
 548 |        "      <td>4800.000000</td>\n",
 549 |        "      <td>77.000000</td>\n",
 550 |        "      <td>2.000000</td>\n",
 551 |        "      <td>3.000000</td>\n",
 552 |        "      <td>7800.000000</td>\n",
 553 |        "      <td>4737.000000</td>\n",
 554 |        "      <td>5.000000</td>\n",
 555 |        "      <td>10.000000</td>\n",
 556 |        "    </tr>\n",
 557 |        "    <tr>\n",
 558 |        "      <th>50%</th>\n",
 559 |        "      <td>6006.500000</td>\n",
 560 |        "      <td>1.634942e+06</td>\n",
 561 |        "      <td>0.010000</td>\n",
 562 |        "      <td>0.960000</td>\n",
 563 |        "      <td>500.000000</td>\n",
 564 |        "      <td>0.970000</td>\n",
 565 |        "      <td>0.860000</td>\n",
 566 |        "      <td>0.350000</td>\n",
 567 |        "      <td>17.000000</td>\n",
 568 |        "      <td>23.000000</td>\n",
 569 |        "      <td>...</td>\n",
 570 |        "      <td>3100.000000</td>\n",
 571 |        "      <td>1810.000000</td>\n",
 572 |        "      <td>7700.000000</td>\n",
 573 |        "      <td>79.000000</td>\n",
 574 |        "      <td>4.000000</td>\n",
 575 |        "      <td>5.000000</td>\n",
 576 |        "      <td>13800.000000</td>\n",
 577 |        "      <td>7050.000000</td>\n",
 578 |        "      <td>14.000000</td>\n",
 579 |        "      <td>36.000000</td>\n",
 580 |        "    </tr>\n",
 581 |        "    <tr>\n",
 582 |        "      <th>75%</th>\n",
 583 |        "      <td>8999.000000</td>\n",
 584 |        "      <td>2.597905e+06</td>\n",
 585 |        "      <td>0.020000</td>\n",
 586 |        "      <td>0.990000</td>\n",
 587 |        "      <td>2000.000000</td>\n",
 588 |        "      <td>1.600000</td>\n",
 589 |        "      <td>1.000000</td>\n",
 590 |        "      <td>0.480000</td>\n",
 591 |        "      <td>20.000000</td>\n",
 592 |        "      <td>32.000000</td>\n",
 593 |        "      <td>...</td>\n",
 594 |        "      <td>4300.000000</td>\n",
 595 |        "      <td>2100.000000</td>\n",
 596 |        "      <td>11700.000000</td>\n",
 597 |        "      <td>80.000000</td>\n",
 598 |        "      <td>7.000000</td>\n",
 599 |        "      <td>7.000000</td>\n",
 600 |        "      <td>20400.000000</td>\n",
 601 |        "      <td>10000.000000</td>\n",
 602 |        "      <td>24.000000</td>\n",
 603 |        "      <td>91.000000</td>\n",
 604 |        "    </tr>\n",
 605 |        "    <tr>\n",
 606 |        "      <th>max</th>\n",
 607 |        "      <td>11992.000000</td>\n",
 608 |        "      <td>4.004694e+06</td>\n",
 609 |        "      <td>1.000000</td>\n",
 610 |        "      <td>1.000000</td>\n",
 611 |        "      <td>68000.000000</td>\n",
 612 |        "      <td>47596.740000</td>\n",
 613 |        "      <td>1.000000</td>\n",
 614 |        "      <td>0.941000</td>\n",
 615 |        "      <td>42.000000</td>\n",
 616 |        "      <td>285.000000</td>\n",
 617 |        "      <td>...</td>\n",
 618 |        "      <td>10000.000000</td>\n",
 619 |        "      <td>6900.000000</td>\n",
 620 |        "      <td>87100.000000</td>\n",
 621 |        "      <td>87.000000</td>\n",
 622 |        "      <td>18.000000</td>\n",
 623 |        "      <td>20.000000</td>\n",
 624 |        "      <td>266400.000000</td>\n",
 625 |        "      <td>82800.000000</td>\n",
 626 |        "      <td>360.000000</td>\n",
 627 |        "      <td>323.000000</td>\n",
 628 |        "    </tr>\n",
 629 |        "  </tbody>\n",
 630 |        "</table>\n",
 631 |        "<p>8 rows × 83 columns</p>\n",
 632 |        "</div>"
 633 |       ],
 634 |       "text/plain": [
 635 |        "         Unnamed: 0        custid  low_volume_percent  middle_volume_percent  \\\n",
 636 |        "count   4754.000000  4.754000e+03         4752.000000            4752.000000   \n",
 637 |        "mean    6008.414178  1.690993e+06            0.021806               0.901294   \n",
 638 |        "std     3452.071428  1.034235e+06            0.041527               0.144856   \n",
 639 |        "min        5.000000  1.140000e+02            0.000000               0.000000   \n",
 640 |        "25%     3106.000000  7.593358e+05            0.010000               0.880000   \n",
 641 |        "50%     6006.500000  1.634942e+06            0.010000               0.960000   \n",
 642 |        "75%     8999.000000  2.597905e+06            0.020000               0.990000   \n",
 643 |        "max    11992.000000  4.004694e+06            1.000000               1.000000   \n",
 644 |        "\n",
 645 |        "       take_amount_in_later_12_month_highest  \\\n",
 646 |        "count                            4754.000000   \n",
 647 |        "mean                             1940.197728   \n",
 648 |        "std                              3923.971494   \n",
 649 |        "min                                 0.000000   \n",
 650 |        "25%                                 0.000000   \n",
 651 |        "50%                               500.000000   \n",
 652 |        "75%                              2000.000000   \n",
 653 |        "max                             68000.000000   \n",
 654 |        "\n",
 655 |        "       trans_amount_increase_rate_lately  trans_activity_month  \\\n",
 656 |        "count                        4751.000000           4752.000000   \n",
 657 |        "mean                           14.160674              0.804411   \n",
 658 |        "std                           694.180473              0.196920   \n",
 659 |        "min                             0.000000              0.120000   \n",
 660 |        "25%                             0.615000              0.670000   \n",
 661 |        "50%                             0.970000              0.860000   \n",
 662 |        "75%                             1.600000              1.000000   \n",
 663 |        "max                         47596.740000              1.000000   \n",
 664 |        "\n",
 665 |        "       trans_activity_day   transd_mcc  trans_days_interval_filter  \\\n",
 666 |        "count         4752.000000  4752.000000                 4746.000000   \n",
 667 |        "mean             0.365425    17.502946                   29.029920   \n",
 668 |        "std              0.170196     4.475616                   22.722432   \n",
 669 |        "min              0.033000     2.000000                    0.000000   \n",
 670 |        "25%              0.233000    15.000000                   16.000000   \n",
 671 |        "50%              0.350000    17.000000                   23.000000   \n",
 672 |        "75%              0.480000    20.000000                   32.000000   \n",
 673 |        "max              0.941000    42.000000                  285.000000   \n",
 674 |        "\n",
 675 |        "             ...         loans_max_limit  loans_avg_limit  \\\n",
 676 |        "count        ...             4457.000000      4457.000000   \n",
 677 |        "mean         ...             3390.038142      1820.357864   \n",
 678 |        "std          ...             1474.206546       583.418291   \n",
 679 |        "min          ...                0.000000         0.000000   \n",
 680 |        "25%          ...             2300.000000      1535.000000   \n",
 681 |        "50%          ...             3100.000000      1810.000000   \n",
 682 |        "75%          ...             4300.000000      2100.000000   \n",
 683 |        "max          ...            10000.000000      6900.000000   \n",
 684 |        "\n",
 685 |        "       consfin_credit_limit  consfin_credibility  consfin_org_count_current  \\\n",
 686 |        "count           4457.000000          4457.000000                4457.000000   \n",
 687 |        "mean            9187.009199            76.042630                   4.732331   \n",
 688 |        "std             7371.257043            14.536819                   2.974596   \n",
 689 |        "min                0.000000             0.000000                   0.000000   \n",
 690 |        "25%             4800.000000            77.000000                   2.000000   \n",
 691 |        "50%             7700.000000            79.000000                   4.000000   \n",
 692 |        "75%            11700.000000            80.000000                   7.000000   \n",
 693 |        "max            87100.000000            87.000000                  18.000000   \n",
 694 |        "\n",
 695 |        "       consfin_product_count  consfin_max_limit  consfin_avg_limit  \\\n",
 696 |        "count            4457.000000        4457.000000        4457.000000   \n",
 697 |        "mean                5.227507       16153.690823        8007.696881   \n",
 698 |        "std                 3.409292       14301.037628        5679.418585   \n",
 699 |        "min                 0.000000           0.000000           0.000000   \n",
 700 |        "25%                 3.000000        7800.000000        4737.000000   \n",
 701 |        "50%                 5.000000       13800.000000        7050.000000   \n",
 702 |        "75%                 7.000000       20400.000000       10000.000000   \n",
 703 |        "max                20.000000      266400.000000       82800.000000   \n",
 704 |        "\n",
 705 |        "       latest_query_day  loans_latest_day  \n",
 706 |        "count       4450.000000       4457.000000  \n",
 707 |        "mean          24.112809         55.181512  \n",
 708 |        "std           37.725724         53.486408  \n",
 709 |        "min           -2.000000         -2.000000  \n",
 710 |        "25%            5.000000         10.000000  \n",
 711 |        "50%           14.000000         36.000000  \n",
 712 |        "75%           24.000000         91.000000  \n",
 713 |        "max          360.000000        323.000000  \n",
 714 |        "\n",
 715 |        "[8 rows x 83 columns]"
 716 |       ]
 717 |      },
 718 |      "execution_count": 3,
 719 |      "metadata": {},
 720 |      "output_type": "execute_result"
 721 |     }
 722 |    ],
 723 |    "source": [
 724 |     "df.describe() #可以看到统计信息"
 725 |    ]
 726 |   },
 727 |   {
 728 |    "cell_type": "code",
 729 |    "execution_count": 4,
 730 |    "metadata": {
 731 |     "collapsed": false
 732 |    },
 733 |    "outputs": [
 734 |     {
 735 |      "name": "stdout",
 736 |      "output_type": "stream",
 737 |      "text": [
 738 |       "we have 70 columns in type float64, they are ['low_volume_percent', 'middle_volume_percent', 'trans_amount_increase_rate_lately', 'trans_activity_month', 'trans_activity_day', 'transd_mcc', 'trans_days_interval_filter', 'trans_days_interval', 'regional_mobility', 'student_feature', 'number_of_trans_from_2011', 'first_transaction_time', 'historical_trans_day', 'rank_trad_1_month', 'avg_consume_less_12_valid_month', 'top_trans_count_last_1_month', 'avg_price_top_last_12_valid_month', 'trans_top_time_last_1_month', 'trans_top_time_last_6_month', 'consume_top_time_last_1_month', 'consume_top_time_last_6_month', 'cross_consume_count_last_1_month', 'trans_fail_top_count_enum_last_1_month', 'trans_fail_top_count_enum_last_6_month', 'trans_fail_top_count_enum_last_12_month', 'consume_mini_time_last_1_month', 'max_consume_count_later_6_month', 'railway_consume_count_last_12_month', 'jewelry_consume_count_last_6_month', 'first_transaction_day', 'trans_day_last_12_month', 'apply_score', 'apply_credibility', 'query_org_count', 'query_finance_count', 'query_cash_count', 'query_sum_count', 'latest_one_month_apply', 'latest_three_month_apply', 'latest_six_month_apply', 'loans_score', 'loans_credibility_behavior', 'loans_count', 'loans_settle_count', 'loans_overdue_count', 'loans_org_count_behavior', 'consfin_org_count_behavior', 'loans_cash_count', 'latest_one_month_loan', 'latest_three_month_loan', 'latest_six_month_loan', 'history_suc_fee', 'history_fail_fee', 'latest_one_month_suc', 'latest_one_month_fail', 'loans_long_time', 'loans_credit_limit', 'loans_credibility_limit', 'loans_org_count_current', 'loans_product_count', 'loans_max_limit', 'loans_avg_limit', 'consfin_credit_limit', 'consfin_credibility', 'consfin_org_count_current', 'consfin_product_count', 'consfin_max_limit', 'consfin_avg_limit', 'latest_query_day', 'loans_latest_day']\\\n",
 739 |       "we have 13 columns in type int64, they are ['Unnamed: 0', 'custid', 'take_amount_in_later_12_month_highest', 'repayment_capability', 'is_high_user', 'historical_trans_amount', 'trans_amount_3_month', 'abs', 'avg_price_last_12_month', 'max_cumulative_consume_later_1_month', 'pawns_auctions_trusts_consume_last_1_month', 'pawns_auctions_trusts_consume_last_6_month', 'status']\\\n",
 740 |       "we have 7 columns in type object, they are ['trade_no', 'bank_card_no', 'reg_preference_for_trad', 'source', 'id_name', 'latest_query_time', 'loans_latest_time']\\\n"
 741 |      ]
 742 |     }
 743 |    ],
 744 |    "source": [
 745 |     "#提取我们需要的信息，特别是每个特征的类别信息\n",
 746 |     "\n",
 747 |     "def get_data_type(df):\n",
 748 |     "    \"\"\"Print, for each dtype in df, how many columns have it and their names.\"\"\"\n",
 749 |     "    columns_by_dtype = {}  # dtype name -> column names, in original column order\n",
 750 |     "    for name, dtype in df.dtypes.items():  # one pass, no per-column df[name] lookup\n",
 751 |     "        columns_by_dtype.setdefault(str(dtype), []).append(name)\n",
 752 |     "    for key, value in columns_by_dtype.items():  # each printed line ends with a literal backslash\n",
 753 |     "        print('we have {} columns in type {}, they are {}\\\\'.format(len(value),key,value))\n",
 754 |     "\n",
 755 |     "get_data_type(df)"
 756 |    ]
 757 |   },
 758 |   {
 759 |    "cell_type": "markdown",
 760 |    "metadata": {},
 761 |    "source": [
 762 |     "### 二、数据预处理  \n",
 763 |     "无关特征删除  \n",
 764 |     "数据类型转换  \n",
 765 |     "缺失值处理等等"
 766 |    ]
 767 |   },
 768 |   {
 769 |    "cell_type": "markdown",
 770 |    "metadata": {},
 771 |    "source": [
 772 |     "### 1 .  无关特征删除（每个特征过一下）  \n",
 773 |     "有90个特征,选出应该去除的\n",
 774 |     "\n",
 775 |     "先查看类型为 object 的特征：['trade_no', 'bank_card_no', 'reg_preference_for_trad', 'source', 'id_name', 'latest_query_time', 'loans_latest_time']  \n",
 776 |     "'bank_card_no'：都是卡号1，去掉  \n",
 777 |     "'id_name'：客户名字，去掉\n",
 778 |     "\n",
 779 |     "\n",
 780 |     "Unnamed: 0： 应该是原来的数据序号，删掉一些无用数据，造成序号不连续。可以去掉  \n",
 781 |     "custid：顾客id号，只是标识符，没有分析价值，可以去掉\n",
 782 |     " \n",
 783 |     " \n"
 784 |    ]
 785 |   },
 786 |   {
 787 |    "cell_type": "code",
 788 |    "execution_count": 5,
 789 |    "metadata": {
 790 |     "collapsed": false,
 791 |     "scrolled": true
 792 |    },
 793 |    "outputs": [
 794 |     {
 795 |      "data": {
 796 |       "text/plain": [
 797 |        "Index(['Unnamed: 0', 'custid', 'trade_no', 'bank_card_no',\n",
 798 |        "       'low_volume_percent', 'middle_volume_percent',\n",
 799 |        "       'take_amount_in_later_12_month_highest',\n",
 800 |        "       'trans_amount_increase_rate_lately', 'trans_activity_month',\n",
 801 |        "       'trans_activity_day', 'transd_mcc', 'trans_days_interval_filter',\n",
 802 |        "       'trans_days_interval', 'regional_mobility', 'student_feature',\n",
 803 |        "       'repayment_capability', 'is_high_user', 'number_of_trans_from_2011',\n",
 804 |        "       'first_transaction_time', 'historical_trans_amount',\n",
 805 |        "       'historical_trans_day', 'rank_trad_1_month', 'trans_amount_3_month',\n",
 806 |        "       'avg_consume_less_12_valid_month', 'abs',\n",
 807 |        "       'top_trans_count_last_1_month', 'avg_price_last_12_month',\n",
 808 |        "       'avg_price_top_last_12_valid_month', 'reg_preference_for_trad',\n",
 809 |        "       'trans_top_time_last_1_month', 'trans_top_time_last_6_month',\n",
 810 |        "       'consume_top_time_last_1_month', 'consume_top_time_last_6_month',\n",
 811 |        "       'cross_consume_count_last_1_month',\n",
 812 |        "       'trans_fail_top_count_enum_last_1_month',\n",
 813 |        "       'trans_fail_top_count_enum_last_6_month',\n",
 814 |        "       'trans_fail_top_count_enum_last_12_month',\n",
 815 |        "       'consume_mini_time_last_1_month',\n",
 816 |        "       'max_cumulative_consume_later_1_month',\n",
 817 |        "       'max_consume_count_later_6_month',\n",
 818 |        "       'railway_consume_count_last_12_month',\n",
 819 |        "       'pawns_auctions_trusts_consume_last_1_month',\n",
 820 |        "       'pawns_auctions_trusts_consume_last_6_month',\n",
 821 |        "       'jewelry_consume_count_last_6_month', 'status', 'source',\n",
 822 |        "       'first_transaction_day', 'trans_day_last_12_month', 'id_name',\n",
 823 |        "       'apply_score', 'apply_credibility', 'query_org_count',\n",
 824 |        "       'query_finance_count', 'query_cash_count', 'query_sum_count',\n",
 825 |        "       'latest_query_time', 'latest_one_month_apply',\n",
 826 |        "       'latest_three_month_apply', 'latest_six_month_apply', 'loans_score',\n",
 827 |        "       'loans_credibility_behavior', 'loans_count', 'loans_settle_count',\n",
 828 |        "       'loans_overdue_count', 'loans_org_count_behavior',\n",
 829 |        "       'consfin_org_count_behavior', 'loans_cash_count',\n",
 830 |        "       'latest_one_month_loan', 'latest_three_month_loan',\n",
 831 |        "       'latest_six_month_loan', 'history_suc_fee', 'history_fail_fee',\n",
 832 |        "       'latest_one_month_suc', 'latest_one_month_fail', 'loans_long_time',\n",
 833 |        "       'loans_latest_time', 'loans_credit_limit', 'loans_credibility_limit',\n",
 834 |        "       'loans_org_count_current', 'loans_product_count', 'loans_max_limit',\n",
 835 |        "       'loans_avg_limit', 'consfin_credit_limit', 'consfin_credibility',\n",
 836 |        "       'consfin_org_count_current', 'consfin_product_count',\n",
 837 |        "       'consfin_max_limit', 'consfin_avg_limit', 'latest_query_day',\n",
 838 |        "       'loans_latest_day'],\n",
 839 |        "      dtype='object')"
 840 |       ]
 841 |      },
 842 |      "execution_count": 5,
 843 |      "metadata": {},
 844 |      "output_type": "execute_result"
 845 |     }
 846 |    ],
 847 |    "source": [
 848 |     "df.columns"
 849 |    ]
 850 |   },
 851 |   {
 852 |    "cell_type": "code",
 853 |    "execution_count": 6,
 854 |    "metadata": {
 855 |     "collapsed": false,
 856 |     "scrolled": true
 857 |    },
 858 |    "outputs": [
 859 |     {
 860 |      "data": {
 861 |       "text/plain": [
 862 |        "0    20180507115231274000000023057383\n",
 863 |        "1    20180507121002192000000023073000\n",
 864 |        "2    20180507125159718000000023114911\n",
 865 |        "3    20180507121358683000000388283484\n",
 866 |        "4    20180507115448545000000388205844\n",
 867 |        "Name: trade_no, dtype: object"
 868 |       ]
 869 |      },
 870 |      "execution_count": 6,
 871 |      "metadata": {},
 872 |      "output_type": "execute_result"
 873 |     }
 874 |    ],
 875 |    "source": [
 876 |     "df['trade_no'].head(5)"
 877 |    ]
 878 |   },
 879 |   {
 880 |    "cell_type": "markdown",
 881 |    "metadata": {},
 882 |    "source": [
 883 |     "交易号，有时间信息，但后面也还有时间的特征，这里可以去掉"
 884 |    ]
 885 |   },
 886 |   {
 887 |    "cell_type": "code",
 888 |    "execution_count": 7,
 889 |    "metadata": {
 890 |     "collapsed": false,
 891 |     "scrolled": true
 892 |    },
 893 |    "outputs": [
 894 |     {
 895 |      "data": {
 896 |       "text/plain": [
 897 |        "0    卡号1\n",
 898 |        "1    卡号1\n",
 899 |        "2    卡号1\n",
 900 |        "3    卡号1\n",
 901 |        "4    卡号1\n",
 902 |        "Name: bank_card_no, dtype: object"
 903 |       ]
 904 |      },
 905 |      "execution_count": 7,
 906 |      "metadata": {},
 907 |      "output_type": "execute_result"
 908 |     }
 909 |    ],
 910 |    "source": [
 911 |     "df['bank_card_no'].head()"
 912 |    ]
 913 |   },
 914 |   {
 915 |    "cell_type": "code",
 916 |    "execution_count": 8,
 917 |    "metadata": {
 918 |     "collapsed": false
 919 |    },
 920 |    "outputs": [
 921 |     {
 922 |      "data": {
 923 |       "text/plain": [
 924 |        "xs    4754\n",
 925 |        "Name: source, dtype: int64"
 926 |       ]
 927 |      },
 928 |      "execution_count": 8,
 929 |      "metadata": {},
 930 |      "output_type": "execute_result"
 931 |     }
 932 |    ],
 933 |    "source": [
 934 |     "#查看各取值出现的次数：如果整列只有一个取值，这个特征不含信息，可以去掉\n",
 935 |     "\n",
 936 |     "df['source'].value_counts() #railway_consume_count_last_12_month\n",
 937 |     "\n",
 938 |     "# df['source'].duplicated().sum() #统计重复出现的行数；查看不同取值的数量应使用 df['source'].nunique()"
 939 |    ]
 940 |   },
 941 |   {
 942 |    "cell_type": "markdown",
 943 |    "metadata": {},
 944 |    "source": [
 945 |     "'source' 的取值全部是 xs，不含信息，删除"
 946 |    ]
 947 |   },
 948 |   {
 949 |    "cell_type": "code",
 950 |    "execution_count": 9,
 951 |    "metadata": {
 952 |     "collapsed": false,
 953 |     "scrolled": true
 954 |    },
 955 |    "outputs": [
 956 |     {
 957 |      "data": {
 958 |       "text/plain": [
 959 |        "0.0     4651\n",
 960 |        "1.0       72\n",
 961 |        "2.0       13\n",
 962 |        "4.0        3\n",
 963 |        "3.0        2\n",
 964 |        "30.0       1\n",
 965 |        "Name: railway_consume_count_last_12_month, dtype: int64"
 966 |       ]
 967 |      },
 968 |      "execution_count": 9,
 969 |      "metadata": {},
 970 |      "output_type": "execute_result"
 971 |     }
 972 |    ],
 973 |    "source": [
 974 |     "df['railway_consume_count_last_12_month'].value_counts()"
 975 |    ]
 976 |   },
 977 |   {
 978 |    "cell_type": "markdown",
 979 |    "metadata": {},
 980 |    "source": [
 981 |     "### 综上，删掉的特征（列）有'Unnamed: 0','custid','trade_no','bank_card_no' ,'source','id_name'"
 982 |    ]
 983 |   },
 984 |   {
 985 |    "cell_type": "code",
 986 |    "execution_count": 10,
 987 |    "metadata": {
 988 |     "collapsed": false,
 989 |     "scrolled": true
 990 |    },
 991 |    "outputs": [
 992 |     {
 993 |      "name": "stdout",
 994 |      "output_type": "stream",
 995 |      "text": [
 996 |       "84\n"
 997 |      ]
 998 |     },
 999 |     {
1000 |      "data": {
1001 |       "text/plain": [
1002 |        "Index(['low_volume_percent', 'middle_volume_percent',\n",
1003 |        "       'take_amount_in_later_12_month_highest',\n",
1004 |        "       'trans_amount_increase_rate_lately', 'trans_activity_month',\n",
1005 |        "       'trans_activity_day', 'transd_mcc', 'trans_days_interval_filter',\n",
1006 |        "       'trans_days_interval', 'regional_mobility', 'student_feature',\n",
1007 |        "       'repayment_capability', 'is_high_user', 'number_of_trans_from_2011',\n",
1008 |        "       'first_transaction_time', 'historical_trans_amount',\n",
1009 |        "       'historical_trans_day', 'rank_trad_1_month', 'trans_amount_3_month',\n",
1010 |        "       'avg_consume_less_12_valid_month', 'abs',\n",
1011 |        "       'top_trans_count_last_1_month', 'avg_price_last_12_month',\n",
1012 |        "       'avg_price_top_last_12_valid_month', 'reg_preference_for_trad',\n",
1013 |        "       'trans_top_time_last_1_month', 'trans_top_time_last_6_month',\n",
1014 |        "       'consume_top_time_last_1_month', 'consume_top_time_last_6_month',\n",
1015 |        "       'cross_consume_count_last_1_month',\n",
1016 |        "       'trans_fail_top_count_enum_last_1_month',\n",
1017 |        "       'trans_fail_top_count_enum_last_6_month',\n",
1018 |        "       'trans_fail_top_count_enum_last_12_month',\n",
1019 |        "       'consume_mini_time_last_1_month',\n",
1020 |        "       'max_cumulative_consume_later_1_month',\n",
1021 |        "       'max_consume_count_later_6_month',\n",
1022 |        "       'railway_consume_count_last_12_month',\n",
1023 |        "       'pawns_auctions_trusts_consume_last_1_month',\n",
1024 |        "       'pawns_auctions_trusts_consume_last_6_month',\n",
1025 |        "       'jewelry_consume_count_last_6_month', 'status', 'first_transaction_day',\n",
1026 |        "       'trans_day_last_12_month', 'apply_score', 'apply_credibility',\n",
1027 |        "       'query_org_count', 'query_finance_count', 'query_cash_count',\n",
1028 |        "       'query_sum_count', 'latest_query_time', 'latest_one_month_apply',\n",
1029 |        "       'latest_three_month_apply', 'latest_six_month_apply', 'loans_score',\n",
1030 |        "       'loans_credibility_behavior', 'loans_count', 'loans_settle_count',\n",
1031 |        "       'loans_overdue_count', 'loans_org_count_behavior',\n",
1032 |        "       'consfin_org_count_behavior', 'loans_cash_count',\n",
1033 |        "       'latest_one_month_loan', 'latest_three_month_loan',\n",
1034 |        "       'latest_six_month_loan', 'history_suc_fee', 'history_fail_fee',\n",
1035 |        "       'latest_one_month_suc', 'latest_one_month_fail', 'loans_long_time',\n",
1036 |        "       'loans_latest_time', 'loans_credit_limit', 'loans_credibility_limit',\n",
1037 |        "       'loans_org_count_current', 'loans_product_count', 'loans_max_limit',\n",
1038 |        "       'loans_avg_limit', 'consfin_credit_limit', 'consfin_credibility',\n",
1039 |        "       'consfin_org_count_current', 'consfin_product_count',\n",
1040 |        "       'consfin_max_limit', 'consfin_avg_limit', 'latest_query_day',\n",
1041 |        "       'loans_latest_day'],\n",
1042 |        "      dtype='object')"
1043 |       ]
1044 |      },
1045 |      "execution_count": 10,
1046 |      "metadata": {},
1047 |      "output_type": "execute_result"
1048 |     }
1049 |    ],
1050 |    "source": [
1051 |     "df.drop(['Unnamed: 0','custid','trade_no','bank_card_no' ,'source','id_name'],inplace =True,axis = 1)\n",
1052 |     "print(len(df.columns))\n",
1053 |     "df.columns"
1054 |    ]
1055 |   },
1056 |   {
1057 |    "cell_type": "markdown",
1058 |    "metadata": {
1059 |     "collapsed": true
1060 |    },
1061 |    "source": [
1062 |     "### 2.数值类型转换"
1063 |    ]
1064 |   },
1065 |   {
1066 |    "cell_type": "code",
1067 |    "execution_count": 11,
1068 |    "metadata": {
1069 |     "collapsed": false
1070 |    },
1071 |    "outputs": [
1072 |     {
1073 |      "name": "stdout",
1074 |      "output_type": "stream",
1075 |      "text": [
1076 |       "we have 3 columns in type object, they are ['reg_preference_for_trad', 'latest_query_time', 'loans_latest_time']\\\n",
1077 |       "we have 11 columns in type int64, they are ['take_amount_in_later_12_month_highest', 'repayment_capability', 'is_high_user', 'historical_trans_amount', 'trans_amount_3_month', 'abs', 'avg_price_last_12_month', 'max_cumulative_consume_later_1_month', 'pawns_auctions_trusts_consume_last_1_month', 'pawns_auctions_trusts_consume_last_6_month', 'status']\\\n",
1078 |       "we have 70 columns in type float64, they are ['low_volume_percent', 'middle_volume_percent', 'trans_amount_increase_rate_lately', 'trans_activity_month', 'trans_activity_day', 'transd_mcc', 'trans_days_interval_filter', 'trans_days_interval', 'regional_mobility', 'student_feature', 'number_of_trans_from_2011', 'first_transaction_time', 'historical_trans_day', 'rank_trad_1_month', 'avg_consume_less_12_valid_month', 'top_trans_count_last_1_month', 'avg_price_top_last_12_valid_month', 'trans_top_time_last_1_month', 'trans_top_time_last_6_month', 'consume_top_time_last_1_month', 'consume_top_time_last_6_month', 'cross_consume_count_last_1_month', 'trans_fail_top_count_enum_last_1_month', 'trans_fail_top_count_enum_last_6_month', 'trans_fail_top_count_enum_last_12_month', 'consume_mini_time_last_1_month', 'max_consume_count_later_6_month', 'railway_consume_count_last_12_month', 'jewelry_consume_count_last_6_month', 'first_transaction_day', 'trans_day_last_12_month', 'apply_score', 'apply_credibility', 'query_org_count', 'query_finance_count', 'query_cash_count', 'query_sum_count', 'latest_one_month_apply', 'latest_three_month_apply', 'latest_six_month_apply', 'loans_score', 'loans_credibility_behavior', 'loans_count', 'loans_settle_count', 'loans_overdue_count', 'loans_org_count_behavior', 'consfin_org_count_behavior', 'loans_cash_count', 'latest_one_month_loan', 'latest_three_month_loan', 'latest_six_month_loan', 'history_suc_fee', 'history_fail_fee', 'latest_one_month_suc', 'latest_one_month_fail', 'loans_long_time', 'loans_credit_limit', 'loans_credibility_limit', 'loans_org_count_current', 'loans_product_count', 'loans_max_limit', 'loans_avg_limit', 'consfin_credit_limit', 'consfin_credibility', 'consfin_org_count_current', 'consfin_product_count', 'consfin_max_limit', 'consfin_avg_limit', 'latest_query_day', 'loans_latest_day']\\\n"
1079 |      ]
1080 |     }
1081 |    ],
1082 |    "source": [
1083 |     "get_data_type(df)"
1084 |    ]
1085 |   },
1086 |   {
1087 |    "cell_type": "markdown",
1088 |    "metadata": {},
1089 |    "source": [
1090 |     "#### 非数值型的有三个 ['reg_preference_for_trad', 'latest_query_time', 'loans_latest_time']"
1091 |    ]
1092 |   },
1093 |   {
1094 |    "cell_type": "code",
1095 |    "execution_count": 12,
1096 |    "metadata": {
1097 |     "collapsed": false
1098 |    },
1099 |    "outputs": [
1100 |     {
1101 |      "data": {
1102 |       "text/plain": [
1103 |        "一线城市    3403\n",
1104 |        "三线城市    1064\n",
1105 |        "境外       150\n",
1106 |        "二线城市     131\n",
1107 |        "其他城市       4\n",
1108 |        "NaN        2\n",
1109 |        "Name: reg_preference_for_trad, dtype: int64"
1110 |       ]
1111 |      },
1112 |      "execution_count": 12,
1113 |      "metadata": {},
1114 |      "output_type": "execute_result"
1115 |     }
1116 |    ],
1117 |    "source": [
1118 |     "# 城市类型\n",
1119 |     "df['reg_preference_for_trad'].value_counts(dropna = False) #"
1120 |    ]
1121 |   },
1122 |   {
1123 |    "cell_type": "markdown",
1124 |    "metadata": {},
1125 |    "source": [
1126 |     "将未知的填充为其他城市，并且转为数字"
1127 |    ]
1128 |   },
1129 |   {
1130 |    "cell_type": "code",
1131 |    "execution_count": 13,
1132 |    "metadata": {
1133 |     "collapsed": false
1134 |    },
1135 |    "outputs": [],
1136 |    "source": [
1137 |     "df['reg_preference_for_trad'] = df['reg_preference_for_trad'].fillna('其他城市')  # assign back: in-place fillna on a selected column is not reliable under pandas copy-on-write\n",
1138 |     "df['reg_preference_for_trad'] = df['reg_preference_for_trad'].replace({'一线城市':1,'二线城市':2,'三线城市':3,'境外':4,'其他城市':5})  # map the category labels to integer codes 1-5"
1139 |    ]
1140 |   },
1141 |   {
1142 |    "cell_type": "markdown",
1143 |    "metadata": {},
1144 |    "source": [
1145 |     "'latest_query_time', 'loans_latest_time'为日期类型，先不动"
1146 |    ]
1147 |   },
1148 |   {
1149 |    "cell_type": "markdown",
1150 |    "metadata": {},
1151 |    "source": [
1152 |     "### 3.缺失值处理"
1153 |    ]
1154 |   },
1155 |   {
1156 |    "cell_type": "code",
1157 |    "execution_count": 14,
1158 |    "metadata": {
1159 |     "collapsed": false,
1160 |     "scrolled": true
1161 |    },
1162 |    "outputs": [
1163 |     {
1164 |      "data": {
1165 |       "text/plain": [
1166 |        "low_volume_percent                          2\n",
1167 |        "middle_volume_percent                       2\n",
1168 |        "take_amount_in_later_12_month_highest       0\n",
1169 |        "trans_amount_increase_rate_lately           3\n",
1170 |        "trans_activity_month                        2\n",
1171 |        "trans_activity_day                          2\n",
1172 |        "transd_mcc                                  2\n",
1173 |        "trans_days_interval_filter                  8\n",
1174 |        "trans_days_interval                         2\n",
1175 |        "regional_mobility                           2\n",
1176 |        "student_feature                          2998\n",
1177 |        "repayment_capability                        0\n",
1178 |        "is_high_user                                0\n",
1179 |        "number_of_trans_from_2011                   2\n",
1180 |        "first_transaction_time                      2\n",
1181 |        "historical_trans_amount                     0\n",
1182 |        "historical_trans_day                        2\n",
1183 |        "rank_trad_1_month                           2\n",
1184 |        "trans_amount_3_month                        0\n",
1185 |        "avg_consume_less_12_valid_month             2\n",
1186 |        "abs                                         0\n",
1187 |        "top_trans_count_last_1_month                2\n",
1188 |        "avg_price_last_12_month                     0\n",
1189 |        "avg_price_top_last_12_valid_month         104\n",
1190 |        "reg_preference_for_trad                     0\n",
1191 |        "trans_top_time_last_1_month                 8\n",
1192 |        "trans_top_time_last_6_month                 8\n",
1193 |        "consume_top_time_last_1_month               8\n",
1194 |        "consume_top_time_last_6_month               8\n",
1195 |        "cross_consume_count_last_1_month          426\n",
1196 |        "                                         ... \n",
1197 |        "loans_credibility_behavior                297\n",
1198 |        "loans_count                               297\n",
1199 |        "loans_settle_count                        297\n",
1200 |        "loans_overdue_count                       297\n",
1201 |        "loans_org_count_behavior                  297\n",
1202 |        "consfin_org_count_behavior                297\n",
1203 |        "loans_cash_count                          297\n",
1204 |        "latest_one_month_loan                     297\n",
1205 |        "latest_three_month_loan                   297\n",
1206 |        "latest_six_month_loan                     297\n",
1207 |        "history_suc_fee                           297\n",
1208 |        "history_fail_fee                          297\n",
1209 |        "latest_one_month_suc                      297\n",
1210 |        "latest_one_month_fail                     297\n",
1211 |        "loans_long_time                           297\n",
1212 |        "loans_latest_time                         297\n",
1213 |        "loans_credit_limit                        297\n",
1214 |        "loans_credibility_limit                   297\n",
1215 |        "loans_org_count_current                   297\n",
1216 |        "loans_product_count                       297\n",
1217 |        "loans_max_limit                           297\n",
1218 |        "loans_avg_limit                           297\n",
1219 |        "consfin_credit_limit                      297\n",
1220 |        "consfin_credibility                       297\n",
1221 |        "consfin_org_count_current                 297\n",
1222 |        "consfin_product_count                     297\n",
1223 |        "consfin_max_limit                         297\n",
1224 |        "consfin_avg_limit                         297\n",
1225 |        "latest_query_day                          304\n",
1226 |        "loans_latest_day                          297\n",
1227 |        "Length: 84, dtype: int64"
1228 |       ]
1229 |      },
1230 |      "execution_count": 14,
1231 |      "metadata": {},
1232 |      "output_type": "execute_result"
1233 |     }
1234 |    ],
1235 |    "source": [
1236 |     "df.isnull().sum() #查看缺失值"
1237 |    ]
1238 |   },
1239 |   {
1240 |    "cell_type": "markdown",
1241 |    "metadata": {},
1242 |    "source": [
1243 |     "student_feature（学生特征）缺失较多：4754 条记录中有 2998 条为空"
1244 |    ]
1245 |   },
1246 |   {
1247 |    "cell_type": "code",
1248 |    "execution_count": 15,
1249 |    "metadata": {
1250 |     "collapsed": false,
1251 |     "scrolled": true
1252 |    },
1253 |    "outputs": [
1254 |     {
1255 |      "data": {
1256 |       "text/plain": [
1257 |        "NaN     2998\n",
1258 |        " 1.0    1754\n",
1259 |        " 2.0       2\n",
1260 |        "Name: student_feature, dtype: int64"
1261 |       ]
1262 |      },
1263 |      "execution_count": 15,
1264 |      "metadata": {},
1265 |      "output_type": "execute_result"
1266 |     }
1267 |    ],
1268 |    "source": [
1269 |     "df['student_feature'].value_counts(dropna=False) #True会默认把缺失值或者na，null这些值去掉"
1270 |    ]
1271 |   },
1272 |   {
1273 |    "cell_type": "markdown",
1274 |    "metadata": {},
1275 |    "source": [
1276 |     "#### 这里的缺失值可能表示“不是学生”，暂时把 NA 当作 0 处理"
1277 |    ]
1278 |   },
1279 |   {
1280 |    "cell_type": "code",
1281 |    "execution_count": 16,
1282 |    "metadata": {
1283 |     "collapsed": false
1284 |    },
1285 |    "outputs": [
1286 |     {
1287 |      "data": {
1288 |       "text/plain": [
1289 |        "0.0    2998\n",
1290 |        "1.0    1754\n",
1291 |        "2.0       2\n",
1292 |        "Name: student_feature, dtype: int64"
1293 |       ]
1294 |      },
1295 |      "execution_count": 16,
1296 |      "metadata": {},
1297 |      "output_type": "execute_result"
1298 |     }
1299 |    ],
1300 |    "source": [
1301 |     "df['student_feature'] = df['student_feature'].fillna(0)  # treat missing as 0; assign back instead of an in-place fillna on a selected column\n",
1302 |     "df['student_feature'].value_counts(dropna=False)"
1303 |    ]
1304 |   },
1305 |   {
1306 |    "cell_type": "markdown",
1307 |    "metadata": {},
1308 |    "source": [
1309 |     "#### 其余特征缺失的话可以用众数填充  \n",
1310 |     "也有说法是  \n",
1311 |     "数值型取中位数  \n",
1312 |     "日期取众数  "
1313 |    ]
1314 |   },
1315 |   {
1316 |    "cell_type": "code",
1317 |    "execution_count": 17,
1318 |    "metadata": {
1319 |     "collapsed": false
1320 |    },
1321 |    "outputs": [
1322 |     {
1323 |      "data": {
1324 |       "text/html": [
1325 |        "<div>\n",
1326 |        "<style scoped>\n",
1327 |        "    .dataframe tbody tr th:only-of-type {\n",
1328 |        "        vertical-align: middle;\n",
1329 |        "    }\n",
1330 |        "\n",
1331 |        "    .dataframe tbody tr th {\n",
1332 |        "        vertical-align: top;\n",
1333 |        "    }\n",
1334 |        "\n",
1335 |        "    .dataframe thead th {\n",
1336 |        "        text-align: right;\n",
1337 |        "    }\n",
1338 |        "</style>\n",
1339 |        "<table border=\"1\" class=\"dataframe\">\n",
1340 |        "  <thead>\n",
1341 |        "    <tr style=\"text-align: right;\">\n",
1342 |        "      <th></th>\n",
1343 |        "      <th>low_volume_percent</th>\n",
1344 |        "      <th>middle_volume_percent</th>\n",
1345 |        "      <th>take_amount_in_later_12_month_highest</th>\n",
1346 |        "      <th>trans_amount_increase_rate_lately</th>\n",
1347 |        "      <th>trans_activity_month</th>\n",
1348 |        "      <th>trans_activity_day</th>\n",
1349 |        "      <th>transd_mcc</th>\n",
1350 |        "      <th>trans_days_interval_filter</th>\n",
1351 |        "      <th>trans_days_interval</th>\n",
1352 |        "      <th>regional_mobility</th>\n",
1353 |        "      <th>...</th>\n",
1354 |        "      <th>loans_max_limit</th>\n",
1355 |        "      <th>loans_avg_limit</th>\n",
1356 |        "      <th>consfin_credit_limit</th>\n",
1357 |        "      <th>consfin_credibility</th>\n",
1358 |        "      <th>consfin_org_count_current</th>\n",
1359 |        "      <th>consfin_product_count</th>\n",
1360 |        "      <th>consfin_max_limit</th>\n",
1361 |        "      <th>consfin_avg_limit</th>\n",
1362 |        "      <th>latest_query_day</th>\n",
1363 |        "      <th>loans_latest_day</th>\n",
1364 |        "    </tr>\n",
1365 |        "  </thead>\n",
1366 |        "  <tbody>\n",
1367 |        "    <tr>\n",
1368 |        "      <th>0</th>\n",
1369 |        "      <td>0.01</td>\n",
1370 |        "      <td>0.99</td>\n",
1371 |        "      <td>0</td>\n",
1372 |        "      <td>0.90</td>\n",
1373 |        "      <td>0.55</td>\n",
1374 |        "      <td>0.313</td>\n",
1375 |        "      <td>17.0</td>\n",
1376 |        "      <td>27.0</td>\n",
1377 |        "      <td>26.0</td>\n",
1378 |        "      <td>3.0</td>\n",
1379 |        "      <td>...</td>\n",
1380 |        "      <td>2900.0</td>\n",
1381 |        "      <td>1688.0</td>\n",
1382 |        "      <td>1200.0</td>\n",
1383 |        "      <td>75.0</td>\n",
1384 |        "      <td>1.0</td>\n",
1385 |        "      <td>2.0</td>\n",
1386 |        "      <td>1200.0</td>\n",
1387 |        "      <td>1200.0</td>\n",
1388 |        "      <td>12.0</td>\n",
1389 |        "      <td>18.0</td>\n",
1390 |        "    </tr>\n",
1391 |        "    <tr>\n",
1392 |        "      <th>1</th>\n",
1393 |        "      <td>0.02</td>\n",
1394 |        "      <td>0.94</td>\n",
1395 |        "      <td>2000</td>\n",
1396 |        "      <td>1.28</td>\n",
1397 |        "      <td>1.00</td>\n",
1398 |        "      <td>0.458</td>\n",
1399 |        "      <td>19.0</td>\n",
1400 |        "      <td>30.0</td>\n",
1401 |        "      <td>14.0</td>\n",
1402 |        "      <td>4.0</td>\n",
1403 |        "      <td>...</td>\n",
1404 |        "      <td>3500.0</td>\n",
1405 |        "      <td>1758.0</td>\n",
1406 |        "      <td>15100.0</td>\n",
1407 |        "      <td>80.0</td>\n",
1408 |        "      <td>5.0</td>\n",
1409 |        "      <td>6.0</td>\n",
1410 |        "      <td>22800.0</td>\n",
1411 |        "      <td>9360.0</td>\n",
1412 |        "      <td>4.0</td>\n",
1413 |        "      <td>2.0</td>\n",
1414 |        "    </tr>\n",
1415 |        "    <tr>\n",
1416 |        "      <th>2</th>\n",
1417 |        "      <td>0.04</td>\n",
1418 |        "      <td>0.96</td>\n",
1419 |        "      <td>0</td>\n",
1420 |        "      <td>1.00</td>\n",
1421 |        "      <td>1.00</td>\n",
1422 |        "      <td>0.114</td>\n",
1423 |        "      <td>13.0</td>\n",
1424 |        "      <td>68.0</td>\n",
1425 |        "      <td>22.0</td>\n",
1426 |        "      <td>1.0</td>\n",
1427 |        "      <td>...</td>\n",
1428 |        "      <td>1600.0</td>\n",
1429 |        "      <td>1250.0</td>\n",
1430 |        "      <td>4200.0</td>\n",
1431 |        "      <td>87.0</td>\n",
1432 |        "      <td>1.0</td>\n",
1433 |        "      <td>1.0</td>\n",
1434 |        "      <td>4200.0</td>\n",
1435 |        "      <td>4200.0</td>\n",
1436 |        "      <td>2.0</td>\n",
1437 |        "      <td>6.0</td>\n",
1438 |        "    </tr>\n",
1439 |        "    <tr>\n",
1440 |        "      <th>3</th>\n",
1441 |        "      <td>0.00</td>\n",
1442 |        "      <td>0.96</td>\n",
1443 |        "      <td>2000</td>\n",
1444 |        "      <td>0.13</td>\n",
1445 |        "      <td>0.57</td>\n",
1446 |        "      <td>0.777</td>\n",
1447 |        "      <td>22.0</td>\n",
1448 |        "      <td>14.0</td>\n",
1449 |        "      <td>6.0</td>\n",
1450 |        "      <td>3.0</td>\n",
1451 |        "      <td>...</td>\n",
1452 |        "      <td>3200.0</td>\n",
1453 |        "      <td>1541.0</td>\n",
1454 |        "      <td>16300.0</td>\n",
1455 |        "      <td>80.0</td>\n",
1456 |        "      <td>5.0</td>\n",
1457 |        "      <td>5.0</td>\n",
1458 |        "      <td>30000.0</td>\n",
1459 |        "      <td>12180.0</td>\n",
1460 |        "      <td>2.0</td>\n",
1461 |        "      <td>4.0</td>\n",
1462 |        "    </tr>\n",
1463 |        "    <tr>\n",
1464 |        "      <th>4</th>\n",
1465 |        "      <td>0.01</td>\n",
1466 |        "      <td>0.99</td>\n",
1467 |        "      <td>0</td>\n",
1468 |        "      <td>0.46</td>\n",
1469 |        "      <td>1.00</td>\n",
1470 |        "      <td>0.175</td>\n",
1471 |        "      <td>13.0</td>\n",
1472 |        "      <td>66.0</td>\n",
1473 |        "      <td>42.0</td>\n",
1474 |        "      <td>1.0</td>\n",
1475 |        "      <td>...</td>\n",
1476 |        "      <td>2300.0</td>\n",
1477 |        "      <td>1630.0</td>\n",
1478 |        "      <td>8300.0</td>\n",
1479 |        "      <td>79.0</td>\n",
1480 |        "      <td>2.0</td>\n",
1481 |        "      <td>2.0</td>\n",
1482 |        "      <td>8400.0</td>\n",
1483 |        "      <td>8250.0</td>\n",
1484 |        "      <td>22.0</td>\n",
1485 |        "      <td>120.0</td>\n",
1486 |        "    </tr>\n",
1487 |        "  </tbody>\n",
1488 |        "</table>\n",
1489 |        "<p>5 rows × 84 columns</p>\n",
1490 |        "</div>"
1491 |       ],
1492 |       "text/plain": [
1493 |        "   low_volume_percent  middle_volume_percent  \\\n",
1494 |        "0                0.01                   0.99   \n",
1495 |        "1                0.02                   0.94   \n",
1496 |        "2                0.04                   0.96   \n",
1497 |        "3                0.00                   0.96   \n",
1498 |        "4                0.01                   0.99   \n",
1499 |        "\n",
1500 |        "   take_amount_in_later_12_month_highest  trans_amount_increase_rate_lately  \\\n",
1501 |        "0                                      0                               0.90   \n",
1502 |        "1                                   2000                               1.28   \n",
1503 |        "2                                      0                               1.00   \n",
1504 |        "3                                   2000                               0.13   \n",
1505 |        "4                                      0                               0.46   \n",
1506 |        "\n",
1507 |        "   trans_activity_month  trans_activity_day  transd_mcc  \\\n",
1508 |        "0                  0.55               0.313        17.0   \n",
1509 |        "1                  1.00               0.458        19.0   \n",
1510 |        "2                  1.00               0.114        13.0   \n",
1511 |        "3                  0.57               0.777        22.0   \n",
1512 |        "4                  1.00               0.175        13.0   \n",
1513 |        "\n",
1514 |        "   trans_days_interval_filter  trans_days_interval  regional_mobility  \\\n",
1515 |        "0                        27.0                 26.0                3.0   \n",
1516 |        "1                        30.0                 14.0                4.0   \n",
1517 |        "2                        68.0                 22.0                1.0   \n",
1518 |        "3                        14.0                  6.0                3.0   \n",
1519 |        "4                        66.0                 42.0                1.0   \n",
1520 |        "\n",
1521 |        "         ...         loans_max_limit  loans_avg_limit  consfin_credit_limit  \\\n",
1522 |        "0        ...                  2900.0           1688.0                1200.0   \n",
1523 |        "1        ...                  3500.0           1758.0               15100.0   \n",
1524 |        "2        ...                  1600.0           1250.0                4200.0   \n",
1525 |        "3        ...                  3200.0           1541.0               16300.0   \n",
1526 |        "4        ...                  2300.0           1630.0                8300.0   \n",
1527 |        "\n",
1528 |        "   consfin_credibility  consfin_org_count_current  consfin_product_count  \\\n",
1529 |        "0                 75.0                        1.0                    2.0   \n",
1530 |        "1                 80.0                        5.0                    6.0   \n",
1531 |        "2                 87.0                        1.0                    1.0   \n",
1532 |        "3                 80.0                        5.0                    5.0   \n",
1533 |        "4                 79.0                        2.0                    2.0   \n",
1534 |        "\n",
1535 |        "   consfin_max_limit  consfin_avg_limit  latest_query_day  loans_latest_day  \n",
1536 |        "0             1200.0             1200.0              12.0              18.0  \n",
1537 |        "1            22800.0             9360.0               4.0               2.0  \n",
1538 |        "2             4200.0             4200.0               2.0               6.0  \n",
1539 |        "3            30000.0            12180.0               2.0               4.0  \n",
1540 |        "4             8400.0             8250.0              22.0             120.0  \n",
1541 |        "\n",
1542 |        "[5 rows x 84 columns]"
1543 |       ]
1544 |      },
1545 |      "execution_count": 17,
1546 |      "metadata": {},
1547 |      "output_type": "execute_result"
1548 |     }
1549 |    ],
1550 |    "source": [
1551 |     "# Fill the remaining missing values of every column with that column's mode.\n",
1552 |     "for col in df.columns:\n",
1553 |     "    df[col] = df[col].fillna(df[col].mode()[0])  # mode() returns a Series (ties are possible), so take its first entry\n",
1554 |     "df.head()"
1555 |    ]
1556 |   },
1557 |   {
1558 |    "cell_type": "markdown",
1559 |    "metadata": {
1560 |     "collapsed": true
1561 |    },
1562 |    "source": [
1563 |     "### 三、数据集切分"
1564 |    ]
1565 |   },
1566 |   {
1567 |    "cell_type": "code",
1568 |    "execution_count": 27,
1569 |    "metadata": {
1570 |     "collapsed": false
1571 |    },
1572 |    "outputs": [
1573 |     {
1574 |      "name": "stdout",
1575 |      "output_type": "stream",
1576 |      "text": [
1577 |       "(3327, 83) (1427, 83) (3327,) (1427,)\n"
1578 |      ]
1579 |     }
1580 |    ],
1581 |    "source": [
1582 |     "import numpy as np\n",
1583 |     "from sklearn.model_selection import train_test_split\n",
1584 |     "\n",
1585 |     "Y=df['status']\n",
1586 |     "X=df.drop('status',axis=1)\n",
1587 |     "X_train,X_test,Y_train,Y_test =train_test_split(X,Y,test_size=0.3,random_state = 2018)   \n",
1588 |     "print(X_train.shape, X_test.shape, Y_train.shape, Y_test.shape)"
1589 |    ]
1590 |   },
1591 |   {
1592 |    "cell_type": "code",
1593 |    "execution_count": null,
1594 |    "metadata": {
1595 |     "collapsed": true
1596 |    },
1597 |    "outputs": [],
1598 |    "source": []
1599 |   }
1600 |  ],
1601 |  "metadata": {
1602 |   "anaconda-cloud": {},
1603 |   "kernelspec": {
1604 |    "display_name": "Python [conda env:tensorflow-gpu]",
1605 |    "language": "python",
1606 |    "name": "conda-env-tensorflow-gpu-py"
1607 |   },
1608 |   "language_info": {
1609 |    "codemirror_mode": {
1610 |     "name": "ipython",
1611 |     "version": 3
1612 |    },
1613 |    "file_extension": ".py",
1614 |    "mimetype": "text/x-python",
1615 |    "name": "python",
1616 |    "nbconvert_exporter": "python",
1617 |    "pygments_lexer": "ipython3",
1618 |    "version": "3.5.4"
1619 |   }
1620 |  },
1621 |  "nbformat": 4,
1622 |  "nbformat_minor": 1
1623 | }
1624 | 


--------------------------------------------------------------------------------
/数据分析实践/task2_特征衍生和选择.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "### 【Task 2】\n",
  8 |     "特征衍生  \n",
  9 |     "特征挑选  分别用IV值和随机森林等进行特征选择  \n",
 10 |     "……以及你能想到的其他特征工程处理"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "markdown",
 15 |    "metadata": {},
 16 |    "source": [
 17 |     "### iv值特征选择"
 18 |    ]
 19 |   },
 20 |   {
 21 |    "cell_type": "code",
 22 |    "execution_count": null,
 23 |    "metadata": {
 24 |     "collapsed": true
 25 |    },
 26 |    "outputs": [],
 27 |    "source": [
 28 |     "def CalcIV(Xvar,Yvar):\n",
 29 |     "    '''Information value (IV) of Xvar against the binary 0/1 target Yvar, capped at 1.\n",
 30 |     "\n",
 31 |     "    Every distinct value of Xvar is treated as its own bin.\n",
 32 |     "    '''\n",
 33 |     "    levels = np.unique(Xvar)  # computed once instead of on every loop iteration\n",
 34 |     "    N_0, N_1 = np.sum(Yvar==0), np.sum(Yvar==1)\n",
 35 |     "    N_0_group = np.array([Yvar[(Xvar==v)&(Yvar==0)].count() for v in levels], dtype=float)\n",
 36 |     "    N_1_group = np.array([Yvar[(Xvar==v)&(Yvar==1)].count() for v in levels], dtype=float)\n",
 37 |     "    iv = np.sum((N_0_group/N_0-N_1_group/N_1)*np.log((N_0_group/N_0)/(N_1_group/N_1)))\n",
 38 |     "    if iv>=1.0:  # clamp extreme values (a bin with an empty class yields inf)\n",
 39 |     "        iv=1\n",
 40 |     "    return iv\n",
 41 |     "\n",
 42 |     "def caliv_batch(df,Yvar):\n",
 43 |     "    '''Compute CalcIV for every column of df against Yvar.\n",
 44 |     "\n",
 45 |     "    Returns (iv_df, ivlist): a two-column frame ('Var', 'Iv') and the plain list of IV values.\n",
 46 |     "    '''\n",
 47 |     "    feature_names = list(df.columns)\n",
 48 |     "    ivlist = [CalcIV(df[name], Yvar) for name in feature_names]\n",
 49 |     "    iv_df = pd.DataFrame({'Var': feature_names, 'Iv': ivlist}, columns=['Var', 'Iv'])\n",
 50 |     "    return iv_df, ivlist\n",
 51 |     "im_iv, ivl = caliv_batch(data_prepared.iloc[:,:-1],data_prepared.iloc[:,-1])"
 52 |    ]
 53 |   },
 54 |   {
 55 |    "cell_type": "markdown",
 56 |    "metadata": {},
 57 |    "source": [
 58 |     "### 随机森林选择特征"
 59 |    ]
 60 |   },
 61 |   {
 62 |    "cell_type": "code",
 63 |    "execution_count": null,
 64 |    "metadata": {
 65 |     "collapsed": true
 66 |    },
 67 |    "outputs": [],
 68 |    "source": [
 69 |     "from sklearn.ensemble import RandomForestClassifier\n",
 70 |     "x_train = data_prepared.iloc[:,:-1]  # every column but the last is a feature\n",
 71 |     "y_train = data_prepared.iloc[:,-1]  # the last column is the target\n",
 72 |     "feat_lables = x_train.columns\n",
 73 |     "forest = RandomForestClassifier(n_estimators=10000, random_state=0,n_jobs=1)\n",
 74 |     "forest.fit(x_train, y_train)\n",
 75 |     "importance = forest.feature_importances_\n",
 76 |     "imp_result = np.argsort(importance)[::-1]  # feature positions, most important first\n",
 77 |     "# Look the name up through the sorted position so every label is printed next to its own score.\n",
 78 |     "for rank, idx in enumerate(imp_result):\n",
 79 |     "    print(\"%2d. %-*s %f\"%(rank+1, 30, feat_lables[idx], importance[idx]))"
 80 |    ]
 81 |   }
 82 |  ],
 83 |  "metadata": {
 84 |   "anaconda-cloud": {},
 85 |   "kernelspec": {
 86 |    "display_name": "Python [conda env:tensorflow-gpu]",
 87 |    "language": "python",
 88 |    "name": "conda-env-tensorflow-gpu-py"
 89 |   },
 90 |   "language_info": {
 91 |    "codemirror_mode": {
 92 |     "name": "ipython",
 93 |     "version": 3
 94 |    },
 95 |    "file_extension": ".py",
 96 |    "mimetype": "text/x-python",
 97 |    "name": "python",
 98 |    "nbconvert_exporter": "python",
 99 |    "pygments_lexer": "ipython3",
100 |    "version": "3.5.4"
101 |   }
102 |  },
103 |  "nbformat": 4,
104 |  "nbformat_minor": 1
105 | }
106 | 


--------------------------------------------------------------------------------
/数据分析实践/task3_建模和评分.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "### 任务3 - 建模（2天）\n",
  8 |     "用逻辑回归、svm和决策树；随机森林和XGBoost进行模型构建，评分方式任意，如准确率等。（不需要考虑模型调参）  \n"
  9 |    ]
 10 |   },
 11 |   {
 12 |    "cell_type": "code",
 13 |    "execution_count": null,
 14 |    "metadata": {
 15 |     "collapsed": true
 16 |    },
 17 |    "outputs": [],
 18 |    "source": [
 19 |     "import pandas as pd\n",
 20 |     "import warnings\n",
 21 |     "from sklearn.preprocessing import scale\n",
 22 |     "from sklearn.model_selection import cross_val_score\n",
 23 |     "from sklearn.linear_model import LogisticRegression\n",
 24 |     "from sklearn.tree import DecisionTreeClassifier\n",
 25 |     "from sklearn.svm import SVC\n",
 26 |     "from sklearn.ensemble import RandomForestClassifier\n",
 27 |     "from sklearn.ensemble import GradientBoostingClassifier\n",
 28 |     "from xgboost.sklearn import XGBClassifier\n",
 29 |     "import lightgbm as lgb\n",
 30 |     "\n",
 31 |     "\n",
 32 |     "# 读取数据集\n",
 33 |     "data_all = pd.read_csv('/home/infisa/wjht/project/DataWhale/data_all.csv', encoding='gbk')\n",
 34 |     "\n",
 35 |     "# 划分为5折交叉验证数据集\n",
 36 |     "df_y=data_all['status']\n",
 37 |     "df_X=data_all.drop(columns=['status'])\n",
 38 |     "df_X=scale(df_X,axis=0)  #将数据转化为标准数据\n",
 39 |     "#构建模型\n",
 40 |     "\n",
 41 |     "lr = LogisticRegression(random_state=2018,tol=1e-6)  # 逻辑回归模型\n",
 42 |     "\n",
 43 |     "tree = DecisionTreeClassifier(random_state=2018) #决策树模型\n",
 44 |     "\n",
 45 |     "svm = SVC(probability=True,random_state=2018,tol=1e-6)  # SVM模型\n",
 46 |     "\n",
 47 |     "forest=RandomForestClassifier(n_estimators=100,random_state=2018) #　随机森林\n",
 48 |     "\n",
 49 |     "Gbdt=GradientBoostingClassifier(random_state=2018) #GBDT\n",
 50 |     "\n",
 51 |     "Xgbc=XGBClassifier(random_state=2018)  #Xgbc\n",
 52 |     "\n",
 53 |     "gbm=lgb.LGBMClassifier(random_state=2018)  #lgb\n",
 54 |     "\n",
 55 |     "\n",
 56 |     "\n",
 57 |     "def muti_score(model):\n",
 58 |     "    warnings.filterwarnings('ignore')\n",
 59 |     "    accuracy = cross_val_score(model, df_X, df_y, scoring='accuracy', cv=5)\n",
 60 |     "    precision = cross_val_score(model, df_X, df_y, scoring='precision', cv=5)\n",
 61 |     "    recall = cross_val_score(model, df_X, df_y, scoring='recall', cv=5)\n",
 62 |     "    f1_score = cross_val_score(model, df_X, df_y, scoring='f1', cv=5)\n",
 63 |     "    auc = cross_val_score(model, df_X, df_y, scoring='roc_auc', cv=5)\n",
 64 |     "    print(\"准确率:\",accuracy.mean())\n",
 65 |     "    print(\"精确率:\",precision.mean())\n",
 66 |     "    print(\"召回率:\",recall.mean())\n",
 67 |     "    print(\"F1_score:\",f1_score.mean())\n",
 68 |     "    print(\"AUC:\",auc.mean())\n",
 69 |     "\n",
 70 |     "\n",
 71 |     "\n",
 72 |     "model_name=[\"lr\",\"tree\",\"svm\",\"forest\",\"Gbdt\",\"Xgbc\",\"gbm\"]\n",
 73 |     "for name in model_name:\n",
 74 |     "    model=eval(name)\n",
 75 |     "    print(name)\n",
 76 |     "    muti_score(model)\n",
 77 |     "\n",
 78 |     "\n",
 79 |     "'''\n",
 80 |     "lr\n",
 81 |     "准确率: 0.7890191148682617\n",
 82 |     "精确率: 0.6542724662896913\n",
 83 |     "召回率: 0.3377975457965613\n",
 84 |     "F1_score: 0.44525012166067884\n",
 85 |     "AUC: 0.7840451024530857\n",
 86 |     "tree\n",
 87 |     "准确率: 0.6962524533638791\n",
 88 |     "精确率: 0.39920670173446693\n",
 89 |     "召回率: 0.4157413593052284\n",
 90 |     "F1_score: 0.40705496051057793\n",
 91 |     "AUC: 0.6029856787858856\n",
 92 |     "svm\n",
 93 |     "准确率: 0.787758390223099\n",
 94 |     "精确率: 0.7351623295760905\n",
 95 |     "召回率: 0.24060335431243626\n",
 96 |     "F1_score: 0.36179547264664874\n",
 97 |     "AUC: 0.7640376541388867\n",
 98 |     "forest\n",
 99 |     "准确率: 0.7921756804332226\n",
100 |     "精确率: 0.7135700690071172\n",
101 |     "召回率: 0.2867128441334693\n",
102 |     "F1_score: 0.40835414886475174\n",
103 |     "AUC: 0.7752164698827589\n",
104 |     "Gbdt\n",
105 |     "准确率: 0.7938590063951863\n",
106 |     "精确率: 0.6604108594441386\n",
107 |     "召回率: 0.36633732991104395\n",
108 |     "F1_score: 0.4708811551285791\n",
109 |     "AUC: 0.7888240065764295\n",
110 |     "Xgbc\n",
111 |     "准确率: 0.7982740847293591\n",
112 |     "精确率: 0.6829783239831001\n",
113 |     "召回率: 0.3663162336064133\n",
114 |     "F1_score: 0.47673826685376613\n",
115 |     "AUC: 0.7914190511145234\n",
116 |     "gbm\n",
117 |     "准确率: 0.79049080811139\n",
118 |     "精确率: 0.6421783397519263\n",
119 |     "召回率: 0.3730354066312717\n",
120 |     "F1_score: 0.47150438344663004\n",
121 |     "AUC: 0.7776116341798183\n",
122 |     "'''"
123 |    ]
124 |   }
125 |  ],
126 |  "metadata": {
127 |   "anaconda-cloud": {},
128 |   "kernelspec": {
129 |    "display_name": "Python [conda env:tensorflow-gpu]",
130 |    "language": "python",
131 |    "name": "conda-env-tensorflow-gpu-py"
132 |   },
133 |   "language_info": {
134 |    "codemirror_mode": {
135 |     "name": "ipython",
136 |     "version": 3
137 |    },
138 |    "file_extension": ".py",
139 |    "mimetype": "text/x-python",
140 |    "name": "python",
141 |    "nbconvert_exporter": "python",
142 |    "pygments_lexer": "ipython3",
143 |    "version": "3.5.4"
144 |   }
145 |  },
146 |  "nbformat": 4,
147 |  "nbformat_minor": 1
148 | }
149 | 


--------------------------------------------------------------------------------
/数据分析实践/task5_模型调优.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "【Task 5】 模型调优（2天）  \n",
  8 |     "任务5：使用网格搜索法对5个模型进行调优（调参时采用五折交叉验证的方式），并进行模型评估，记得展示代码的运行结果。   \n",
  9 |     "时间：2天  "
 10 |    ]
 11 |   },
 12 |   {
 13 |    "cell_type": "code",
 14 |    "execution_count": 39,
 15 |    "metadata": {
 16 |     "collapsed": true
 17 |    },
 18 |    "outputs": [],
 19 |    "source": [
 20 |     "import pandas as pd \n",
 21 |     "from sklearn.model_selection import train_test_split\n",
 22 |     "\n",
 23 |     "from sklearn.preprocessing import StandardScaler\n",
 24 |     "from sklearn.linear_model import LogisticRegression\n",
 25 |     "from sklearn.svm import LinearSVC\n",
 26 |     "from sklearn.tree import DecisionTreeClassifier\n",
 27 |     "from sklearn.ensemble import BaggingClassifier, RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier\n",
 28 |     "from sklearn.metrics import classification_report\n",
 29 |     "import xgboost as xgb\n",
 30 |     "import lightgbm as lgb\n",
 31 |     "from pandas import DataFrame,Series\n",
 32 |     "\n",
 33 |     "df = pd.read_csv('data.csv',encoding = 'gbk')\n",
 34 |     "\n",
 35 |     "\"\"\"\n",
 36 |     "数据处理\n",
 37 |     "\"\"\"\n",
 38 |     "\n",
 39 |     "###删除无关特征###\n",
 40 |     "df.drop(['Unnamed: 0','custid','trade_no','bank_card_no' ,'source','id_name'],inplace =True,axis = 1)\n",
 41 |     "\n",
 42 |     "###数据类型转换###（主要针对 obeject（文字类） \n",
 43 |     "df['reg_preference_for_trad'].fillna('其他城市',inplace = True)\n",
 44 |     "df['reg_preference_for_trad'].replace({'一线城市':1,'二线城市':2,'三线城市':3,'境外':4,'其他城市':5},inplace = True)\n",
 45 |     "\n",
 46 |     "# 处理日期格式 'latest_query_time', 'loans_latest_time'(暂时去掉？？？)\n",
 47 |     "\n",
 48 |     "df.drop(['latest_query_time', 'loans_latest_time'],inplace =True,axis = 1)\n",
 49 |     "\n",
 50 |     "\n",
 51 |     "###缺失值处理###\n",
 52 |     "df['student_feature'].fillna(0,inplace=True) \n",
 53 |     "for i in df.columns:\n",
 54 |     "    df[i].fillna(df[i].mode()[0],inplace = True)  #加[0]是因为众数可能有多个，返回不是一个数字"
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "markdown",
 59 |    "metadata": {},
 60 |    "source": [
 61 |     "### 评估方法一：切分数据集"
 62 |    ]
 63 |   },
 64 |   {
 65 |    "cell_type": "code",
 66 |    "execution_count": 40,
 67 |    "metadata": {
 68 |     "collapsed": false
 69 |    },
 70 |    "outputs": [],
 71 |    "source": [
 72 |     "###切分数据集###\n",
 73 |     "\n",
 74 |     "y=df['status']\n",
 75 |     "x=df.drop('status',axis=1)\n",
 76 |     "x_train,x_test,y_train,y_test =train_test_split(x,y,test_size=0.3,random_state = 2018)   \n",
 77 |     "\n",
 78 |     "features = x_train.columns #目前是83个特征\n",
 79 |     "scaler = StandardScaler()\n",
 80 |     "x_train = scaler.fit_transform(x_train)\n",
 81 |     "x_test = scaler.fit_transform(x_test)"
 82 |    ]
 83 |   },
 84 |   {
 85 |    "cell_type": "code",
 86 |    "execution_count": 41,
 87 |    "metadata": {
 88 |     "collapsed": false
 89 |    },
 90 |    "outputs": [
 91 |     {
 92 |      "name": "stdout",
 93 |      "output_type": "stream",
 94 |      "text": [
 95 |       "             precision    recall  f1-score   support\n",
 96 |       "\n",
 97 |       "          0    0.78588   0.92790   0.85101      1068\n",
 98 |       "          1    0.53614   0.24791   0.33905       359\n",
 99 |       "\n",
100 |       "avg / total    0.72306   0.75683   0.72221      1427\n",
101 |       "\n"
102 |      ]
103 |     }
104 |    ],
105 |    "source": [
106 |     "#随机森林\n",
107 |     "clf_rf = RandomForestClassifier()\n",
108 |     "clf_rf.fit(x_train, y_train)\n",
109 |     "rf_y_pred = clf_rf.predict(x_test)\n",
110 |     "\n",
111 |     "#评估 \n",
112 |     "ans = classification_report(y_test,rf_y_pred,digits=5)\n",
113 |     "print(ans)"
114 |    ]
115 |   },
116 |   {
117 |    "cell_type": "markdown",
118 |    "metadata": {},
119 |    "source": [
120 |     "### 评估方法二：交叉验证"
121 |    ]
122 |   },
123 |   {
124 |    "cell_type": "code",
125 |    "execution_count": 42,
126 |    "metadata": {
127 |     "collapsed": false
128 |    },
129 |    "outputs": [
130 |     {
131 |      "name": "stdout",
132 |      "output_type": "stream",
133 |      "text": [
134 |       "[ 0.77521008  0.78338591  0.77707676  0.76526316  0.77473684] 0.775134550981\n",
135 |       "             precision    recall  f1-score   support\n",
136 |       "\n",
137 |       "          0    0.79206   0.94664   0.86248      3561\n",
138 |       "          1    0.61847   0.25817   0.36428      1193\n",
139 |       "\n",
140 |       "avg / total    0.74850   0.77387   0.73746      4754\n",
141 |       "\n"
142 |      ]
143 |     }
144 |    ],
145 |    "source": [
146 |     "#  使用交叉验证:直接就可以验证模型，这里用全部数据集而不是训练集\n",
147 |     "from sklearn.model_selection import cross_val_predict,cross_val_score\n",
148 |     "\n",
149 |     "# #先做标准化  这里做不做都一样\n",
150 |     "scaler = StandardScaler()\n",
151 |     "x = scaler.fit_transform(x)\n",
152 |     "x= scaler.fit_transform(x)\n",
153 |     "\n",
154 |     "model_rf = RandomForestClassifier()\n",
155 |     "score_rf = cross_val_score(model_rf, x,y,cv = 5)\n",
156 |     "pred_rf = cross_val_predict(model_rf, x,y,cv = 5)\n",
157 |     "print(score_rf,score_rf.mean())\n",
158 |     "ans1 = classification_report(y,pred_rf,digits=5)\n",
159 |     "print(ans1)"
160 |    ]
161 |   },
162 |   {
163 |    "cell_type": "markdown",
164 |    "metadata": {},
165 |    "source": [
166 |     "## 模型微调"
167 |    ]
168 |   },
169 |   {
170 |    "cell_type": "markdown",
171 |    "metadata": {},
172 |    "source": [
173 |     "### 方法一：网格搜索"
174 |    ]
175 |   },
176 |   {
177 |    "cell_type": "code",
178 |    "execution_count": 46,
179 |    "metadata": {
180 |     "collapsed": false
181 |    },
182 |    "outputs": [
183 |     {
184 |      "data": {
185 |       "text/plain": [
186 |        "GridSearchCV(cv=5, error_score='raise',\n",
187 |        "       estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n",
188 |        "            max_depth=None, max_features='auto', max_leaf_nodes=None,\n",
189 |        "            min_impurity_decrease=0.0, min_impurity_split=None,\n",
190 |        "            min_samples_leaf=1, min_samples_split=2,\n",
191 |        "            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,\n",
192 |        "            oob_score=False, random_state=None, verbose=0,\n",
193 |        "            warm_start=False),\n",
194 |        "       fit_params=None, iid=True, n_jobs=1,\n",
195 |        "       param_grid=[{'max_features': [2, 4, 6, 8], 'n_estimators': [3, 10, 30]}, {'bootstrap': [False], 'max_features': [2, 3, 4], 'n_estimators': [3, 10]}],\n",
196 |        "       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,\n",
197 |        "       scoring=None, verbose=0)"
198 |       ]
199 |      },
200 |      "execution_count": 46,
201 |      "metadata": {},
202 |      "output_type": "execute_result"
203 |     }
204 |    ],
205 |    "source": [
206 |     "from sklearn.model_selection import GridSearchCV\n",
207 |     "param_grid = [\n",
208 |     "{'n_estimators': [3, 10, 30], 'max_features': [2, 4, 6, 8]},\n",
209 |     "{'bootstrap': [False], 'n_estimators': [3, 10], 'max_features': [2, 3, 4]},\n",
210 |     "]\n",
211 |     "rf_model = RandomForestClassifier()\n",
212 |     "grid_search = GridSearchCV(rf_model, param_grid, cv=5)\n",
213 |     "grid_search.fit(x,y)"
214 |    ]
215 |   },
216 |   {
217 |    "cell_type": "code",
218 |    "execution_count": 52,
219 |    "metadata": {
220 |     "collapsed": false
221 |    },
222 |    "outputs": [
223 |     {
224 |      "name": "stdout",
225 |      "output_type": "stream",
226 |      "text": [
227 |       "最高得分：0.78776\n",
228 |       "最优参数：n_estimators:30 max_features:8 bootstrap:True\n"
229 |      ]
230 |     }
231 |    ],
232 |    "source": [
233 |     "# 搜索结果\n",
234 |     "print('最高得分：%.5f'% grid_search.best_score_)\n",
235 |     "print('最优参数：n_estimators:{} max_features:{} bootstrap:{}'.format(grid_search.best_estimator_.n_estimators,\\\n",
236 |     "                                                        grid_search.best_estimator_.max_features,grid_search.best_estimator_.bootstrap))"
237 |    ]
238 |   },
239 |   {
240 |    "cell_type": "code",
241 |    "execution_count": null,
242 |    "metadata": {
243 |     "collapsed": true
244 |    },
245 |    "outputs": [],
246 |    "source": []
247 |   }
248 |  ],
249 |  "metadata": {
250 |   "anaconda-cloud": {},
251 |   "kernelspec": {
252 |    "display_name": "Python [default]",
253 |    "language": "python",
254 |    "name": "python3"
255 |   },
256 |   "language_info": {
257 |    "codemirror_mode": {
258 |     "name": "ipython",
259 |     "version": 3
260 |    },
261 |    "file_extension": ".py",
262 |    "mimetype": "text/x-python",
263 |    "name": "python",
264 |    "nbconvert_exporter": "python",
265 |    "pygments_lexer": "ipython3",
266 |    "version": "3.5.2"
267 |   }
268 |  },
269 |  "nbformat": 4,
270 |  "nbformat_minor": 1
271 | }
272 | 


--------------------------------------------------------------------------------
/数据分析实践/task6_模型融合.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "### 【Task 6】 模型融合（2天）  \n",
  8 |     "模型融合方式任意，并结合Task5给出你的最优结果。  \n",
  9 |     "时间：2天  \n",
 10 |     "例如Stacking融合，用你目前评分最高的模型作为基准模型，和其他模型进行stacking融合，得到最终模型及评分结果。  "
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "code",
 15 |    "execution_count": 5,
 16 |    "metadata": {},
 17 |    "outputs": [],
 18 |    "source": [
 19 |     "import pandas as pd \n",
 20 |     "from sklearn.model_selection import train_test_split\n",
 21 |     "\n",
 22 |     "from sklearn.preprocessing import StandardScaler\n",
 23 |     "from sklearn.linear_model import LogisticRegression\n",
 24 |     "from sklearn.svm import SVC\n",
 25 |     "from sklearn.tree import DecisionTreeClassifier\n",
 26 |     "from sklearn.ensemble import BaggingClassifier, RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier\n",
 27 |     "from sklearn.metrics import classification_report\n",
 28 |     "import xgboost as xgb\n",
 29 |     "import lightgbm as lgb\n",
 30 |     "from pandas import DataFrame,Series\n",
 31 |     "\n",
 32 |     "df = pd.read_csv('data.csv',encoding = 'gbk')\n",
 33 |     "\n",
 34 |     "\"\"\"\n",
 35 |     "数据处理\n",
 36 |     "\"\"\"\n",
 37 |     "\n",
 38 |     "###删除无关特征###\n",
 39 |     "df.drop(['Unnamed: 0','custid','trade_no','bank_card_no' ,'source','id_name'],inplace =True,axis = 1)\n",
 40 |     "\n",
 41 |     "###数据类型转换###（主要针对 obeject（文字类） \n",
 42 |     "df['reg_preference_for_trad'].fillna('其他城市',inplace = True)\n",
 43 |     "df['reg_preference_for_trad'].replace({'一线城市':1,'二线城市':2,'三线城市':3,'境外':4,'其他城市':5},inplace = True)\n",
 44 |     "\n",
 45 |     "# 处理日期格式 'latest_query_time', 'loans_latest_time'(暂时去掉？？？)\n",
 46 |     "\n",
 47 |     "df.drop(['latest_query_time', 'loans_latest_time'],inplace =True,axis = 1)\n",
 48 |     "\n",
 49 |     "\n",
 50 |     "###缺失值处理###\n",
 51 |     "df['student_feature'].fillna(0,inplace=True) \n",
 52 |     "for i in df.columns:\n",
 53 |     "    df[i].fillna(df[i].mode()[0],inplace = True)  #加[0]是因为众数可能有多个，返回不是一个数字\n",
 54 |     "    \n",
 55 |     "###切分数据集###\n",
 56 |     "\n",
 57 |     "y=df['status']\n",
 58 |     "x=df.drop('status',axis=1)\n",
 59 |     "x_train,x_test,y_train,y_test =train_test_split(x,y,test_size=0.3,random_state = 2018)   \n",
 60 |     "\n",
 61 |     "\n",
 62 |     "### 归一化 ###\n",
 63 |     "features = x_train.columns #目前是83个特征\n",
 64 |     "scaler = StandardScaler()\n",
 65 |     "x_train = scaler.fit_transform(x_train)\n",
 66 |     "x_test = scaler.fit_transform(x_test)\n"
 67 |    ]
 68 |   },
 69 |   {
 70 |    "cell_type": "markdown",
 71 |    "metadata": {},
 72 |    "source": [
 73 |     "### 选三个效果好的模型"
 74 |    ]
 75 |   },
 76 |   {
 77 |    "cell_type": "code",
 78 |    "execution_count": 6,
 79 |    "metadata": {},
 80 |    "outputs": [
 81 |     {
 82 |      "name": "stdout",
 83 |      "output_type": "stream",
 84 |      "text": [
 85 |       "              precision    recall  f1-score   support\n",
 86 |       "\n",
 87 |       "           0    0.81006   0.93446   0.86783      1068\n",
 88 |       "           1    0.64103   0.34819   0.45126       359\n",
 89 |       "\n",
 90 |       "    accuracy                        0.78697      1427\n",
 91 |       "   macro avg    0.72555   0.64132   0.65954      1427\n",
 92 |       "weighted avg    0.76754   0.78697   0.76303      1427\n",
 93 |       "\n"
 94 |      ]
 95 |     },
 96 |     {
 97 |      "name": "stderr",
 98 |      "output_type": "stream",
 99 |      "text": [
100 |       "D:\\MyCodeEnvironment\\Anaconda3\\envs\\p3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
101 |       "  FutureWarning)\n"
102 |      ]
103 |     }
104 |    ],
105 |    "source": [
106 |     "# LR\n",
107 |     "LR = LogisticRegression()\n",
108 |     "LR.fit(x_train, y_train)\n",
109 |     "lr_y_pred = LR.predict(x_test)\n",
110 |     "ans = classification_report(y_test,lr_y_pred,digits=5)\n",
111 |     "print(ans)"
112 |    ]
113 |   },
114 |   {
115 |    "cell_type": "code",
116 |    "execution_count": 7,
117 |    "metadata": {
118 |     "scrolled": false
119 |    },
120 |    "outputs": [
121 |     {
122 |      "name": "stdout",
123 |      "output_type": "stream",
124 |      "text": [
125 |       "              precision    recall  f1-score   support\n",
126 |       "\n",
127 |       "           0    0.78895   0.96255   0.86714      1068\n",
128 |       "           1    0.67742   0.23398   0.34783       359\n",
129 |       "\n",
130 |       "    accuracy                        0.77926      1427\n",
131 |       "   macro avg    0.73318   0.59827   0.60749      1427\n",
132 |       "weighted avg    0.76089   0.77926   0.73650      1427\n",
133 |       "\n"
134 |      ]
135 |     }
136 |    ],
137 |    "source": [
138 |     "#svm\n",
139 |     "SVM = SVC(gamma='auto',probability=True)\n",
140 |     "SVM.fit(x_train, y_train)\n",
141 |     "svm_y_pred = SVM.predict(x_test)\n",
142 |     "svm_y_pro = SVM.predict_proba(x_test)\n",
143 |     "# print(svm_y_pro)\n",
144 |     "ans = classification_report(y_test,svm_y_pred,digits=5)\n",
145 |     "print(ans)"
146 |    ]
147 |   },
148 |   {
149 |    "cell_type": "code",
150 |    "execution_count": 8,
151 |    "metadata": {},
152 |    "outputs": [
153 |     {
154 |      "name": "stdout",
155 |      "output_type": "stream",
156 |      "text": [
157 |       "              precision    recall  f1-score   support\n",
158 |       "\n",
159 |       "           0    0.79694   0.92603   0.85665      1068\n",
160 |       "           1    0.57527   0.29805   0.39266       359\n",
161 |       "\n",
162 |       "    accuracy                        0.76804      1427\n",
163 |       "   macro avg    0.68610   0.61204   0.62465      1427\n",
164 |       "weighted avg    0.74117   0.76804   0.73992      1427\n",
165 |       "\n"
166 |      ]
167 |     }
168 |    ],
169 |    "source": [
170 |     "# lgbm\n",
171 |     "LGBM = lgb.LGBMClassifier(boosting_type='gbdt', num_leaves=31, max_depth=-1, learning_rate=0.1, n_estimators=250,\n",
172 |     "                              max_bin=255, subsample_for_bin=200000, objective=None, min_split_gain=0.0, min_child_weight=0.001,\n",
173 |     "                              min_child_samples=20, subsample=1.0, subsample_freq=1, colsample_bytree=1.0, reg_alpha=0.0,\n",
174 |     "                              reg_lambda=0.5, random_state=None, n_jobs=-1, silent=True)\n",
175 |     "LGBM.fit(x_train, y_train)\n",
176 |     "lgbm_y_pred = LGBM.predict(x_test)\n",
177 |     "\n",
178 |     "ans = classification_report(y_test, lgbm_y_pred,digits=5)\n",
179 |     "print(ans)"
180 |    ]
181 |   },
182 |   {
183 |    "cell_type": "code",
184 |    "execution_count": 9,
185 |    "metadata": {},
186 |    "outputs": [
187 |     {
188 |      "name": "stdout",
189 |      "output_type": "stream",
190 |      "text": [
191 |       "              precision    recall  f1-score   support\n",
192 |       "\n",
193 |       "           0    0.79433   0.94382   0.86264      1068\n",
194 |       "           1    0.62025   0.27298   0.37911       359\n",
195 |       "\n",
196 |       "    accuracy                        0.77505      1427\n",
197 |       "   macro avg    0.70729   0.60840   0.62088      1427\n",
198 |       "weighted avg    0.75053   0.77505   0.74100      1427\n",
199 |       "\n"
200 |      ]
201 |     }
202 |    ],
203 |    "source": [
204 |     "# XGB\n",
205 |     "XGB = xgb.XGBClassifier(max_depth=6, num_class =2,learning_rate=0.1, n_estimators=100, silent=True, objective='multi:softmax',\n",
206 |     "                        nthread=32, gamma=0.1, min_child_weight=3, max_delta_step=0, subsample=1, colsample_bytree=1,\n",
207 |     "                        colsample_bylevel=1, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, base_score=0.5, seed=2018,\n",
208 |     "                        missing=None)\n",
209 |     "XGB.fit(x_train, y_train)\n",
210 |     "xgb2_y_pred = XGB.predict(x_test)\n",
211 |     "\n",
212 |     "ans = classification_report(y_test, xgb2_y_pred,digits=5)\n",
213 |     "print(ans)"
214 |    ]
215 |   },
216 |   {
217 |    "cell_type": "code",
218 |    "execution_count": 10,
219 |    "metadata": {},
220 |    "outputs": [
221 |     {
222 |      "name": "stdout",
223 |      "output_type": "stream",
224 |      "text": [
225 |       "              precision    recall  f1-score   support\n",
226 |       "\n",
227 |       "           0    0.79709   0.76873   0.78265      1068\n",
228 |       "           1    0.37783   0.41783   0.39683       359\n",
229 |       "\n",
230 |       "    accuracy                        0.68045      1427\n",
231 |       "   macro avg    0.58746   0.59328   0.58974      1427\n",
232 |       "weighted avg    0.69161   0.68045   0.68559      1427\n",
233 |       "\n"
234 |      ]
235 |     }
236 |    ],
237 |    "source": [
238 |     "DT = DecisionTreeClassifier()\n",
239 |     "DT.fit(x_train, y_train)\n",
240 |     "dt_y_pred = DT.predict(x_test)\n",
241 |     "dt_y_pro = DT.predict_proba(x_test)\n",
242 |     "# print(svm_y_pro)\n",
243 |     "ans = classification_report(y_test,dt_y_pred,digits=5)\n",
244 |     "print(ans)"
245 |    ]
246 |   },
247 |   {
248 |    "cell_type": "markdown",
249 |    "metadata": {},
250 |    "source": [
251 |     "### 模型融合"
252 |    ]
253 |   },
254 |   {
255 |    "cell_type": "code",
256 |    "execution_count": 12,
257 |    "metadata": {},
258 |    "outputs": [
259 |     {
260 |      "name": "stderr",
261 |      "output_type": "stream",
262 |      "text": [
263 |       "D:\\MyCodeEnvironment\\Anaconda3\\envs\\p3\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
264 |       "  FutureWarning)\n"
265 |      ]
266 |     },
267 |     {
268 |      "name": "stdout",
269 |      "output_type": "stream",
270 |      "text": [
271 |       "              precision    recall  f1-score   support\n",
272 |       "\n",
273 |       "           0    0.79652   0.94195   0.86315      1068\n",
274 |       "           1    0.62195   0.28412   0.39006       359\n",
275 |       "\n",
276 |       "    accuracy                        0.77645      1427\n",
277 |       "   macro avg    0.70923   0.61304   0.62660      1427\n",
278 |       "weighted avg    0.75260   0.77645   0.74413      1427\n",
279 |       "\n"
280 |      ]
281 |     }
282 |    ],
283 |    "source": [
284 |     "from mlxtend.classifier import StackingCVClassifier\n",
285 |     "StackingModel = StackingCVClassifier(classifiers=[XGB,LGBM, SVM],\n",
286 |     "                                   use_probas=True, \n",
287 |     "                                   meta_classifier=LR,\n",
288 |     "                                   cv=5,\n",
289 |     "                                   )\n",
290 |     "StackingModel.fit(x_train, y_train)\n",
291 |     "sm_y_pred = StackingModel.predict(x_test)\n",
292 |     "ans = classification_report(y_test,sm_y_pred,digits=5)\n",
293 |     "print(ans)"
294 |    ]
295 |   },
296 |   {
297 |    "cell_type": "code",
298 |    "execution_count": null,
299 |    "metadata": {
300 |     "collapsed": true
301 |    },
302 |    "outputs": [],
303 |    "source": []
304 |   }
305 |  ],
306 |  "metadata": {
307 |   "anaconda-cloud": {},
308 |   "kernelspec": {
309 |    "display_name": "Python 3",
310 |    "language": "python",
311 |    "name": "python3"
312 |   },
313 |   "language_info": {
314 |    "codemirror_mode": {
315 |     "name": "ipython",
316 |     "version": 3
317 |    },
318 |    "file_extension": ".py",
319 |    "mimetype": "text/x-python",
320 |    "name": "python",
321 |    "nbconvert_exporter": "python",
322 |    "pygments_lexer": "ipython3",
323 |    "version": "3.6.9"
324 |   }
325 |  },
326 |  "nbformat": 4,
327 |  "nbformat_minor": 1
328 | }
329 | 


--------------------------------------------------------------------------------
/数据分析实践/多分类的评估问题.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 52,
  6 |    "metadata": {
  7 |     "collapsed": false
  8 |    },
  9 |    "outputs": [
 10 |     {
 11 |      "name": "stdout",
 12 |      "output_type": "stream",
 13 |      "text": [
 14 |       "[[  8.79681649e-01   1.20307538e-01   1.08131372e-05]\n",
 15 |       " [  7.99706325e-01   2.00263292e-01   3.03825365e-05]\n",
 16 |       " [  8.53796795e-01   1.46177302e-01   2.59031285e-05]\n",
 17 |       " [  8.25383127e-01   1.74558937e-01   5.79356669e-05]\n",
 18 |       " [  8.97323628e-01   1.02665167e-01   1.12050036e-05]\n",
 19 |       " [  9.26986574e-01   7.30004562e-02   1.29693872e-05]\n",
 20 |       " [  8.95064974e-01   1.04895775e-01   3.92506205e-05]\n",
 21 |       " [  8.61839691e-01   1.38141399e-01   1.89095833e-05]\n",
 22 |       " [  8.03156719e-01   1.96758495e-01   8.47861140e-05]\n",
 23 |       " [  7.95421554e-01   2.04552763e-01   2.56832240e-05]\n",
 24 |       " [  8.92083069e-01   1.07910759e-01   6.17176870e-06]\n",
 25 |       " [  8.63364991e-01   1.36600589e-01   3.44201798e-05]\n",
 26 |       " [  7.88177618e-01   2.11794929e-01   2.74526810e-05]\n",
 27 |       " [  8.35079702e-01   1.64888155e-01   3.21426418e-05]\n",
 28 |       " [  9.28349898e-01   7.16491356e-02   9.66254924e-07]\n",
 29 |       " [  9.64535656e-01   3.54620850e-02   2.25877936e-06]\n",
 30 |       " [  9.40906153e-01   5.90890027e-02   4.84421830e-06]\n",
 31 |       " [  8.91740161e-01   1.08245661e-01   1.41772124e-05]\n",
 32 |       " [  8.96525617e-01   1.03467608e-01   6.77567332e-06]\n",
 33 |       " [  9.23615524e-01   7.63726510e-02   1.18248373e-05]\n",
 34 |       " [  8.30668332e-01   1.69316458e-01   1.52093733e-05]\n",
 35 |       " [  9.21914602e-01   7.80675598e-02   1.78384021e-05]\n",
 36 |       " [  9.26584671e-01   7.34068679e-02   8.46162713e-06]\n",
 37 |       " [  8.67785629e-01   1.32146178e-01   6.81931916e-05]\n",
 38 |       " [  8.41271506e-01   1.58655904e-01   7.25903122e-05]\n",
 39 |       " [  7.77263282e-01   2.22695181e-01   4.15365716e-05]\n",
 40 |       " [  8.81389224e-01   1.18568969e-01   4.18075826e-05]\n",
 41 |       " [  8.69974782e-01   1.30013638e-01   1.15794893e-05]\n",
 42 |       " [  8.60034106e-01   1.39955486e-01   1.04082979e-05]\n",
 43 |       " [  8.32052869e-01   1.67892968e-01   5.41625519e-05]\n",
 44 |       " [  8.07811588e-01   1.92136477e-01   5.19350231e-05]\n",
 45 |       " [  8.72544939e-01   1.27438925e-01   1.61360155e-05]\n",
 46 |       " [  9.33948477e-01   6.60477336e-02   3.78900866e-06]\n",
 47 |       " [  9.46250501e-01   5.37475145e-02   1.98493064e-06]\n",
 48 |       " [  7.95421554e-01   2.04552763e-01   2.56832240e-05]\n",
 49 |       " [  8.47610513e-01   1.52377535e-01   1.19520539e-05]\n",
 50 |       " [  8.70019435e-01   1.29976367e-01   4.19728170e-06]\n",
 51 |       " [  7.95421554e-01   2.04552763e-01   2.56832240e-05]\n",
 52 |       " [  8.31024910e-01   1.68917216e-01   5.78737851e-05]\n",
 53 |       " [  8.57737250e-01   1.42246900e-01   1.58501104e-05]\n",
 54 |       " [  9.00222082e-01   9.97646975e-02   1.32206853e-05]\n",
 55 |       " [  6.90741687e-01   3.09094698e-01   1.63615590e-04]\n",
 56 |       " [  8.66068303e-01   1.33887708e-01   4.39884356e-05]\n",
 57 |       " [  9.16308833e-01   8.36288777e-02   6.22895883e-05]\n",
 58 |       " [  9.15519114e-01   8.44392129e-02   4.16734713e-05]\n",
 59 |       " [  8.20309627e-01   1.79642381e-01   4.79919885e-05]\n",
 60 |       " [  9.09855663e-01   9.01327650e-02   1.15724381e-05]\n",
 61 |       " [  8.51214451e-01   1.48746052e-01   3.94971199e-05]\n",
 62 |       " [  8.95519736e-01   1.04472911e-01   7.35323849e-06]\n",
 63 |       " [  8.51563342e-01   1.48419676e-01   1.69821772e-05]\n",
 64 |       " [  2.98900777e-02   8.60393138e-01   1.09716785e-01]\n",
 65 |       " [  3.74487166e-02   7.05572459e-01   2.56978825e-01]\n",
 66 |       " [  1.17957675e-02   7.48252356e-01   2.39951876e-01]\n",
 67 |       " [  1.32920493e-02   6.51770445e-01   3.34937506e-01]\n",
 68 |       " [  1.09868088e-02   6.98832091e-01   2.90181101e-01]\n",
 69 |       " [  1.07669519e-02   5.83013186e-01   4.06219862e-01]\n",
 70 |       " [  2.15200540e-02   5.37732882e-01   4.40747064e-01]\n",
 71 |       " [  1.08418544e-01   7.68766189e-01   1.22815267e-01]\n",
 72 |       " [  1.77270021e-02   8.27562690e-01   1.54710308e-01]\n",
 73 |       " [  3.30493839e-02   5.28708770e-01   4.38241846e-01]\n",
 74 |       " [  2.93117962e-02   7.72717609e-01   1.97970595e-01]\n",
 75 |       " [  4.09569813e-02   6.19765980e-01   3.39277039e-01]\n",
 76 |       " [  1.95378252e-02   8.79697992e-01   1.00764183e-01]\n",
 77 |       " [  8.73285529e-03   5.96503817e-01   3.94763328e-01]\n",
 78 |       " [  1.67434866e-01   7.12756209e-01   1.19808925e-01]\n",
 79 |       " [  4.75535678e-02   8.43626581e-01   1.08819852e-01]\n",
 80 |       " [  1.22530319e-02   4.23869480e-01   5.63877488e-01]\n",
 81 |       " [  3.84753639e-02   8.50175432e-01   1.11349204e-01]\n",
 82 |       " [  3.09968794e-03   5.96264678e-01   4.00635634e-01]\n",
 83 |       " [  3.59781700e-02   8.08752206e-01   1.55269624e-01]\n",
 84 |       " [  6.20745751e-03   2.73106189e-01   7.20686354e-01]\n",
 85 |       " [  5.81151228e-02   8.19701311e-01   1.22183566e-01]\n",
 86 |       " [  1.95840574e-03   5.33800891e-01   4.64240703e-01]\n",
 87 |       " [  8.77628703e-03   7.04654010e-01   2.86569703e-01]\n",
 88 |       " [  3.69274341e-02   8.38990091e-01   1.24082475e-01]\n",
 89 |       " [  3.61807169e-02   8.28744840e-01   1.35074443e-01]\n",
 90 |       " [  8.14489700e-03   7.77156946e-01   2.14698157e-01]\n",
 91 |       " [  4.64006697e-03   5.23164549e-01   4.72195384e-01]\n",
 92 |       " [  1.33500103e-02   5.63205976e-01   4.23444014e-01]\n",
 93 |       " [  1.28473017e-01   8.31361691e-01   4.01652917e-02]\n",
 94 |       " [  3.60902230e-02   8.03217466e-01   1.60692311e-01]\n",
 95 |       " [  5.05096042e-02   8.46149445e-01   1.03340951e-01]\n",
 96 |       " [  5.69724571e-02   8.11250984e-01   1.31776559e-01]\n",
 97 |       " [  1.22453086e-03   3.99201919e-01   5.99573550e-01]\n",
 98 |       " [  1.03123407e-02   3.65034695e-01   6.24652965e-01]\n",
 99 |       " [  4.17476538e-02   4.77844283e-01   4.80408063e-01]\n",
100 |       " [  1.90525287e-02   7.45629538e-01   2.35317933e-01]\n",
101 |       " [  7.05352060e-03   7.56932682e-01   2.36013798e-01]\n",
102 |       " [  5.57541864e-02   6.67410837e-01   2.76834977e-01]\n",
103 |       " [  2.10790319e-02   6.62362244e-01   3.16558724e-01]\n",
104 |       " [  8.98003281e-03   5.99716389e-01   3.91303578e-01]\n",
105 |       " [  1.52196906e-02   6.32329159e-01   3.52451150e-01]\n",
106 |       " [  3.47695685e-02   7.98625645e-01   1.66604786e-01]\n",
107 |       " [  9.15416570e-02   7.95877151e-01   1.12581192e-01]\n",
108 |       " [  1.98418694e-02   6.40871800e-01   3.39286330e-01]\n",
109 |       " [  4.81040905e-02   7.31039981e-01   2.20855929e-01]\n",
110 |       " [  3.44565240e-02   6.77463657e-01   2.88079819e-01]\n",
111 |       " [  3.38822929e-02   7.96899915e-01   1.69217792e-01]\n",
112 |       " [  2.54574647e-01   6.90791330e-01   5.46340233e-02]\n",
113 |       " [  3.63488963e-02   7.04234211e-01   2.59416893e-01]\n",
114 |       " [  1.86036022e-04   1.48760823e-01   8.51053141e-01]\n",
115 |       " [  8.09069371e-04   2.94422745e-01   7.04768186e-01]\n",
116 |       " [  2.78126551e-04   3.30535386e-01   6.69186488e-01]\n",
117 |       " [  4.56288643e-04   3.38732197e-01   6.60811514e-01]\n",
118 |       " [  2.51393977e-04   2.57092194e-01   7.42656412e-01]\n",
119 |       " [  6.03186905e-05   3.82744333e-01   6.17195349e-01]\n",
120 |       " [  2.04838186e-03   2.81103453e-01   7.16848165e-01]\n",
121 |       " [  1.23247784e-04   4.24393655e-01   5.75483097e-01]\n",
122 |       " [  1.59929758e-04   4.23195996e-01   5.76644074e-01]\n",
123 |       " [  3.56390886e-04   1.52542892e-01   8.47100717e-01]\n",
124 |       " [  2.99635433e-03   2.78024684e-01   7.18978962e-01]\n",
125 |       " [  6.45242833e-04   3.55681241e-01   6.43673516e-01]\n",
126 |       " [  6.81029987e-04   2.98859721e-01   7.00459249e-01]\n",
127 |       " [  6.28418142e-04   2.96807692e-01   7.02563890e-01]\n",
128 |       " [  6.10997845e-04   1.74593604e-01   8.24795398e-01]\n",
129 |       " [  1.09757190e-03   1.73257823e-01   8.25644605e-01]\n",
130 |       " [  7.99254871e-04   3.48929847e-01   6.50270898e-01]\n",
131 |       " [  1.93443479e-04   2.38473708e-01   7.61332849e-01]\n",
132 |       " [  1.30064976e-05   4.20137191e-01   5.79849802e-01]\n",
133 |       " [  6.81548718e-04   4.69975854e-01   5.29342597e-01]\n",
134 |       " [  5.04477452e-04   2.25292722e-01   7.74202801e-01]\n",
135 |       " [  1.33913767e-03   2.30143290e-01   7.68517573e-01]\n",
136 |       " [  3.82097113e-05   4.28006955e-01   5.71954836e-01]\n",
137 |       " [  2.05299242e-03   4.00421888e-01   5.97525119e-01]\n",
138 |       " [  6.77847072e-04   2.37204010e-01   7.62118143e-01]\n",
139 |       " [  4.56383243e-04   3.97527741e-01   6.02015876e-01]\n",
140 |       " [  3.19858866e-03   3.83866887e-01   6.12934525e-01]\n",
141 |       " [  3.42364119e-03   3.27541103e-01   6.69035256e-01]\n",
142 |       " [  3.00544917e-04   2.98288662e-01   7.01410793e-01]\n",
143 |       " [  6.78376797e-04   5.10705151e-01   4.88616472e-01]\n",
144 |       " [  1.61719140e-04   4.27941843e-01   5.71896438e-01]\n",
145 |       " [  6.44775841e-04   3.44845359e-01   6.54509865e-01]\n",
146 |       " [  2.75279882e-04   2.78027400e-01   7.21697320e-01]\n",
147 |       " [  2.07731418e-03   4.90652652e-01   5.07270034e-01]\n",
148 |       " [  3.54683506e-04   4.42580814e-01   5.57064503e-01]\n",
149 |       " [  1.82017584e-04   3.42008155e-01   6.57809828e-01]\n",
150 |       " [  6.30908753e-04   1.28602511e-01   8.70766580e-01]\n",
151 |       " [  9.21940559e-04   3.20888055e-01   6.78190005e-01]\n",
152 |       " [  4.29311663e-03   3.18426266e-01   6.77280618e-01]\n",
153 |       " [  1.16680587e-03   3.00989509e-01   6.97843685e-01]\n",
154 |       " [  4.46290865e-04   2.02461924e-01   7.97091785e-01]\n",
155 |       " [  2.15227432e-03   2.48822456e-01   7.49025270e-01]\n",
156 |       " [  8.09069371e-04   2.94422745e-01   7.04768186e-01]\n",
157 |       " [  2.91162367e-04   2.24919706e-01   7.74789132e-01]\n",
158 |       " [  4.50477099e-04   1.53984748e-01   8.45564775e-01]\n",
159 |       " [  1.15724730e-03   2.33616548e-01   7.65226205e-01]\n",
160 |       " [  9.19025197e-04   3.79220387e-01   6.19860588e-01]\n",
161 |       " [  1.45811816e-03   2.98379693e-01   7.00162189e-01]\n",
162 |       " [  1.09779827e-03   1.31785617e-01   8.67116585e-01]\n",
163 |       " [  1.68397530e-03   2.81057800e-01   7.17258224e-01]] [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
164 |       " 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
165 |       " 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2\n",
166 |       " 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2\n",
167 |       " 2 2]\n"
168 |      ]
169 |     }
170 |    ],
171 |    "source": [
172 |     "from sklearn import datasets\n",
173 |     "import numpy as np\n",
174 |     "from sklearn.preprocessing import label_binarize\n",
175 |     "from sklearn.linear_model import LogisticRegression\n",
176 |     "from sklearn.metrics import confusion_matrix, precision_score, accuracy_score,recall_score, f1_score,roc_auc_score\n",
177 |     "\n",
178 |     "\n",
179 |     "iris = datasets.load_iris()\n",
180 |     "x, y = iris.data, iris.target\n",
181 |     "n_class = len(set(iris.target))\n",
182 |     "y_one_hot = label_binarize(y, classes=np.arange(n_class))\n",
183 |     "\n",
184 |     "# alpha = np.logspace(-2, 2, 20)  #设置超参数范围\n",
185 |     "# model = LogisticRegressionCV(Cs = alpha, cv = 3, penalty = 'l2')  #使用L2正则化\n",
186 |     "model = LogisticRegression()\n",
187 |     "model.fit(x, y)\n",
188 |     "y_score = model.predict(x)\n",
189 |     "y_score_pro = model.predict_proba(x)  # per-class probabilities, shape (n_samples, n_classes)\n",
190 |     "y_score_one_hot = label_binarize(y_score, classes=np.arange(n_class))\n",
191 |     "print(y_score_pro,y)"
192 |    ]
193 |   },
194 |   {
195 |    "cell_type": "code",
196 |    "execution_count": 53,
197 |    "metadata": {
198 |     "collapsed": false
199 |    },
200 |    "outputs": [
201 |     {
202 |      "name": "stdout",
203 |      "output_type": "stream",
204 |      "text": [
205 |       "confusion_matrix\n",
206 |       " [[50  0  0]\n",
207 |       " [ 0 45  5]\n",
208 |       " [ 0  1 49]]\n"
209 |      ]
210 |     }
211 |    ],
212 |    "source": [
213 |     "from sklearn.metrics import confusion_matrix, precision_score, accuracy_score,recall_score, f1_score,roc_auc_score\n",
214 |     "\n",
215 |     "obj1 = confusion_matrix(y, y_score)\n",
216 |     "print('confusion_matrix\\n', obj1)"
217 |    ]
218 |   },
219 |   {
220 |    "cell_type": "code",
221 |    "execution_count": 54,
222 |    "metadata": {
223 |     "collapsed": false,
224 |     "scrolled": true
225 |    },
226 |    "outputs": [
227 |     {
228 |      "name": "stdout",
229 |      "output_type": "stream",
230 |      "text": [
231 |       "[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
232 |       " 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
233 |       " 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2\n",
234 |       " 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2\n",
235 |       " 2 2]\n",
236 |       "accuracy:0.96\n",
237 |       "precision:0.96\n",
238 |       "recall:0.96\n",
239 |       "f1-score:0.96\n",
240 |       "AUC:0.97\n",
241 |       "\n"
242 |      ]
243 |     }
244 |    ],
245 |    "source": [
246 |     "print(y)\n",
247 |     "print('accuracy:{}'.format(accuracy_score(y, y_score)))\n",
248 |     "print('precision:{}'.format(precision_score(y, y_score,average='micro')))\n",
249 |     "print('recall:{}'.format(recall_score(y, y_score,average='micro')))\n",
250 |     "print('f1-score:{}'.format(f1_score(y, y_score,average='micro')))\n",
251 |     "print('AUC:{}\\n'.format(roc_auc_score(y_one_hot, y_score_one_hot,average='micro')))\n",
252 |     " #形式一：原始值（0或1或2）\n",
253 |     "     #形式二：各类概率值\n",
254 |     "         # 形式三：one-hot值"
255 |    ]
256 |   },
257 |   {
258 |    "cell_type": "code",
259 |    "execution_count": 55,
260 |    "metadata": {
261 |     "collapsed": false
262 |    },
263 |    "outputs": [
264 |     {
265 |      "name": "stdout",
266 |      "output_type": "stream",
267 |      "text": [
268 |       "accuracy:0.96\n",
269 |       "\n",
270 |       "precision:0.96\n",
271 |       "\n",
272 |       "recall:0.96\n",
273 |       "\n",
274 |       "f1-score:0.96\n",
275 |       "\n",
276 |       "AUC:0.97\n",
277 |       "\n"
278 |      ]
279 |     }
280 |    ],
281 |    "source": [
282 |     "print('accuracy:{}\\n'.format(accuracy_score(y_one_hot, y_score_one_hot)))\n",
283 |     "print('precision:{}\\n'.format(precision_score(y_one_hot, y_score_one_hot,average='micro')))\n",
284 |     "print('recall:{}\\n'.format(recall_score(y_one_hot, y_score_one_hot,average='micro')))\n",
285 |     "print('f1-score:{}\\n'.format(f1_score(y_one_hot, y_score_one_hot,average='micro')))\n",
286 |     "print('AUC:{}\\n'.format(roc_auc_score(y_one_hot, y_score_one_hot,average='micro')))"
287 |    ]
288 |   },
289 |   {
290 |    "cell_type": "code",
291 |    "execution_count": 56,
292 |    "metadata": {
293 |     "collapsed": false
294 |    },
295 |    "outputs": [
296 |     {
297 |      "data": {
298 |       "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAEKCAYAAAD9xUlFAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XucTfX+x/HXJ0KoFLpRhyLHMDPuJDmkRBe6KIp0c0tK\n/Ajdy6mjc3ROSop0v6Akow6lFCndlOuM1JDQjejizpjP74+97TNpzGzMnrX3zPv5eOzHmb322mu9\n1zitz3zX5bPM3REREQE4JOgAIiISP1QUREQkQkVBREQiVBRERCRCRUFERCJUFEREJEJFQUREIlQU\nREQkQkVBREQiSgYdYH9VqlTJq1WrFnQMEZGE8vnnn//s7pXzmy/hikK1atWYP39+0DFERBKKmX0b\nzXw6fCQiIhEqCiIiEqGiICIiESoKIiISoaIgIiIRKgoiIhKhoiAiIhEqCiIiEqGiICIiESoKIiIS\noaIgIiIRKgoiIhKhoiAiIhEqCiIiEhGzomBmT5nZOjNbuo/PzcweNrNMM1tsZg1ilUVERKITy5HC\nM0C7PD5vD9QMv3oBj8Uwi4iIRCFmRcHd3wc25jFLR+A5D/kYqGBmx8cqj4iI5C/IJ69VAdbkeL82\nPO2HYOLEl2ue/pT3lq8POoaIFJCs337Cs3ZxaMWqrBpxXtBx9ikhHsdpZr0IHWLipJNOKtR1a+cs\nIgfDs3ez6Yv/8uv7z1Hq+FM57vL7g46UpyCLwnfAiTneVw1P+xN3HweMA2jUqJHHPtr/BFkQWteq\nzNPXNAls/SJycJYtW0aPHj2YN28e7dq1Y+zYsYX+h+3+CrIoTAP6mdlEoCnwm7sHdugovxFBPA/3\nRCT+bN26lZYtW5Kdnc1zzz1Ht27dMLOgY+UrZkXBzCYArYBKZrYWuAs4FMDdHwemA+cCmcBW4JpY\nZdnjQA8Fta5VOQZpRKQo+vLLL6lVqxZly5blxRdfJDU1lWOPPTboWFGLWVFw98vz+dyBGwpqfQVx\n7F+Ha0TkQG3bto27776bkSNH8uyzz9KtWzfatm0bdKz9lhAnmqMRbUHQjl9ECtr7779Pjx49+Prr\nr+nRowfnn39+0JEOWJEoCtc8/WnkZx37F5HCdM8993D33XdTvXp13nnnHdq0aRN0pIOS8L2Pch42\n0rF/ESksoSPg0KhRIwYMGMCSJUsSviAA2J4NSxSNGjXy+fPnA38uCDosJCKx9vPPPzNgwABq1qzJ\nnXfeGXScqJnZ5+7eKL/5EnqkoIIgIoXF3Xn55ZdJSkpi4sSJHHJIQu8+96lInFNQQRCRWPr+++/p\n27cvaWlpNGrUiHfeeYeUlJSgY8VE0Sx1IiIF6Mcff+Tdd9/lX//6Fx999FGRLQhQREYKIiIFbeXK\nlUybNo2bb76ZBg0asHr1aipUqBB0rJjTSEFEJIfdu3fzn//8h7p163LXXXfx448/AhSLggAqCiIi\nEenp6Zx++ukMHDiQM888k/T0dI477rigYxUqHT4SESHUwO5vf/sbZsZLL71Ely5dEqKBXUFTURCR\nYi0jI4PatWtTtmxZJk6cSGpqKpUrF98bYXX4SESKpa1btzJ48GCSk5N54YUXADjrrLOKdUGABB4p\n5Ox3JCKyP2bPnk3Pnj3JzMykd+/edOjQIehIcSNhRwrqdyQiB+Kuu+6idevWuDvvvvsujz/+OEce\neWTQseJGwhaFPXQ3s4hEY0+ftyZNmvB///d/LF68mNatWwecKv4kfFEQEcnL+vXrueKKK7j33nsB\nOO+88xg5ciRly5YNOFl8UlEQkSLJ3XnppZeoXbs2kydPplSpUkFHSggqCiJS5Kxdu5YOHTrQtWtX\natSowYIFCxg2bFjQsRKCioKIFDnr16/n/fff59///jcffvghderUCTpSwkjYS1JFRHLKzMzk9ddf\nZ8CAAdSvX581a9ZwxBFHBB0r
4WikICIJLSsri5EjR5KcnMw999zDTz/9BKCCcIBUFEQkYS1ZsoTm\nzZszePBg2rZtS3p6Oscee2zQsRKaDh+JSELaunUrrVu35pBDDmHixIlcdtllxbKBXUFLyKKgFhci\nxdfSpUupU6cOZcuWZdKkSaSmplKpUqWgYxUZCXn4SC0uRIqfLVu2MHDgQFJSUiIN7Nq0aaOCUMAS\ncqSwh1pciBQPs2bNomfPnnzzzTf07duXjh07Bh2pyErIkYKIFB933HEHZ511FiVLlmTOnDk8+uij\nurIohlQURCQuZWdnA9C8eXNuueUWFi1aRMuWLQNOVfSpKIhIXFm3bh1dunThnnvuAaB9+/Y88MAD\nHHbYYQEnKx5UFEQkLrg7L7zwArVr1+a1115TF9OAxLQomFk7M1tuZplmNjSXz480s9fNbJGZpZvZ\nNbHMIyLxac2aNZx//vlceeWV1KpViwULFjBkyJCgYxVLMSsKZlYCeBRoDyQBl5tZ0l6z3QBkuHsq\n0Ap40Mzy7G+76uctMUgrIkHasGEDH374IaNGjWLu3LkkJe29q5DCEstLUpsAme6+EsDMJgIdgYwc\n8zhwuIVuQywPbASy8lroph1ZlEf3KIgkuq+++opp06YxaNAg6tWrx5o1azj88MODjlXsxfLwURVg\nTY73a8PTchoN1Aa+B5YA/d09O5qF6x4FkcSUlZXFAw88QEpKCvfdd1+kgZ0KQnwI+kTzOcBC4ASg\nHjDazP50AbKZ9TKz+WY2v7ADikjBWbRoEU2bNmXo0KGce+65ZGRkqIFdnIllUfgOODHH+6rhaTld\nA0zxkEzgG+Cvey/I3ce5eyN3bxSztCISU1u3bqVNmzZ89913TJ48mSlTpnD88ccHHUv2Esui8BlQ\n08yqh08edwGm7TXPaqANgJkdC9QCVsYwk4gUssWLF+PulC1blldeeYWMjAwuueSSoGPJPsSsKLh7\nFtAPeAtYBrzs7ulm1sfM+oRnGw40N7MlwCxgiLv/HKtMIlJ4Nm/eTP/+/alXrx7PP/88AK1bt+bo\no48OOJnkJaYN8dx9OjB9r2mP5/j5e6BtLDOISOF7++236dWrF6tWraJfv35cdNFFQUeSKEU1UjCz\nUmZWI9ZhRCTx3XbbbbRt25bSpUszd+5cHnnkEV1ZlEDyLQpmdh6hy0XfDr+vZ2avxTqYiCSWPQ3s\nWrRowbBhw1i4cCEtWrQIOJXsr2hGCvcCTYFfAdx9IaBRg4gA8OOPP9KpUyfuvvtuINTA7v7776dM\nmTLBBpMDEk1R2OXuv+41zWMRRkQSh7vzzDPPkJSUxBtvvKFnHBQR0ZxoXmZmlwGHmFl14Cbg49jG\nEpF49u2339KrVy9mzpxJixYtGD9+PLVq1Qo6lhSAaEYK/YCGQDYwBdgB9I9lKBGJb7/++iufffYZ\no0ePZs6cOSoIRUg0I4Vz3H0IEOlja2YXEyoQIlJMLF++nGnTpjF48GBSU1NZvXo15cuXDzqWFLBo\nRgq35zLttoIOIiLxadeuXfzjH/8gNTWVESNGsG7dOgAVhCJqnyMFMzsHaAdUMbN/5/joCEKHkkSk\niFuwYAHXXXcdCxYsoFOnTowePZpjjjkm6FgSQ3kdPloHLAW2A+k5pm8C/vQUNREpWrZu3crZZ5/N\noYceyquvvsrFF18cdCQpBPssCu6+AFhgZi+6+/ZCzCQiAVqwYAH16tWjbNmyTJ48mdTUVI466qig\nY0khieacQhUzm2hmi83sqz2vmCcTkUK1adMm+vXrR4MGDSIN7Fq1aqWCUMxEUxSeAZ4GjNDzll8G\nJsUwk4gUsjfffJO6desyZswY+vfvr0NFxVg0RaGsu78F4O4r3P12QsVBRIqAYcOG0b59e8qVK8eH\nH37IQw89pCuLirFo7lPYYWaHACvCz0H4DlDLQ5EEt3v3bkqUKEGrVq0oWbIkt99+O6VLlw46lg
Qs\nmqIwAChHqL3FfcCRwLWxDCUisfPDDz9www03UKdOHYYPH84555zDOeecE3QsiRP5Hj5y90/cfZO7\nr3b3K929A7Aq9tFEpCC5O08//TRJSUnMmDFDJ5AlV3kWBTNrbGYXmlml8Ps6ZvYc8EmhpBORArFq\n1Sratm3LtddeS3JyMosWLWLgwIFBx5I4tM+iYGb/AF4EugJvmtndwHvAIuDUQkknIgXit99+44sv\nvmDMmDHMnj2bU0/Vf8KSu7zOKXQEUt19m5kdDawBkt19ZeFEE5GDkZGRwbRp0xg6dGikgV25cuWC\njiVxLq/DR9vdfRuAu28EvlJBEIl/O3fu5O9//zv169dn5MiRkQZ2KggSjbxGCieb2Z722AZUz/Ee\nd9fdLSJxZv78+Vx33XUsXryYLl26MGrUKDWwk/2SV1G4ZK/3o2MZREQOzpYtWzjnnHMoU6YMaWlp\ndOjQIehIkoDyaog3qzCDiMiB+eKLL6hXrx7lypXjtddeIyUlhQoVKgQdSxJUNG0uRCQO/f777/Tt\n25eGDRvywgsvANCyZUsVBDko0dzRLCJxZvr06fTu3Zvvv/+egQMHcsklex/tFTkwUY8UzExNUUTi\nwJAhQzjvvPM44ogjmDdvHg8++KCuLJICk+9IwcyaAE8S6nl0kpmlAj3c/cZYhxOREHcnOzubEiVK\n0KZNG8qUKcOtt96qBnZS4KIZKTwMnA9sAHD3RUDrWIYSkf/57rvvuPDCC7nrrrsAaNu2Lffcc48K\ngsRENEXhEHf/dq9pu2MRRkT+x9154oknSEpKYubMmVSqVCnoSFIMRHOieU34EJKbWQngRkCP4xSJ\noW+++YbrrruO9957j1atWvHEE09Qo0aNoGNJMRDNSOF6YCBwEvAT0Cw8LV9m1s7MlptZppkN3cc8\nrcxsoZmlm9mcaIOLFGWbN29m8eLFjB07llmzZqkgSKGJZqSQ5e5d9nfB4VHFo8DZwFrgMzOb5u4Z\nOeapAIwB2rn7ajPT/fhSbC1dupRp06Zx6623kpyczOrVqylbtmzQsaSYiWak8JmZTTezq8xsfx7D\n2QTIdPeV7r4TmEio82pOVwBT3H01gLuv24/lixQJO3fu5J577qFBgwb85z//iTSwU0GQIETz5LVT\ngL8DDYElZjbVzKIZOVQh1G57j7XhaTmdChxlZrPN7HMz657bgsysl5nNN7P5UaxXJGF89tlnNGzY\nkLvvvptLL72UjIwMNbCTQEV185q7z3P3m4AGwO+EHr5TEEoSKjbnAecAd5jZn57+4e7j3L2Ruzcq\noPWKBG7Lli20a9eOX375hWnTpvHiiy9SuXLloGNJMRfNzWvlCR326QLUBtKA5lEs+zvgxBzvq4an\n5bQW2ODuW4AtZvY+kEo+Vze1rqX/cCRxzZ8/nwYNGlCuXDnS0tJITk7myCOPDDqWCBDdSGEpoSuO\n/unuNdz9/9w9mmc0fwbUNLPqZlaKUFGZttc8aUALMytpZmWBpsCy/Bb89DVNoli9SHz57bff6N27\nN40bN440sGvRooUKgsSVaK4+Otnds/d3we6eZWb9gLeAEsBT7p5uZn3Cnz/u7svM7E1gMZANjHf3\npfu7LpF49/rrr9OnTx9+/PFHBg0aRKdOnYKOJJIrc/fcPzB70N3/z8xeA/40U1BPXit9fE3f8cPX\nQaxa5IAMHjyYkSNHkpyczJNPPknjxo2DjiTFkJl9Hs152bxGCpPC/6snronsJ3dn9+7dlCxZkrZt\n23LEEUcwZMgQSpUqFXQ0kTztc6QQmcGsn7uPzm9aYdFIQeLd2rVruf7660lJSeG+++4LOo4IEP1I\nIZoTzdfmMu26/Y8kUrRlZ2czduxYkpKSePfddznuuOOCjiSy3/Z5+MjMOhO6Yqi6mU3J8dHhwK+x\nDiaSSFauXMm1117LnDlzaNOmDePGjePkk08OOpbIfsvrnM
KnhJ6hUJVQD6M9NgELYhlKJNFs2bKF\njIwMxo8fz7XXXouZBR1J5IDke04h3uicgsSLJUuWkJaWxu233w7Atm3bOOywwwJOJZK7gz6nsKeN\ntZn9YmYbc7x+MbONBRlWJJHs2LGDO++8kwYNGvDwww9HGtipIEhRkNeJ5j2P3KwEVM7x2vNepNj5\n+OOPadCgAcOHD+fyyy9n2bJlamAnRco+zynkuIv5ROB7d99pZi2AFOAFQo3xRIqNLVu2cN5551Gu\nXDmmT59O+/btg44kUuCiuSR1KqFHcZ4CPA3UBF6KaSqROPLJJ5+QnZ1NuXLleP3110lPT1dBkCIr\nmqKQ7e67gIuBR9x9AH9+LoJIkfPrr7/So0cPmjVrFmlg17x5cw4/fH+eNSWSWKJ6HKeZXQpcCVwY\nnnZo7CKJBG/q1Kn07duXdevWMWTIEC699NKgI4kUimjvaG5NqHX2SjOrDkyIbSyR4AwcOJCLLrqI\nY445hk8++YQRI0boyiIpNvIdKbj7UjO7CahhZn8l9NxlNXSRIiVnA7tzzz2XihUrcsstt3DooRoU\nS/ESTUO8M4DnCT01zYDjgCvd/cPYx/sz3bwmBW316tX06dOH+vXrq4GdFFkF2RDvP8C57n66uzcn\n9DzlUQcbUCRo2dnZjBkzhjp16jBnzhxOOOGEoCOJBC6aE82l3D1jz5vw09LUFF4SWmZmJtdeey1z\n587l7LPPZty4cVSrVi3oWCKBi6YofGFmjxO6YQ2gK2qIJwlu+/btfPXVVzz99NNcddVVamAnEhbN\nOYUywE1Ai/CkuYTuV9ge42y50jkFOVALFy4kLS2Nu+66CwgVhjJlygScSqRwRHtOIc+iYGbJwClA\nurvHxZ5YRUH21/bt2xk+fDgPPPAAlSpVYvHixepXJMVOQXRJvZVQi4uuwNtmltsT2ETi2rx586hf\nvz73338/3bp1IyMjQwVBJA95nVPoCqS4+xYzqwxMB54qnFgiB2/Lli1ccMEFlC9fnjfffJNzzjkn\n6EgicS+vorDD3bcAuPt6M4vm8lWRwH300Uc0bdqUcuXK8cYbb1C3bl31KxKJUl47+pPNbEr49Rpw\nSo73U/L4nkggfvnlF6699lqaN2/O888/D8Bpp52mgiCyH/IaKVyy1/vRsQwicjCmTJnCDTfcwPr1\n6xk2bBidO3cOOpJIQsrrITuzCjOIyIEaMGAADz30EPXq1WP69OnUr18/6EgiCSuam9dE4k7OBnbn\nn38+xxxzDIMGDVIDO5GDlO/Na/FG9ynIqlWr6N27Nw0aNOAf//hH0HFEEkJBNsTbs8DSBxdJ5OBk\nZ2fzyCOPULduXebNm8df/vKXoCOJFDn5FgUza2JmS4Cvw+9TzeyRmCcTyeHrr7+mZcuW3HTTTZxx\nxhksXbqUPn36BB1LpMiJZqTwMHA+sAHA3RcRehJbvsysnZktN7NMMxuax3yNzSzLzDpFs1wpfnbu\n3MmKFSt47rnnmD59ukYJIjESTVE4xN2/3Wva7vy+ZGYlgEeB9kAScLmZJe1jvgeAmVFkkWJkwYIF\n3H333QDUqVOHVatWceWVV6qjqUgMRVMU1phZE8DNrISZ3Qx8FcX3mhB6dOdKd98JTAQ65jLfjcCr\nwLpoQ0vRtn37doYNG0bjxo0ZO3Ys69evB6B0aZ3WEom1aIrC9cBA4CTgJ6BZeFp+qgBrcrxfG54W\nYWZVgIuAx6IJK0XfBx98QGpqKiNGjKB79+5kZGRQuXLloGOJFBv53qfg7uuALjFa/0PAEHfPzuuQ\ngJn1AnoBlDquRoyiSNA2b95Mx44dOeKII5g5cyZnn3120JFEip18i4KZPQH86WYGd++Vz1e/A07M\n8b5qeFpOjYCJ4YJQCTjXzLLcfepe6xoHjIPQfQr5ZZbE8sEHH9C8eXPKly/Pf//7X+rWrUv58uWD\njiVSLEVz+OgdYFb49S
FwDLAjiu99BtQ0s+rhZzp3AablnMHdq7t7NXevBkwG+u5dEKTo2rBhA927\nd+eMM86INLBr1qyZCoJIgKI5fDQp53szex74IIrvZZlZP+AtoATwlLunm1mf8OePH1hkSXTuzuTJ\nk+nXrx8bN27kjjvuoEuXWB2hFJH9cSC9j6oDx0Yzo7tPJ/RwnpzTci0G7n71AWSRBDRgwABGjRpF\nw4YNmTlzJqmpqUFHEpGwaM4p/ML/zikcAmwE9nkjmkhu3J2srCwOPfRQOnTowAknnMDAgQMpWVI9\nGUXiSZ4N8Sx0BvhE/neCONsD7qCnhniJ55tvvqFXr140bNiQESNGBB1HpFgqkIZ44QIw3d13h1+6\n8keitnv3bkaNGkXdunX55JNPOPnkk4OOJCL5iGbsvtDM6rv7gpinkSLjq6++4uqrr+ajjz6iffv2\njB07lhNPPDH/L4pIoPZZFMyspLtnAfWBz8xsBbAFMEKDiAaFlFESUFZWFt9++y0vvPACV1xxhfoV\niSSIvEYKnwINgA6FlEUS3Pz580lLS2P48OEkJSWxcuVK9SsSSTB5nVMwAHdfkdurkPJJAti2bRu3\n3HILTZs25amnnlIDO5EEltdIobKZDdzXh+7+7xjkkQQzZ84cevToQWZmJj179uSf//wnFSpUCDqW\niBygvIpCCaA84RGDyN42b97MxRdfTIUKFZg1axZnnnlm0JFE5CDlVRR+cPd7Cy2JJIy5c+dy+umn\nU758eWbMmEGdOnUoV65c0LFEpADke05BZI+ff/6Zbt260bJly0gDuyZNmqggiBQheY0U2hRaColr\n7s7LL7/MjTfeyC+//MJdd92lBnYiRdQ+i4K7byzMIBK/+vfvzyOPPELjxo2ZNWsWycnJQUcSkRhR\nNzLJlbuza9cuSpUqxUUXXcRf/vIXbr75ZkqUKBF0NBGJoTwb4sUjNcSLvRUrVtCzZ08aNWrEP//5\nz6DjiEgBKJCGeFK87N69m3//+98kJyfz+eefU6tWraAjiUgh0+EjAeDLL7/kqquu4tNPP+WCCy7g\nscceo0qVKkHHEpFCpqIgAGRnZ/P9998zYcIEOnfurAZ2IsWUikIx9umnn5KWlsZ9991HUlISK1as\noFSpUkHHEpEA6ZxCMbR161YGDRrEaaedxrPPPhtpYKeCICIqCsXMe++9R3JyMg8++CA9e/YkPT2d\nypUrBx1LROKEDh8VI5s3b+bSSy+lQoUKvPfee7Rq1SroSCISZzRSKAZmz55NdnZ2pIHd4sWLVRBE\nJFcqCkXY+vXrufzyy2ndujUvvPACAI0bN6Zs2bIBJxOReKXDR0WQuzNhwgRuuukmNm3axPDhw9XA\nTkSioqJQBN144408+uijNGvWjCeffJKkpKSgI4lIglBRKCKys7PJysqiVKlSdOrUiRo1anDjjTeq\ngZ2I7Bc1xCsCvv76a3r27Enjxo3517/+FXQcEYlDaohXDGRlZTFy5EhSUlJYuHAhtWvXDjqSiCQ4\nHT5KUMuWLaN79+7Mnz+fjh07MmbMGE444YSgY4lIglNRSGA//fQTkyZN4tJLL1UDOxEpEDE9fGRm\n7cxsuZllmtnQXD7vamaLzWyJmc0zs9RY5kl0H3/8McOGDQOgdu3arFixgssuu0wFQUQKTMyKgpmV\nAB4F2gNJwOVmtve1kd8Af3P3ZGA4MC5WeRLZli1bGDBgAM2bN+fFF1+MNLA79NBDA04mIkVNLEcK\nTYBMd1/p7juBiUDHnDO4+zx3/yX89mOgagzzJKR33nmHunXr8tBDD9G3b181sBORmIrlOYUqwJoc\n79cCTfOY/zpgRm4fmFkvoBdAqeNqFFS+uLd582a6dOnC0Ucfzfvvv88ZZ5wRdCQRKeLi4pJUM2tN\nqCgMye1zdx/n7o2iuca2KHj33XfZvXs35cuX56233mLRokUqCCJSKGJZFL4DTszxvmp4
2h+YWQow\nHujo7htimCfu/fTTT1x22WW0adMm0sCuYcOGHHbYYQEnE5HiIpZF4TOgpplVN7NSQBdgWs4ZzOwk\nYApwpbt/FcMscc3def7550lKSoo8HvOKK64IOpaIFEMxO6fg7llm1g94CygBPOXu6WbWJ/z548Cd\nQEVgTPiyyqzicogopxtuuIHHHnuM0047jSeffFJ3JotIYNT7KCDZ2dns2rWL0qVLM2fOHBYvXkzf\nvn3VwE5EYiLa3kcqCgFYvnw5PXr0oGnTpowcOTLoOCJSDKghXhzatWsXI0aMIDU1laVLl5KcnBx0\nJBGRP1Dvo0KSnp7OlVdeyYIFC7j44ot59NFHOe6444KOJSLyByoKhaREiRJs3LiRyZMnc8kllwQd\nR0QkVzp8FEPz5s1jyJDQ/Xh//etfyczMVEEQkbimohADmzdv5qabbqJFixZMmjSJn3/+GYCSJTUw\nE5H4pqJQwGbOnEndunUZPXo0/fr1Y+nSpVSqVCnoWCIiUdGfrgVo8+bNdO3alYoVKzJ37lxOP/30\noCOJiOwXjRQKwNtvvx1pYDdz5kwWLlyogiAiCUlF4SD88MMPXHLJJbRt25YXX3wRgPr161OmTJmA\nk4mIHBgVhQPg7jzzzDMkJSXx3//+lxEjRqiBnYgUCTqncACuv/56xo4dS4sWLRg/fjy1atUKOpJI\nXNq1axdr165l+/btQUcpNsqUKUPVqlUP+HG9KgpRytnA7oorriAlJYU+ffpwyCEabInsy9q1azn8\n8MOpVq0a4U7IEkPuzoYNG1i7di3Vq1c/oGVojxaFZcuWccYZZ3DrrbcC0LJlS/r27auCIJKP7du3\nU7FiRRWEQmJmVKxY8aBGZtqr5WHXrl3cf//91KtXjy+//JL69esHHUkk4aggFK6D/X2rKOxDeno6\nTZo04bbbbqNjx45kZGTQrVu3oGOJyAGYOnUqZsaXX34JwOzZszn//PP/MM/VV1/N5MmTgdAfhEOH\nDqVmzZo0aNCA0047jRkzZkS1rh07dtC5c2dq1KhB06ZNWbVqVa7zTZo0iZSUFOrUqRNphwPw7bff\n0qZNG1JSUmjVqhVr166NfFaiRAnq1atHvXr16NChw/78CqKmorAPJUuW5LfffmPKlCm8/PLLHHvs\nsUFHEpEDNGHCBFq0aMGECROimv+OO+7ghx9+YOnSpXzxxRdMnTqVTZs2RfXdJ598kqOOOorMzEwG\nDBjwhx3+Hhs2bGDw4MHMmjWL9PR0fvzxR2bNmgXAoEGD6N69O4sXL+bOO+9k2LBhke8ddthhLFy4\nkIULFzLOlts2AAANDElEQVRt2rQ/LbcgqCjkMHfuXAYNGgRArVq1+Oqrr7jooosCTiUiB2Pz5s18\n8MEHPPnkk0ycODHf+bdu3coTTzzBI488QunSpQE49thjueyyy6JaX1paGldddRUAnTp1YtasWez9\nMLOVK1dSs2ZNKleuDMBZZ53Fq6++CkBGRgZnnnkmAK1btyYtLS26DS0guvoI2LRpE0OHDmXMmDFU\nr16doUOHUqlSJTWwEylA1Yb+NybLXTXivDw/T0tLo127dpx66qlUrFiRzz//PM/5MzMzOemkkzji\niCNy/bxz584sX778T9MHDhxI9+7d+e677zjxxBOB0BGHI488kg0bNvyhB1qNGjVYvnw5q1atomrV\nqkydOpWdO3cCkJqaypQpU+jfvz+vvfYamzZtYsOGDZETyA0aNKBUqVIMHTqUCy+8MM9tORDFfq83\nY8YMevfuzdq1a7n55pv5+9//Trly5YKOJSIFZMKECfTv3x+ALl26MGHCBC644IJc543mJO2kSZMO\nOtNRRx3FY489RufOnTnkkENo3rw5K1asAGDkyJH069ePZ555hpYtW1KlSpXIs9u//fZbqlSpwsqV\nKznzzDNJTk7mlFNOOeg8ORXrorBp0ya6d+/OMccc
w7x582jWrFnQkUSKrPz+oo+FjRs38u6777Jk\nyRLMjN27d2NmXHXVVfzyyy9/mrdSpUrUqFGD1atX8/vvv+c6WshvpFClShXWrFlD1apVycrK4rff\nfqNixYp/mv+CCy6IFKdx48ZFdvwnnHACU6ZMAUKHvl599VUqVKgAQJUqVQA4+eSTadWqFQsWLCjw\nooC7J9Sr1HE1/GBkZ2f7jBkzPCsry93dFy5c6Nu3bz+oZYpI7jIyMgJd/9ixY71Xr15/mNayZUuf\nPXu2V6tWLZJv1apVftJJJ/mvv/7q7u6DBw/2q6++2nfs2OHu7uvWrfOXX345qnWOHj3ae/fu7e7u\nEyZM8EsvvTTX+X766Sd3d9+4caOnpqb68uXL3d19/fr1vnv3bnd3v/XWW/2OO+6IzLdnX7V+/Xqv\nUaOGp6en57rs3H7vwHyPYh8b+E5+f18HUxS+//57v/DCCx3wZ5999oCXIyLRCbootGrVymfMmPGH\naaNGjfI+ffr4Bx984E2bNvXU1FRv1KiRz5w5MzLPjh07fPDgwX7KKad4nTp1vEmTJv7mm29Gtc5t\n27Z5p06d/JRTTvHGjRv7ihUrIp+lpqZGfu7SpYvXrl3ba9eu7RMmTIhMf+WVV7xGjRpes2ZNv+66\n6yKF4MMPP/S6det6SkqK161b18ePH7/PDAdTFMz3Oise70ofX9N3/PD1fn3H3Xn66acZOHAgO3bs\n4N5772XAgAE6kSwSY8uWLaN27dpBxyh2cvu9m9nn7t4ov+8Wi71inz59GDduHC1btmT8+PHUrFkz\n6EgiInGpyBaF3bt3s2vXLsqUKUO3bt2oX78+vXr1Ur8iEZE8FMk9ZHp6Oqeffnqkgd0ZZ5yhjqYi\nIlEoUnvJnTt3Mnz4cOrXr09mZiaNGzcOOpJIsZdo5y0T3cH+vovM4aMlS5bQtWtXlixZQpcuXXj4\n4Ycjt5CLSDDKlCkTuRtX3VJjz8PPUziYRwIXmaJQqlQptm7dSlpaWsy6B4rI/qlatSpr165l/fr1\nQUcpNvY8ee1AxfSSVDNrB4wCSgDj3X3EXp9b+PNzga3A1e7+RV7LzHlJ6pw5c5g2bRoPPvggEDq5\nvOeuQBER+Z9oL0mN2TkFMysBPAq0B5KAy80saa/Z2gM1w69ewGPRLPv333/n+uuvp1WrVkydOpWf\nf/4ZQAVBROQgxfJEcxMg091XuvtOYCLQca95OgLPhW+4+xioYGbH57XQ7B1bqFOnDuPGjWPgwIEs\nWbLkD90HRUTkwMXynEIVYE2O92uBplHMUwX4YZ9L3bSeI0+oxeTJk2nadO/FiYjIwUiIE81m1ovQ\n4SWAHenp6UuLWUfTSsDPQYcoZNrm4kHbXHj+Es1MsSwK3wEn5nhfNTxtf+fB3ccB4wDMbH40J0uK\nEm1z8aBtLh7ifZtjeU7hM6CmmVU3s1JAF2Dvh4pOA7pbSDPgN3ff96EjERGJqZiNFNw9y8z6AW8R\nuiT1KXdPN7M+4c8fB6YTuhw1k9AlqdfEKo+IiOQvpucU3H06oR1/zmmP5/jZgRv2c7HjCiBaotE2\nFw/a5uIhrrc54Z6nICIisVOkGuKJiMjBiduiYGbtzGy5mWWa2dBcPjczezj8+WIzaxBEzoIUxTZ3\nDW/rEjObZ2apQeQsSPltc475GptZlpl1Ksx8BS2a7TWzVma20MzSzWxOYWcsaFH8//pIM3vdzBaF\ntznhzy2a2VNmts7Mlu7j8/jdf0XzzM7CfhE6Mb0COBkoBSwCkvaa51xgBmBAM+CToHMXwjY3B44K\n/9y+OGxzjvneJXR+qlPQuWP8b1wByABOCr8/JujchbDNtwIPhH+uDGwESgWd/SC3uyXQAFi6j8/j\ndv8VryOFmLTIiHP5brO7z3P3X8JvPyZ0X0cii+bfGeBG4FVgXWGGi4FotvcKYIq7rwZw9+KwzQ4c\nHm6QWZ5QUcgq
3JgFy93fJ7Qd+xK3+694LQr7an+xv/Mkkv3dnusI/aWRyPLdZjOrAlxElM0S41w0\n/8anAkeZ2Wwz+9zMuhdautiIZptHA7WB74ElQH93zy6ceIGJ2/1XQrS5kD8ys9aEikKLoLMUgoeA\nIe6eXUwe0lISaAi0AQ4DPjKzj939q2BjxdQ5wELgTOAU4G0zm+vuvwcbq3iK16JQYC0yEkhU22Nm\nKcB4oL27byikbLESzTY3AiaGC0Il4Fwzy3L3qYUTsUBFs71rgQ3uvgXYYmbvA6lAohaFaLb5GmCE\nhw62Z5rZN8BfgU8LJ2Ig4nb/Fa+Hj4pji4x8t9nMTgKmAFcWkb8c891md6/u7tXcvRowGeiboAUB\novv/dRrQwsxKmllZQp2FlxVyzoIUzTavJjQywsyOBWoBKws1ZeGL2/1XXI4UvBi2yIhym+8EKgJj\nwn85Z3kcN9bKT5TbXGREs73uvszM3gQWA9mEnliY62WNiSDKf+PhwDNmtoTQ1ThD3D2hO6ea2QSg\nFVDJzNYCdwGHQvzvv3RHs4iIRMTr4SMREQmAioKIiESoKIiISISKgoiIRKgoiIhIhIqCxB0z2x3u\nErrnVS2PeavtqxPlfq5zdriT5yIz+9DMah3AMi40s6Qc7+81s7Py+c50M6twgDk/M7N6UXzn5vA9\nDyL5UlGQeLTN3evleK0qpPV2dfdU4FngXwfw/QuBSFFw9zvd/Z28vuDu57r7r/u5nj05xxBdzpsB\nFQWJioqCJITwiGCumX0RfjXPZZ46ZvZpeHSx2Mxqhqd3yzF9rJmVyGd17wM1wt9tY2YLLPQMi6fM\nrHR4+ggzywivZ2Q4TwfgX+H1nGJmz5hZJws9T+CVHDlbmdkb4Z9XmVmlA8z5ETmaqJnZY2Y230LP\nJLgnPO0m4ATgPTN7LzytrZl9FP49vmJm5fNZjxQjKgoSjw7LcejotfC0dcDZ7t4A6Aw8nMv3+gCj\n3L0eoZ5Ja82sdnj+08PTdwNd81n/BcASMysDPAN0dvdkQh0ArjezioQ6t9Zx9xTg7+4+j1DrgsHh\n0c2KHMt7B2hqZuXC7zsTaiEdcYA52wE5W37cFr7DPQX4m5mluPvDhLqPtnb31uECdDtwVvh3OR8Y\nmM96pBiJyzYXUuxtC+8YczoUGB0+hr6bUIvpvX0E3GZmVQk9k+BrM2tDqOvoZ+HWIIex7+cyvGhm\n24BVhJ7hUAv4JkefqWeBGwi1et4OPBn+i/+NvDYm3OrhTeACM5sMnAfcstds+5uzFKFnD+T8PV1m\nZr0I/Xd9PKFDWYv3+m6z8PQPw+spRej3JgKoKEjiGAD8RKhj6CGEdsp/4O4vmdknhHa6082sN6Fe\nOs+6+7Ao1tHV3efveWNmR+c2U3gn34TQjrwT0I9Q2+e8TAzPtxGY7+6b9vp8v3ICnxM6n/AIcLGZ\nVQcGAY3d/RczewYok8t3DXjb3S+PYj1SDOnwkSSKI4Efwg9fuZJQc7U/MLOTgZXhQyZphA6jzAI6\nmdkx4XmONrO/RLnO5UA1M6sRfn8lMCd8DP5Id59OqFjteVb2JuDwfSxrDqHHM/Zkr0NHYfuVM9xm\n+g6gmZn9FTgC2AL8ZqFOo+1zzJ4z18fA6Xu2yczKmVluoy4pplQUJFGMAa4ys0WEeu1vyWWey4Cl\nZrYQqEvocYcZhI6hzzSzxcDbhA6t5MvdtxPqXvlKuINnNvA4oR3sG+HlfcD/jslPBAaHT0yfstey\ndhM6zNSeXA43HUhOd98GPEjoPMYiYAHwJfAS8GGOWccBb5rZe+6+HrgamBBez0eEfp8igLqkiohI\nDhopiIhIhIqCiIhEqCiIiEiEioKIiESoKIiISISKgoiIRKgoiIhIhIqCiIhE/D+sEC/LFFLCXQAA\nAABJRU5ErkJggg==\n",
299 |       "text/plain": [
300 |        "<matplotlib.figure.Figure at 0x15bfaa50cc0>"
301 |       ]
302 |      },
303 |      "metadata": {},
304 |      "output_type": "display_data"
305 |     }
306 |    ],
307 |    "source": [
308 |     "import matplotlib.pyplot as plt\n",
309 |     "from sklearn.metrics import roc_curve\n",
310 |     "%matplotlib inline\n",
311 |     "auc = roc_auc_score(y_one_hot, y_score_pro,average='micro')\n",
312 |     "fpr, tpr, thresholds = roc_curve(y_one_hot.ravel(),y_score_pro.ravel())  # ravel()表示平铺开来\n",
313 |     "plt.plot(fpr, tpr, linewidth = 2,label='AUC=%.3f' % auc)\n",
314 |     "plt.plot([0,1],[0,1], 'k--')\n",
315 |     "plt.axis([0,1.1,0,1.1])\n",
316 |     "plt.xlabel('False Positive Rate')\n",
317 |     "plt.ylabel('True Positive Rate')\n",
318 |     "plt.legend()\n",
319 |     "plt.show()\n"
320 |    ]
321 |   },
322 |   {
323 |    "cell_type": "code",
324 |    "execution_count": null,
325 |    "metadata": {
326 |     "collapsed": true
327 |    },
328 |    "outputs": [],
329 |    "source": []
330 |   }
331 |  ],
332 |  "metadata": {
333 |   "kernelspec": {
334 |    "display_name": "Python [default]",
335 |    "language": "python",
336 |    "name": "python3"
337 |   },
338 |   "language_info": {
339 |    "codemirror_mode": {
340 |     "name": "ipython",
341 |     "version": 3
342 |    },
343 |    "file_extension": ".py",
344 |    "mimetype": "text/x-python",
345 |    "name": "python",
346 |    "nbconvert_exporter": "python",
347 |    "pygments_lexer": "ipython3",
348 |    "version": "3.5.2"
349 |   }
350 |  },
351 |  "nbformat": 4,
352 |  "nbformat_minor": 1
353 | }
354 | 


--------------------------------------------------------------------------------
/爬虫与网页分析/data/1396354:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html lang="zh-cmn-Hans" class="">
  3 | <head>
  4 |     <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
  5 |     <meta name="renderer" content="webkit">
  6 |     <meta name="referrer" content="always">
  7 |     <meta name="google-site-verification" content="ok0wCgT20tBBgo9_zat2iAcimtN4Ftf5ccsh092Xeyw" />
  8 |     <title>
  9 |     孙敬季 (豆瓣)
 10 | </title>
 11 |     
 12 |     <meta name="baidu-site-verification" content="cZdR4xxR7RxmM4zE" />
 13 |     <meta http-equiv="Pragma" content="no-cache">
 14 |     <meta http-equiv="Expires" content="Sun, 6 Mar 2005 01:00:00 GMT">
 15 |     
 16 |     <meta name="keywords" content="孙敬季 Jingji Sun,简介,个人资料,图片,电影作品,获奖情况,合作影人"/>
 17 |     <meta name="description" content="孙敬季简介、图片写真、获奖情况及电影作品一览"/>
 18 | 
 19 |     <link rel="apple-touch-icon" href="https://img3.doubanio.com/f/movie/d59b2715fdea4968a450ee5f6c95c7d7a2030065/pics/movie/apple-touch-icon.png">
 20 |     <link href="https://img3.doubanio.com/f/shire/bf61b1fa02f564a4a8f809da7c7179b883a56146/css/douban.css" rel="stylesheet" type="text/css">
 21 |     <link href="https://img3.doubanio.com/f/shire/ae3f5a3e3085968370b1fc63afcecb22d3284848/css/separation/_all.css" rel="stylesheet" type="text/css">
 22 |     <link href="https://img3.doubanio.com/f/movie/8864d3756094f5272d3c93e30ee2e324665855b0/css/movie/base/init.css" rel="stylesheet">
 23 |     <script type="text/javascript">var _head_start = new Date();</script>
 24 |     <script type="text/javascript" src="https://img3.doubanio.com/f/movie/0495cb173e298c28593766009c7b0a953246c5b5/js/movie/lib/jquery.js"></script>
 25 |     <script type="text/javascript" src="https://img3.doubanio.com/f/shire/d07bddd8077c84e11c8e93a31a024ef714f938fc/js/douban.js"></script>
 26 |     <script type="text/javascript" src="https://img3.doubanio.com/f/shire/0efdc63b77f895eaf85281fb0e44d435c6239a3f/js/separation/_all.js"></script>
 27 |     
 28 | <link rel="alternate" href="android-app://com.douban.movie/doubanmovie/celebrity/1396354/" />
 29 | <link rel="stylesheet" type="text/css" href="https://img3.doubanio.com/f/movie/5c131bbdf002cf3c8ffada7c52640f616de723ce/css/movie/celebrity.css" />
 30 | <link rel="stylesheet" type="text/css" href="https://img3.doubanio.com/f/movie/a6d2e5a112ca7dfc3f4e023e67d1fdb398a0d819/css/movie/show.css" />
 31 | <script type="text/javascript" src="https://img3.doubanio.com/f/shire/77323ae72a612bba8b65f845491513ff3329b1bb/js/do.js" data-cfg-autoload="false"></script>
 32 | 
 33 |     <style type="text/css">
 34 | .mod { margin-bottom: 25px }
 35 | .mod .hd { margin-bottom: 10px }
 36 | fieldset {border:1px solid #DDDDDD;margin-bottom:15px;padding:0 10px;}
 37 | .mod h2{color:#333d48;}
 38 | #headline img { width: 135px;}
 39 | </style>
 40 |     <style type="text/css">img { max-width: 100%; }</style>
 41 |     <script type="text/javascript"></script>
 42 |     <style type="text/css">
 43 | #opt-bar .a-btn {
 44 |     color: #000; background: #fff3e7; border: 1px solid #e7d7ca; padding: 0; outline: none; float:left; height:22px;
 45 |     line-height:22px; width:46px; font:12px Helvetica,Arial,sans-serif;cursor: pointer;
 46 | }
 47 | 
 48 |         .minisubmit{ color:#bbb; vertical-align: baseline; }
 49 |         .minisubmit:hover { color:#fff;background-color:#bbb;}
 50 |     </style>
 51 | 
 52 |     <link rel="shortcut icon" href="https://img3.doubanio.com/favicon.ico" type="image/x-icon">
 53 | </head>
 54 | 
 55 | <body>
 56 |   
 57 |     <script type="text/javascript">var _body_start = new Date();</script>
 58 | 
 59 |     
 60 |     
 61 | 
 62 | 
 63 | 
 64 |     <link href="//img3.doubanio.com/dae/accounts/resources/0246c88/shire/bundle.css" rel="stylesheet" type="text/css">
 65 | 
 66 | 
 67 | 
 68 | <div id="db-global-nav" class="global-nav">
 69 |   <div class="bd">
 70 |     
 71 | <div class="top-nav-info">
 72 |   <a href="https://accounts.douban.com/passport/login?source=movie" class="nav-login" rel="nofollow">登录/注册</a>
 73 | </div>
 74 | 
 75 | 
 76 |     <div class="top-nav-doubanapp">
 77 |   <a href="https://www.douban.com/doubanapp/app?channel=top-nav" class="lnk-doubanapp">下载豆瓣客户端</a>
 78 |   <div id="doubanapp-tip">
 79 |     <a href="https://www.douban.com/doubanapp/app?channel=qipao" class="tip-link">豆瓣 <span class="version">6.0</span> 全新发布</a>
 80 |     <a href="javascript: void 0;" class="tip-close">×</a>
 81 |   </div>
 82 |   <div id="top-nav-appintro" class="more-items">
 83 |     <p class="appintro-title">豆瓣</p>
 84 |     <p class="qrcode">扫码直接下载</p>
 85 |     <div class="download">
 86 |       <a href="https://www.douban.com/doubanapp/redirect?channel=top-nav&direct_dl=1&download=iOS">iPhone</a>
 87 |       <span>·</span>
 88 |       <a href="https://www.douban.com/doubanapp/redirect?channel=top-nav&direct_dl=1&download=Android" class="download-android">Android</a>
 89 |     </div>
 90 |   </div>
 91 | </div>
 92 | 
 93 |     
 94 | 
 95 | 
 96 | <div class="global-nav-items">
 97 |   <ul>
 98 |     <li class="">
 99 |       <a href="https://www.douban.com" target="_blank" data-moreurl-dict="{&quot;from&quot;:&quot;top-nav-click-main&quot;,&quot;uid&quot;:&quot;0&quot;}">豆瓣</a>
100 |     </li>
101 |     <li class="">
102 |       <a href="https://book.douban.com" target="_blank" data-moreurl-dict="{&quot;from&quot;:&quot;top-nav-click-book&quot;,&quot;uid&quot;:&quot;0&quot;}">读书</a>
103 |     </li>
104 |     <li class="on">
105 |       <a href="https://movie.douban.com"  data-moreurl-dict="{&quot;from&quot;:&quot;top-nav-click-movie&quot;,&quot;uid&quot;:&quot;0&quot;}">电影</a>
106 |     </li>
107 |     <li class="">
108 |       <a href="https://music.douban.com" target="_blank" data-moreurl-dict="{&quot;from&quot;:&quot;top-nav-click-music&quot;,&quot;uid&quot;:&quot;0&quot;}">音乐</a>
109 |     </li>
110 |     <li class="">
111 |       <a href="https://www.douban.com/location" target="_blank" data-moreurl-dict="{&quot;from&quot;:&quot;top-nav-click-location&quot;,&quot;uid&quot;:&quot;0&quot;}">同城</a>
112 |     </li>
113 |     <li class="">
114 |       <a href="https://www.douban.com/group" target="_blank" data-moreurl-dict="{&quot;from&quot;:&quot;top-nav-click-group&quot;,&quot;uid&quot;:&quot;0&quot;}">小组</a>
115 |     </li>
116 |     <li class="">
117 |       <a href="https://read.douban.com&#47;?dcs=top-nav&amp;dcm=douban" target="_blank" data-moreurl-dict="{&quot;from&quot;:&quot;top-nav-click-read&quot;,&quot;uid&quot;:&quot;0&quot;}">阅读</a>
118 |     </li>
119 |     <li class="">
120 |       <a href="https://douban.fm&#47;?from_=shire_top_nav" target="_blank" data-moreurl-dict="{&quot;from&quot;:&quot;top-nav-click-fm&quot;,&quot;uid&quot;:&quot;0&quot;}">FM</a>
121 |     </li>
122 |     <li class="">
123 |       <a href="https://time.douban.com&#47;?dt_time_source=douban-web_top_nav" target="_blank" data-moreurl-dict="{&quot;from&quot;:&quot;top-nav-click-time&quot;,&quot;uid&quot;:&quot;0&quot;}">时间</a>
124 |     </li>
125 |     <li class="">
126 |       <a href="https://market.douban.com&#47;?utm_campaign=douban_top_nav&amp;utm_source=douban&amp;utm_medium=pc_web" target="_blank" data-moreurl-dict="{&quot;from&quot;:&quot;top-nav-click-market&quot;,&quot;uid&quot;:&quot;0&quot;}">豆品</a>
127 |     </li>
128 |     <li>
129 |       <a href="#more" class="bn-more"><span>更多</span></a>
130 |       <div class="more-items">
131 |         <table cellpadding="0" cellspacing="0">
132 |           <tbody>
133 |             <tr>
134 |               <td>
135 |                 <a href="https://ypy.douban.com" target="_blank" data-moreurl-dict="{&quot;from&quot;:&quot;top-nav-click-ypy&quot;,&quot;uid&quot;:&quot;0&quot;}">豆瓣摄影</a>
136 |               </td>
137 |             </tr>
138 |           </tbody>
139 |         </table>
140 |       </div>
141 |     </li>
142 |   </ul>
143 | </div>
144 | 
145 |   </div>
146 | </div>
147 | <script>
148 |   ;window._GLOBAL_NAV = {
149 |     DOUBAN_URL: "https://www.douban.com",
150 |     N_NEW_NOTIS: 0,
151 |     N_NEW_DOUMAIL: 0
152 |   };
153 | </script>
154 | 
155 | 
156 | 
157 |     <script src="//img3.doubanio.com/dae/accounts/resources/0246c88/shire/bundle.js" defer="defer"></script>
158 | 
159 | 
160 | 
161 | 
162 |     
163 | 
164 | 
165 | 
166 |     <link href="//img3.doubanio.com/dae/accounts/resources/0246c88/movie/bundle.css" rel="stylesheet" type="text/css">
167 | 
168 | 
169 | 
170 | 
171 | <div id="db-nav-movie" class="nav">
172 |   <div class="nav-wrap">
173 |   <div class="nav-primary">
174 |     <div class="nav-logo">
175 |       <a href="https:&#47;&#47;movie.douban.com">豆瓣电影</a>
176 |     </div>
177 |     <div class="nav-search">
178 |       <form action="https:&#47;&#47;movie.douban.com/subject_search" method="get">
179 |         <fieldset>
180 |           <legend>搜索：</legend>
181 |           <label for="inp-query">
182 |           </label>
183 |           <div class="inp"><input id="inp-query" name="search_text" size="22" maxlength="60" placeholder="搜索电影、电视剧、综艺、影人" value=""></div>
184 |           <div class="inp-btn"><input type="submit" value="搜索"></div>
185 |           <input type="hidden" name="cat" value="1002" />
186 |         </fieldset>
187 |       </form>
188 |     </div>
189 |   </div>
190 |   </div>
191 |   <div class="nav-secondary">
192 |     
193 | 
194 | <div class="nav-items">
195 |   <ul>
196 |     <li    ><a href="https://movie.douban.com/cinema/nowplaying/"
197 |      >影讯&购票</a>
198 |     </li>
199 |     <li    ><a href="https://movie.douban.com/explore"
200 |      >选电影</a>
201 |     </li>
202 |     <li    ><a href="https://movie.douban.com/tv/"
203 |      >电视剧</a>
204 |     </li>
205 |     <li    ><a href="https://movie.douban.com/chart"
206 |      >排行榜</a>
207 |     </li>
208 |     <li    ><a href="https://movie.douban.com/tag/"
209 |      >分类</a>
210 |     </li>
211 |     <li    ><a href="https://movie.douban.com/review/best/"
212 |      >影评</a>
213 |     </li>
214 |     <li    ><a href="https://movie.douban.com/annual/2018?source=navigation"
215 |      >2018年度榜单</a>
216 |     </li>
217 |     <li    ><a href="https://www.douban.com/standbyme/2018?source=navigation"
218 |      >2018书影音报告</a>
219 |     </li>
220 |   </ul>
221 | </div>
222 | 
223 |     <a href="https://movie.douban.com/annual/2018?source=movie_navigation" class="movieannual2018"></a>
224 |   </div>
225 | </div>
226 | 
227 | <script id="suggResult" type="text/x-jquery-tmpl">
228 |   <li data-link="{{= url}}">
229 |             <a href="{{= url}}" onclick="moreurl(this, {from:'movie_search_sugg', query:'{{= keyword }}', subject_id:'{{= id}}', i: '{{= index}}', type: '{{= type}}'})">
230 |             <img src="{{= img}}" width="40" />
231 |             <p>
232 |                 <em>{{= title}}</em>
233 |                 {{if year}}
234 |                     <span>{{= year}}</span>
235 |                 {{/if}}
236 |                 {{if sub_title}}
237 |                     <br /><span>{{= sub_title}}</span>
238 |                 {{/if}}
239 |                 {{if address}}
240 |                     <br /><span>{{= address}}</span>
241 |                 {{/if}}
242 |                 {{if episode}}
243 |                     {{if episode=="unknow"}}
244 |                         <br /><span>集数未知</span>
245 |                     {{else}}
246 |                         <br /><span>共{{= episode}}集</span>
247 |                     {{/if}}
248 |                 {{/if}}
249 |             </p>
250 |         </a>
251 |         </li>
252 |   </script>
253 | 
254 | 
255 | 
256 | 
257 |     <script src="//img3.doubanio.com/dae/accounts/resources/0246c88/movie/bundle.js" defer="defer"></script>
258 | 
259 | 
260 | 
261 | 
262 | 
263 |     
264 |     <div id="wrapper">
265 |         
266 | 
267 |         
268 |     <div id="content">
269 |         
270 |     <h1>孙敬季 Jingji Sun</h1>
271 | 
272 |         <div class="grid-16-8 clearfix">
273 |             
274 |             
275 |             <div class="article">
276 |                 
277 | 
278 | 
279 | 
280 | 
281 | <div id="headline" class="item">
282 |     <div class="pic">
283 |         <a class="nbg" title="孙敬季 Jingji Sun" href="https://img3.doubanio.com/view/celebrity/s_ratio_celebrity/public/p1545044221.36.jpg">
284 |             <img alt="孙敬季 Jingji Sun"
285 |                 title="点击看大图"
286 |                 src="https://img3.doubanio.com/view/celebrity/s_ratio_celebrity/public/p1545044221.36.jpg">
287 |         </a>
288 |         <span class="gact">
289 |             <a class="" rel="nofollow" href="https://movie.douban.com/celebrity/1396354/detail_edit">增改描述、换头像</a>
290 |         </span>
291 |     </div>
292 |     <div class="info">
293 |         
294 |     <ul class="">
295 |     
296 |         <li>
297 |             <span>性别</span>: 
298 |         男
299 |         </li>
300 |     
301 |         <li>
302 |             <span>星座</span>: 
303 |         天蝎座
304 |         </li>
305 |     
306 |         <li>
307 |             <span>出生日期</span>: 
308 |         1985-10-27
309 |         </li>
310 |     
311 |         <li>
312 |             <span>出生地</span>: 
313 |         中国,吉林,长春
314 |         </li>
315 |     
316 |         <li>
317 |             <span>职业</span>: 
318 |         演员
319 |         </li>
320 | 
321 |     
322 |         <li>
323 |             <span>imdb编号</span>: 
324 |         <a href="http://www.imdb.com/name/nm9914238" target="_blank">nm9914238</a>
325 |         </li>
326 |     
327 |     </ul>
328 | 
329 |     </div>
330 | </div>
331 | 
332 | 
333 | 
334 | 
335 | 
336 | 
337 | 
338 | 
339 | 
340 | 
341 | <div id="opt-bar" class="mod">
342 |     <div class="ll">
343 | 
344 |             
345 |     
346 |     <form method="POST" action="connect" class="miniform">
347 |         <input type="submit" class="a-btn j " value="收藏" title="" />
348 |     </form>
349 | 
350 | 
351 |         
352 |     
353 |     <span class="rec" id="None-1396354">
354 |     <a href= "#"
355 |         data-type="None"
356 |         data-url="https://movie.douban.com/celebrity/1396354/"
357 |         data-desc=""
358 |         data-title="影人 孙敬季 Jingji Sun (来自豆瓣)"
359 |         data-pic="https://img3.doubanio.com/view/celebrity/s_ratio_celebrity/public/p1545044221.36.jpeg"
360 |         class="bn-sharing ">
361 |         分享到
362 |     </a> &nbsp;&nbsp;
363 |     </span>
364 | 
365 |     <script>
366 |         if (!window.DoubanShareMenuList) {
367 |             window.DoubanShareMenuList = [];
368 |         }
369 |         var __cache_url = __cache_url || {};
370 | 
371 |         (function(u){
372 |             if(__cache_url[u]) return;
373 |             __cache_url[u] = true;
374 |             window.DoubanShareIcons = 'https://img3.doubanio.com/f/shire/d15ffd71f3f10a7210448fec5a68eaec66e7f7d0/pics/ic_shares.png';
375 | 
376 |             var initShareButton = function() {
377 |                 $.ajax({url:u,dataType:'script',cache:true});
378 |             };
379 | 
380 |             if (typeof Do == 'function' && 'ready' in Do) {
381 |                 Do(
382 |                     'https://img3.doubanio.com/f/shire/8377b9498330a2e6f056d863987cc7a37eb4d486/css/ui/dialog.css',
383 |                     'https://img3.doubanio.com/f/shire/4ea3216519a6183c7bcd4f7d1a6d4fd57ce1a244/js/ui/dialog.js',
384 |                     'https://img3.doubanio.com/f/movie/c4ab132ff4d3d64a83854c875ea79b8b541faf12/js/movie/lib/qrcode.min.js',
385 |                     initShareButton
386 |                 );
387 |             } else if(typeof Douban == 'object' && 'loader' in Douban) {
388 |                 Douban.loader.batch(
389 |                     'https://img3.doubanio.com/f/shire/8377b9498330a2e6f056d863987cc7a37eb4d486/css/ui/dialog.css',
390 |                     'https://img3.doubanio.com/f/shire/4ea3216519a6183c7bcd4f7d1a6d4fd57ce1a244/js/ui/dialog.js',
391 |                     'https://img3.doubanio.com/f/movie/c4ab132ff4d3d64a83854c875ea79b8b541faf12/js/movie/lib/qrcode.min.js'
392 |                 ).done(initShareButton);
393 |             }
394 | 
395 |         })('https://img3.doubanio.com/f/movie/32be6727ed3ad8f6c4a417d8a086355c3e7d1d27/js/movie/lib/sharebutton.js');
396 |     </script>
397 | 
398 | 
399 |     </div>
400 |     <div class="rr">
401 |         
402 | 
403 | 
404 | 
405 | 
406 |         
407 | 
408 | 
409 | 
410 | 
411 | 
412 | <div class="rec-sec">
413 | <span class="rec">
414 |     <script id="movie-share" type="text/x-html-snippet">
415 |         
416 |     <form class="movie-share" action="/j/share" method="POST">
417 |         <div class="clearfix form-bd">
418 |             <div class="input-area">
419 |                 <textarea name="text" class="share-text" cols="72" data-mention-api="https://api.douban.com/shuo/in/complete?alt=xd&amp;callback=?"></textarea>
420 |                 <input type="hidden" name="target-id" value="1396354">
421 |                 <input type="hidden" name="target-type" value="3">
422 |                 <input type="hidden" name="title" value="孙敬季 Jingji Sun">
423 |                 <input type="hidden" name="desc" value="1985-10-27 / 辛亥革命 / 地下凶猛">
424 |                 <input type="hidden" name="redir" value=""/>
425 |                 <div class="mentioned-highlighter"></div>
426 |             </div>
427 | 
428 |             <div class="info-area">
429 |                     <img class="media" src="https://img3.doubanio.com/view/celebrity/s_ratio_celebrity/public/p1545044221.36.jpg" />
430 |                 <strong>孙敬季 Jingji Sun</strong>
431 |                 <p>1985-10-27 / 辛亥革命 / 地下凶猛</p>
432 |                 <p class="error server-error">&nbsp;</p>
433 |             </div>
434 |         </div>
435 |         <div class="form-ft">
436 |             <div class="form-ft-inner">
437 |                 
438 | 
439 | 
440 | 
441 |                 <span class="avail-num-indicator">140</span>
442 |                 <span class="bn-flat">
443 |                     <input type="submit" value="推荐" />
444 |                 </span>
445 |             </div>
446 |         </div>
447 |     </form>
448 |     
449 |     <div id="suggest-mention-tmpl" style="display:none;">
450 |         <ul>
451 |             {{#users}}
452 |             <li id="{{uid}}">
453 |               <img src="{{avatar}}">{{{username}}}&nbsp;<span>({{{uid}}})</span>
454 |             </li>
455 |             {{/users}}
456 |         </ul>
457 |     </div>
458 | 
459 | 
460 |     </script>
461 | 
462 |         
463 |         <a href="/accounts/register?reason=recommend"  class="j a_show_login lnk-sharing" share-id="1396354" data-mode="plain" data-name="孙敬季 Jingji Sun" data-type="" data-desc="1985-10-27 / 辛亥革命 / 地下凶猛" data-href="https://movie.douban.com/celebrity/1396354/" data-image="https://img3.doubanio.com/view/celebrity/s_ratio_celebrity/public/p1545044221.36.jpg" data-properties="{}" data-redir="" data-text="" data-apikey="" data-curl="" data-count="10" data-object_kind="1043" data-object_id="1396354" data-target_type="rec" data-target_action="0" data-action_props="{&#34;actor_url&#34;:&#34;https:\/\/movie.douban.com\/celebrity\/1396354\/&#34;,&#34;actor_name&#34;:&#34;孙敬季 Jingji Sun&#34;}">推荐</a>
464 | </span>
465 | 
466 | 
467 | </div>
468 | 
469 | 
470 | 
471 | 
472 | 
473 |     </div>
474 | </div>
475 | 
476 | 
477 | 
478 | 
479 | 
480 | 
481 | 
482 | 
483 | 
484 | 
485 | 
486 | 
487 | <div id="intro" class="mod">
488 |     <div class="hd">
489 |     <h2>
490 |         影人简介
491 |             &nbsp;&middot;&nbsp;&middot;&nbsp;&middot;&nbsp;&middot;&nbsp;&middot;&nbsp;&middot;
492 |     </h2>
493 | </div>
494 |     <div class="bd">
495 |             　　孙敬季，1985年10月27日出生，吉林长春人。毕业于北京舞蹈学院音乐剧系。全能艺人，表演、声乐、舞蹈、武术等。是中国内地童星男演员。童年主演电影《九香》 电视剧《后妈》而被人熟知。2002年参加周星驰全国选星活动脱颖而出并荣获冠军。2005年出演由周星驰监制的电视剧《功夫状元》表现不俗。2006年参加第一届北京大学生戏剧节荣获“优秀男演员奖”。2009年参演小柯原创音乐剧《凭什么爱我》等脍炙人口的作品。
496 |     </div>
497 | </div>
498 | 
499 | 
500 | 
501 | <div id="photos" class="mod">
502 |     <div class="hd">
503 |     <h2>
504 |         影人图片
505 |             &nbsp;&middot;&nbsp;&middot;&nbsp;&middot;&nbsp;&middot;&nbsp;&middot;&nbsp;&middot;
506 |             <span class="pl">&nbsp;(
507 |                 
508 |                 <a href="https://movie.douban.com/celebrity/1396354/photos/" target="_self">全部0张</a>&nbsp;&middot;&nbsp;<a href="https://movie.douban.com/celebrity/1396354/photos/upload" target="_self">上传照片</a>
509 |                 ) </span>
510 |     </h2>
511 | </div>
512 |     
513 | <ul class="pic-col5">
514 | </ul>
515 | 
516 | </div>
517 | 
518 | 
519 | 
520 | 
521 | 
522 | 
523 | 
524 | 
525 | 
526 | 
527 | <div id="recent_movies" class="mod">
528 |     <div class="hd">
529 |         
530 |     <h2>
531 |         最近的5部作品（已上映）
532 |             &nbsp;&middot;&nbsp;&middot;&nbsp;&middot;&nbsp;&middot;&nbsp;&middot;&nbsp;&middot;
533 |             <span class="pl">&nbsp;(
534 |                 
535 |                     <a href="https://movie.douban.com/celebrity/1396354/movies?sortby=time&amp;format=pic" target="_self">全部</a>
536 |                 ) </span>
537 |     </h2>
538 | 
539 |     </div>
540 |         <div class="bd">
541 |             <ul class="list-s">
542 |                     <li class="">
543 |                             <h3>2018</h3>
544 |                         <div class="pic">
545 |                             <a href="https://movie.douban.com/subject/24843621/">
546 |                                 <img class=""
547 |                                     alt="地下凶猛"
548 |                                     src="https://img3.doubanio.com/view/photo/s_ratio_poster/public/p2525713512.jpg"
549 |                                     title="地下凶猛"/>
550 |                             </a>
551 |                         </div>
552 |                         <div class="info">
553 |                             <a href="https://movie.douban.com/subject/24843621/" title="地下凶猛" class="">地下凶猛</a>
554 |                                 <em></em>
555 |                         </div>
556 |                     </li>
557 |                     <li class="">
558 |                             <h3>2011</h3>
559 |                         <div class="pic">
560 |                             <a href="https://movie.douban.com/subject/4896305/">
561 |                                 <img class=""
562 |                                     alt="辛亥革命"
563 |                                     src="https://img3.doubanio.com/view/photo/s_ratio_poster/public/p1240455745.jpg"
564 |                                     title="辛亥革命"/>
565 |                             </a>
566 |                         </div>
567 |                         <div class="info">
568 |                             <a href="https://movie.douban.com/subject/4896305/" title="辛亥革命" class="">辛亥革命</a>
569 |                                 <em>6.3</em>
570 |                         </div>
571 |                     </li>
572 |             </ul>
573 |         </div>
574 | </div>
575 | 
576 | 
577 | 
578 |     
579 | 
580 | <div id="best_movies" class="mod">
581 |     <div class="hd">
582 |     <h2>
583 |         最受好评的5部作品
584 |             &nbsp;&middot;&nbsp;&middot;&nbsp;&middot;&nbsp;&middot;&nbsp;&middot;&nbsp;&middot;
585 |             <span class="pl">&nbsp;(
586 |                 
587 |                     <a href="https://movie.douban.com/celebrity/1396354/movies?sortby=vote&amp;format=pic" target="_self">全部</a>
588 |                 ) </span>
589 |     </h2>
590 | </div>
591 |     <div class="bd">
592 |         <ul class="list-s">
593 |             <li class="">
594 |                 <div class="pic">
595 |                     <a href="https://movie.douban.com/subject/4896305/">
596 |                         <img alt="辛亥革命"
597 |                             src="https://img3.doubanio.com/view/photo/s_ratio_poster/public/p1240455745.jpg"
598 |                             title="辛亥革命"
599 |                             class="" />
600 |                     </a>
601 |                 </div>
602 |                 <div class="info">
603 |                     <a href="https://movie.douban.com/subject/4896305/" title="辛亥革命" class="">辛亥革命</a>
604 |                     <em class="">6.3</em>
605 |                         <div class="pl">2011</div>
606 |                 </div>
607 |             </li>
608 |             <li class="">
609 |                 <div class="pic">
610 |                     <a href="https://movie.douban.com/subject/24843621/">
611 |                         <img alt="地下凶猛"
612 |                             src="https://img3.doubanio.com/view/photo/s_ratio_poster/public/p2525713512.jpg"
613 |                             title="地下凶猛"
614 |                             class="" />
615 |                     </a>
616 |                 </div>
617 |                 <div class="info">
618 |                     <a href="https://movie.douban.com/subject/24843621/" title="地下凶猛" class="">地下凶猛</a>
619 |                     <em class=""></em>
620 |                         <div class="pl">2018</div>
621 |                 </div>
622 |             </li>
623 |         </ul>
624 |     </div>
625 | </div>
626 | 
627 | 
628 | 
629 | 
630 | 
631 | 
632 |             </div>
633 |             <div class="aside">
634 |                 
635 | 
636 | 
637 | 
638 | 
639 | 
640 | 
641 | 
642 | 
643 | 
644 | 
645 | <!-- douban ad begin -->
646 | <div id="dale_movie_celebrity_top_right"></div>
647 | <!-- douban ad end -->
648 | 
649 | 
650 | 
651 | 
652 | <div id="fans" class="mod">
653 |     <div class="hd">
654 |     <h2>
655 |         孙敬季的影迷（0）
656 |             &nbsp;&middot;&nbsp;&middot;&nbsp;&middot;&nbsp;&middot;&nbsp;&middot;&nbsp;&middot;
657 |             <span class="pl">&nbsp;(
658 |                 
659 |                     <a href="https://movie.douban.com/celebrity/1396354/fans" target="_self">全部</a>
660 |                 ) </span>
661 |     </h2>
662 | </div>
663 |     
664 | <ul class="list-s">
665 | </ul>
666 | 
667 | </div>
668 | 
669 | <div id="dale_movie_celebrity_middle_mini"></div>
670 | 
671 |             </div>
672 |             <div class="extra">
673 |                 
674 |     <div id="dale_movie_celebrity_bottom"></div>
675 | 
676 |             </div>
677 |         </div>
678 |     </div>
679 | 
680 |         
681 |     <div id="footer">
682 |             <div class="footer-extra"></div>
683 |         
684 | <span id="icp" class="fleft gray-link">
685 |     &copy; 2005－2019 douban.com, all rights reserved 北京豆网科技有限公司
686 | </span>
687 | 
688 | <a href="https://www.douban.com/hnypt/variformcyst.py" style="display: none;"></a>
689 | 
690 | <span class="fright">
691 |     <a href="https://www.douban.com/about">关于豆瓣</a>
692 |     · <a href="https://www.douban.com/jobs">在豆瓣工作</a>
693 |     · <a href="https://www.douban.com/about?topic=contactus">联系我们</a>
694 |     · <a href="https://www.douban.com/about?policy=disclaimer">免责声明</a>
695 |     
696 |     · <a href="https://help.douban.com/?app=movie" target="_blank">帮助中心</a>
697 |     · <a href="https://www.douban.com/doubanapp/">移动应用</a>
698 |     · <a href="https://www.douban.com/partner/">豆瓣广告</a>
699 | </span>
700 | 
701 |     </div>
702 | 
703 |     </div>
704 |     <!-- COLLECTED JS -->
705 |         
706 |         
707 |     <link rel="stylesheet" type="text/css" href="https://img3.doubanio.com/f/shire/8377b9498330a2e6f056d863987cc7a37eb4d486/css/ui/dialog.css" />
708 |     <link rel="stylesheet" type="text/css" href="https://img3.doubanio.com/f/movie/4aca95d66d37ec0712b3d19973b5d8feb75f2f05/css/movie/mod/reg_login_pop.css" />
709 |     <script type="text/javascript" src="https://img3.doubanio.com/f/shire/77323ae72a612bba8b65f845491513ff3329b1bb/js/do.js" data-cfg-autoload="false"></script>
710 |     <script type="text/javascript" src="https://img3.doubanio.com/f/shire/4ea3216519a6183c7bcd4f7d1a6d4fd57ce1a244/js/ui/dialog.js"></script>
711 |     <script type="text/javascript">
712 |         var HTTPS_DB='https://www.douban.com';
713 | var account_pop={open:function(o,e){e?referrer="?referrer="+encodeURIComponent(e):referrer="?referrer="+window.location.href;var n="",i="",t=448;n="用户登录",i="https://accounts.douban.com/passport/login_popup?source=movie";var r=document.location.protocol+"//"+document.location.hostname,a=dui.Dialog({width:340,title:n,height:t,cls:"account_pop",isHideTitle:!0,modal:!0,content:"<iframe scrolling='no' frameborder='0' width='340' height='"+t+"' src='"+i+"' name='"+r+"'></iframe>"},!0),c=a.node;if(c.undelegate(),c.delegate(".dui-dialog-close","click",function(){var o=$("body");o.find("#login_msk").hide(),o.find(".account_pop").remove()}),$(window).width()<478){var d="";"reg"===o?d=HTTPS_DB+"/accounts/register"+referrer:"login"===o&&(d=HTTPS_DB+"/accounts/login"+referrer),window.location.href=d}else a.open();$(window).bind("message",function(o){"https://accounts.douban.com"===o.originalEvent.origin&&(c.find("iframe").css("height",o.originalEvent.data),c.height(o.originalEvent.data),a.update())})}};Douban&&Douban.init_show_login&&(Douban.init_show_login=function(o){var e=$(o);e.click(function(){var o=e.data("ref")||"";return account_pop.open("login",o),!1})}),Do(function(){$("body").delegate(".pop_register","click",function(o){o.preventDefault();var e=$(this).data("ref")||"";return account_pop.open("reg",e),!1}),$("body").delegate(".pop_login","click",function(o){o.preventDefault();var e=$(this).data("ref")||"";return account_pop.open("login",e),!1})});
714 |     </script>
715 | 
716 |     
717 | <!-- douban ad begin -->
718 | 
719 | 
720 | 
721 | 
722 | 
723 |     
724 | <script type="text/javascript">
725 |     (function (global) {
726 |         var newNode = global.document.createElement('script'),
727 |             existingNode = global.document.getElementsByTagName('script')[0],
728 |             adSource = '//erebor.douban.com/',
729 |             userId = '',
730 |             browserId = 'oPXQ4Tiv_Ig',
731 |             criteria = '3:/celebrity/1396354/',
732 |             preview = '',
733 |             debug = false,
734 |             adSlots = ['dale_movie_celebrity_top_right', 'dale_movie_celebrity_bottom', 'dale_movie_celebrity_middle_mini'];
735 | 
736 |         global.DoubanAdRequest = {src: adSource, uid: userId, bid: browserId, crtr: criteria, prv: preview, debug: debug};
737 |         global.DoubanAdSlots = (global.DoubanAdSlots || []).concat(adSlots);
738 | 
739 |         newNode.setAttribute('type', 'text/javascript');
740 |         newNode.setAttribute('src', 'https://img3.doubanio.com/f/adjs/d94309c97a130c8c6be446ca7b35add7cce9b84e/ad.release.js');
741 |         newNode.setAttribute('async', true);
742 |         existingNode.parentNode.insertBefore(newNode, existingNode);
743 |     })(this);
744 | </script>
745 | 
746 | 
747 | 
748 | 
749 | 
750 | 
751 | 
752 | 
753 | 
754 | 
755 | <!-- douban ad end -->
756 | 
757 |     <script type="text/javascript" src="https://img3.doubanio.com/f/shire/1aa9d13e2c2ebdbd027307b5f91550cbd4ec2279/js/lib/jquery.ba-hashchange.min.js"></script>
758 |     <script type="text/javascript">
759 |         $(function(){
760 |             $(window).hashchange();
761 |             if(window.location.href.indexOf("?")>0){
762 |                 if(window.history.pushState){
763 |                     window.history.pushState(null, document.title,  window.location.href.substring(0,window.location.href.indexOf("?")));
764 |                 }
765 |             }
766 |         });
767 |     </script>
768 | 
769 | 
770 |     
771 |   
772 | 
773 | 
774 | 
775 | 
776 | 
777 | 
778 | 
779 | 
780 | 
781 | <script type="text/javascript">
782 | var _paq = _paq || [];
783 | _paq.push(['trackPageView']);
784 | _paq.push(['enableLinkTracking']);
785 | (function() {
786 |     var p=(('https:' == document.location.protocol) ? 'https' : 'http'), u=p+'://fundin.douban.com/';
787 |     _paq.push(['setTrackerUrl', u+'piwik']);
788 |     _paq.push(['setSiteId', '100001']);
789 |     var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
790 |     g.type='text/javascript';
791 |     g.defer=true;
792 |     g.async=true;
793 |     g.src=p+'://img3.doubanio.com/dae/fundin/piwik.js';
794 |     s.parentNode.insertBefore(g,s);
795 | })();
796 | </script>
797 | 
798 | <script type="text/javascript">
799 | var setMethodWithNs = function(namespace) {
800 |   var ns = namespace ? namespace + '.' : ''
801 |     , fn = function(string) {
802 |         if(!ns) {return string}
803 |         return ns + string
804 |       }
805 |   return fn
806 | }
807 | 
808 | var gaWithNamespace = function(fn, namespace) {
809 |   var method = setMethodWithNs(namespace)
810 |   fn.call(this, method)
811 | }
812 | 
813 | var _gaq = _gaq || []
814 |   , accounts = [
815 |       { id: 'UA-7019765-1', namespace: 'douban' }
816 |     , { id: 'UA-7019765-19', namespace: '' }
817 |     ]
818 |   , gaInit = function(account) {
819 |       gaWithNamespace(function(method) {
820 |         gaInitFn.call(this, method, account)
821 |       }, account.namespace)
822 |     }
823 |   , gaInitFn = function(method, account) {
824 |       _gaq.push([method('_setAccount'), account.id]);
825 |       _gaq.push([method('_setSampleRate'), '5']);
826 | 
827 |       
828 |   _gaq.push([method('_addOrganic'), 'google', 'q'])
829 |   _gaq.push([method('_addOrganic'), 'baidu', 'wd'])
830 |   _gaq.push([method('_addOrganic'), 'soso', 'w'])
831 |   _gaq.push([method('_addOrganic'), 'youdao', 'q'])
832 |   _gaq.push([method('_addOrganic'), 'so.360.cn', 'q'])
833 |   _gaq.push([method('_addOrganic'), 'sogou', 'query'])
834 |   if (account.namespace) {
835 |     _gaq.push([method('_addIgnoredOrganic'), '豆瓣'])
836 |     _gaq.push([method('_addIgnoredOrganic'), 'douban'])
837 |     _gaq.push([method('_addIgnoredOrganic'), '豆瓣网'])
838 |     _gaq.push([method('_addIgnoredOrganic'), 'www.douban.com'])
839 |   }
840 | 
841 |       if (account.namespace === 'douban') {
842 |         _gaq.push([method('_setDomainName'), '.douban.com'])
843 |       }
844 | 
845 |         _gaq.push([method('_setCustomVar'), 1, 'responsive_view_mode', 'desktop', 3])
846 | 
847 |         _gaq.push([method('_setCustomVar'), 2, 'login_status', '0', 2]);
848 | 
849 |       _gaq.push([method('_trackPageview')])
850 |     }
851 | 
852 | for(var i = 0, l = accounts.length; i < l; i++) {
853 |   var account = accounts[i]
854 |   gaInit(account)
855 | }
856 | 
857 | 
858 | ;(function() {
859 |     var ga = document.createElement('script');
860 |     ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
861 |     ga.setAttribute('async', 'true');
862 |     document.documentElement.firstChild.appendChild(ga);
863 | })()
864 | </script>
865 | 
866 | 
867 | 
868 | 
869 | 
870 | 
871 | 
872 | 
873 |       
874 |     
875 | 
876 |     <!-- brand57-docker-->
877 | 
878 |   <script>_SPLITTEST=''</script>
879 | </body>
880 | 
881 | </html>
882 | 
883 | 
884 | 


--------------------------------------------------------------------------------
/爬虫与网页分析/data/1404079:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <html lang="zh-cmn-Hans" class="">
  3 | <head>
  4 |     <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
  5 |     <meta name="renderer" content="webkit">
  6 |     <meta name="referrer" content="always">
  7 |     <meta name="google-site-verification" content="ok0wCgT20tBBgo9_zat2iAcimtN4Ftf5ccsh092Xeyw" />
  8 |     <title>
  9 |     托马斯·埃尔姆斯 (豆瓣)
 10 | </title>
 11 |     
 12 |     <meta name="baidu-site-verification" content="cZdR4xxR7RxmM4zE" />
 13 |     <meta http-equiv="Pragma" content="no-cache">
 14 |     <meta http-equiv="Expires" content="Sun, 6 Mar 2005 01:00:00 GMT">
 15 |     
 16 |     <meta name="keywords" content="托马斯·埃尔姆斯 Thomas Elms,简介,个人资料,图片,电影作品,获奖情况,合作影人"/>
 17 |     <meta name="description" content="托马斯·埃尔姆斯简介、图片写真、获奖情况及电影作品一览"/>
 18 | 
 19 |     <link rel="apple-touch-icon" href="https://img3.doubanio.com/f/movie/d59b2715fdea4968a450ee5f6c95c7d7a2030065/pics/movie/apple-touch-icon.png">
 20 |     <link href="https://img3.doubanio.com/f/shire/bf61b1fa02f564a4a8f809da7c7179b883a56146/css/douban.css" rel="stylesheet" type="text/css">
 21 |     <link href="https://img3.doubanio.com/f/shire/ae3f5a3e3085968370b1fc63afcecb22d3284848/css/separation/_all.css" rel="stylesheet" type="text/css">
 22 |     <link href="https://img3.doubanio.com/f/movie/8864d3756094f5272d3c93e30ee2e324665855b0/css/movie/base/init.css" rel="stylesheet">
 23 |     <script type="text/javascript">var _head_start = new Date();</script>
 24 |     <script type="text/javascript" src="https://img3.doubanio.com/f/movie/0495cb173e298c28593766009c7b0a953246c5b5/js/movie/lib/jquery.js"></script>
 25 |     <script type="text/javascript" src="https://img3.doubanio.com/f/shire/d07bddd8077c84e11c8e93a31a024ef714f938fc/js/douban.js"></script>
 26 |     <script type="text/javascript" src="https://img3.doubanio.com/f/shire/0efdc63b77f895eaf85281fb0e44d435c6239a3f/js/separation/_all.js"></script>
 27 |     
 28 | <link rel="alternate" href="android-app://com.douban.movie/doubanmovie/celebrity/1404079/" />
 29 | <link rel="stylesheet" type="text/css" href="https://img3.doubanio.com/f/movie/5c131bbdf002cf3c8ffada7c52640f616de723ce/css/movie/celebrity.css" />
 30 | <link rel="stylesheet" type="text/css" href="https://img3.doubanio.com/f/movie/a6d2e5a112ca7dfc3f4e023e67d1fdb398a0d819/css/movie/show.css" />
 31 | <script type="text/javascript" src="https://img3.doubanio.com/f/shire/77323ae72a612bba8b65f845491513ff3329b1bb/js/do.js" data-cfg-autoload="false"></script>
 32 | 
 33 |     <style type="text/css">
 34 | .mod { margin-bottom: 25px }
 35 | .mod .hd { margin-bottom: 10px }
 36 | fieldset {border:1px solid #DDDDDD;margin-bottom:15px;padding:0 10px;}
 37 | .mod h2{color:#333d48;}
 38 | #headline img { width: 135px;}
 39 | </style>
 40 |     <style type="text/css">img { max-width: 100%; }</style>
 41 |     <script type="text/javascript"></script>
 42 |     <style type="text/css">
 43 | #opt-bar .a-btn {
 44 |     color: #000; background: #fff3e7; border: 1px solid #e7d7ca; padding: 0; outline: none; float:left; height:22px;
 45 |     line-height:22px; width:46px; font:12px Helvetica,Arial,sans-serif;cursor: pointer;
 46 | }
 47 | 
 48 |         .minisubmit{ color:#bbb; vertical-align: baseline; }
 49 |         .minisubmit:hover { color:#fff;background-color:#bbb;}
 50 |     </style>
 51 | 
 52 |     <link rel="shortcut icon" href="https://img3.doubanio.com/favicon.ico" type="image/x-icon">
 53 | </head>
 54 | 
 55 | <body>
 56 |   
 57 |     <script type="text/javascript">var _body_start = new Date();</script>
 58 | 
 59 |     
 60 |     
 61 | 
 62 | 
 63 | 
 64 |     <link href="//img3.doubanio.com/dae/accounts/resources/0246c88/shire/bundle.css" rel="stylesheet" type="text/css">
 65 | 
 66 | 
 67 | 
 68 | <div id="db-global-nav" class="global-nav">
 69 |   <div class="bd">
 70 |     
 71 | <div class="top-nav-info">
 72 |   <a href="https://accounts.douban.com/passport/login?source=movie" class="nav-login" rel="nofollow">登录/注册</a>
 73 | </div>
 74 | 
 75 | 
 76 |     <div class="top-nav-doubanapp">
 77 |   <a href="https://www.douban.com/doubanapp/app?channel=top-nav" class="lnk-doubanapp">下载豆瓣客户端</a>
 78 |   <div id="doubanapp-tip">
 79 |     <a href="https://www.douban.com/doubanapp/app?channel=qipao" class="tip-link">豆瓣 <span class="version">6.0</span> 全新发布</a>
 80 |     <a href="javascript: void 0;" class="tip-close">×</a>
 81 |   </div>
 82 |   <div id="top-nav-appintro" class="more-items">
 83 |     <p class="appintro-title">豆瓣</p>
 84 |     <p class="qrcode">扫码直接下载</p>
 85 |     <div class="download">
 86 |       <a href="https://www.douban.com/doubanapp/redirect?channel=top-nav&direct_dl=1&download=iOS">iPhone</a>
 87 |       <span>·</span>
 88 |       <a href="https://www.douban.com/doubanapp/redirect?channel=top-nav&direct_dl=1&download=Android" class="download-android">Android</a>
 89 |     </div>
 90 |   </div>
 91 | </div>
 92 | 
 93 |     
 94 | 
 95 | 
 96 | <div class="global-nav-items">
 97 |   <ul>
 98 |     <li class="">
 99 |       <a href="https://www.douban.com" target="_blank" data-moreurl-dict="{&quot;from&quot;:&quot;top-nav-click-main&quot;,&quot;uid&quot;:&quot;0&quot;}">豆瓣</a>
100 |     </li>
101 |     <li class="">
102 |       <a href="https://book.douban.com" target="_blank" data-moreurl-dict="{&quot;from&quot;:&quot;top-nav-click-book&quot;,&quot;uid&quot;:&quot;0&quot;}">读书</a>
103 |     </li>
104 |     <li class="on">
105 |       <a href="https://movie.douban.com"  data-moreurl-dict="{&quot;from&quot;:&quot;top-nav-click-movie&quot;,&quot;uid&quot;:&quot;0&quot;}">电影</a>
106 |     </li>
107 |     <li class="">
108 |       <a href="https://music.douban.com" target="_blank" data-moreurl-dict="{&quot;from&quot;:&quot;top-nav-click-music&quot;,&quot;uid&quot;:&quot;0&quot;}">音乐</a>
109 |     </li>
110 |     <li class="">
111 |       <a href="https://www.douban.com/location" target="_blank" data-moreurl-dict="{&quot;from&quot;:&quot;top-nav-click-location&quot;,&quot;uid&quot;:&quot;0&quot;}">同城</a>
112 |     </li>
113 |     <li class="">
114 |       <a href="https://www.douban.com/group" target="_blank" data-moreurl-dict="{&quot;from&quot;:&quot;top-nav-click-group&quot;,&quot;uid&quot;:&quot;0&quot;}">小组</a>
115 |     </li>
116 |     <li class="">
117 |       <a href="https://read.douban.com&#47;?dcs=top-nav&amp;dcm=douban" target="_blank" data-moreurl-dict="{&quot;from&quot;:&quot;top-nav-click-read&quot;,&quot;uid&quot;:&quot;0&quot;}">阅读</a>
118 |     </li>
119 |     <li class="">
120 |       <a href="https://douban.fm&#47;?from_=shire_top_nav" target="_blank" data-moreurl-dict="{&quot;from&quot;:&quot;top-nav-click-fm&quot;,&quot;uid&quot;:&quot;0&quot;}">FM</a>
121 |     </li>
122 |     <li class="">
123 |       <a href="https://time.douban.com&#47;?dt_time_source=douban-web_top_nav" target="_blank" data-moreurl-dict="{&quot;from&quot;:&quot;top-nav-click-time&quot;,&quot;uid&quot;:&quot;0&quot;}">时间</a>
124 |     </li>
125 |     <li class="">
126 |       <a href="https://market.douban.com&#47;?utm_campaign=douban_top_nav&amp;utm_source=douban&amp;utm_medium=pc_web" target="_blank" data-moreurl-dict="{&quot;from&quot;:&quot;top-nav-click-market&quot;,&quot;uid&quot;:&quot;0&quot;}">豆品</a>
127 |     </li>
128 |     <li>
129 |       <a href="#more" class="bn-more"><span>更多</span></a>
130 |       <div class="more-items">
131 |         <table cellpadding="0" cellspacing="0">
132 |           <tbody>
133 |             <tr>
134 |               <td>
135 |                 <a href="https://ypy.douban.com" target="_blank" data-moreurl-dict="{&quot;from&quot;:&quot;top-nav-click-ypy&quot;,&quot;uid&quot;:&quot;0&quot;}">豆瓣摄影</a>
136 |               </td>
137 |             </tr>
138 |           </tbody>
139 |         </table>
140 |       </div>
141 |     </li>
142 |   </ul>
143 | </div>
144 | 
145 |   </div>
146 | </div>
147 | <script>
148 |   ;window._GLOBAL_NAV = {
149 |     DOUBAN_URL: "https://www.douban.com",
150 |     N_NEW_NOTIS: 0,
151 |     N_NEW_DOUMAIL: 0
152 |   };
153 | </script>
154 | 
155 | 
156 | 
157 |     <script src="//img3.doubanio.com/dae/accounts/resources/0246c88/shire/bundle.js" defer="defer"></script>
158 | 
159 | 
160 | 
161 | 
162 |     
163 | 
164 | 
165 | 
166 |     <link href="//img3.doubanio.com/dae/accounts/resources/0246c88/movie/bundle.css" rel="stylesheet" type="text/css">
167 | 
168 | 
169 | 
170 | 
171 | <div id="db-nav-movie" class="nav">
172 |   <div class="nav-wrap">
173 |   <div class="nav-primary">
174 |     <div class="nav-logo">
175 |       <a href="https:&#47;&#47;movie.douban.com">豆瓣电影</a>
176 |     </div>
177 |     <div class="nav-search">
178 |       <form action="https:&#47;&#47;movie.douban.com/subject_search" method="get">
179 |         <fieldset>
180 |           <legend>搜索：</legend>
181 |           <label for="inp-query">
182 |           </label>
183 |           <div class="inp"><input id="inp-query" name="search_text" size="22" maxlength="60" placeholder="搜索电影、电视剧、综艺、影人" value=""></div>
184 |           <div class="inp-btn"><input type="submit" value="搜索"></div>
185 |           <input type="hidden" name="cat" value="1002" />
186 |         </fieldset>
187 |       </form>
188 |     </div>
189 |   </div>
190 |   </div>
191 |   <div class="nav-secondary">
192 |     
193 | 
194 | <div class="nav-items">
195 |   <ul>
196 |     <li    ><a href="https://movie.douban.com/cinema/nowplaying/"
197 |      >影讯&购票</a>
198 |     </li>
199 |     <li    ><a href="https://movie.douban.com/explore"
200 |      >选电影</a>
201 |     </li>
202 |     <li    ><a href="https://movie.douban.com/tv/"
203 |      >电视剧</a>
204 |     </li>
205 |     <li    ><a href="https://movie.douban.com/chart"
206 |      >排行榜</a>
207 |     </li>
208 |     <li    ><a href="https://movie.douban.com/tag/"
209 |      >分类</a>
210 |     </li>
211 |     <li    ><a href="https://movie.douban.com/review/best/"
212 |      >影评</a>
213 |     </li>
214 |     <li    ><a href="https://movie.douban.com/annual/2018?source=navigation"
215 |      >2018年度榜单</a>
216 |     </li>
217 |     <li    ><a href="https://www.douban.com/standbyme/2018?source=navigation"
218 |      >2018书影音报告</a>
219 |     </li>
220 |   </ul>
221 | </div>
222 | 
223 |     <a href="https://movie.douban.com/annual/2018?source=movie_navigation" class="movieannual2018"></a>
224 |   </div>
225 | </div>
226 | 
227 | <script id="suggResult" type="text/x-jquery-tmpl">
228 |   <li data-link="{{= url}}">
229 |             <a href="{{= url}}" onclick="moreurl(this, {from:'movie_search_sugg', query:'{{= keyword }}', subject_id:'{{= id}}', i: '{{= index}}', type: '{{= type}}'})">
230 |             <img src="{{= img}}" width="40" />
231 |             <p>
232 |                 <em>{{= title}}</em>
233 |                 {{if year}}
234 |                     <span>{{= year}}</span>
235 |                 {{/if}}
236 |                 {{if sub_title}}
237 |                     <br /><span>{{= sub_title}}</span>
238 |                 {{/if}}
239 |                 {{if address}}
240 |                     <br /><span>{{= address}}</span>
241 |                 {{/if}}
242 |                 {{if episode}}
243 |                     {{if episode=="unknow"}}
244 |                         <br /><span>集数未知</span>
245 |                     {{else}}
246 |                         <br /><span>共{{= episode}}集</span>
247 |                     {{/if}}
248 |                 {{/if}}
249 |             </p>
250 |         </a>
251 |         </li>
252 |   </script>
253 | 
254 | 
255 | 
256 | 
257 |     <script src="//img3.doubanio.com/dae/accounts/resources/0246c88/movie/bundle.js" defer="defer"></script>
258 | 
259 | 
260 | 
261 | 
262 | 
263 |     
264 |     <div id="wrapper">
265 |         
266 | 
267 |         
268 |     <div id="content">
269 |         
270 |     <h1>托马斯·埃尔姆斯 Thomas Elms</h1>
271 | 
272 |         <div class="grid-16-8 clearfix">
273 |             
274 |             
275 |             <div class="article">
276 |                 
277 | 
278 | 
279 | 
280 | 
281 | <div id="headline" class="item">
282 |     <div class="pic">
283 |         <a class="nbg" title="托马斯·埃尔姆斯 Thomas Elms" href="https://img3.doubanio.com/view/celebrity/s_ratio_celebrity/public/pqElHbrENhp0cel_avatar_uploaded1541512139.45.jpg">
284 |             <img alt="托马斯·埃尔姆斯 Thomas Elms"
285 |                 title="点击看大图"
286 |                 src="https://img3.doubanio.com/view/celebrity/s_ratio_celebrity/public/pqElHbrENhp0cel_avatar_uploaded1541512139.45.jpg">
287 |         </a>
288 |         <span class="gact">
289 |             <a rel="nofollow" href="https://movie.douban.com/celebrity/1404079/detail_edit">增改描述、换头像</a>
290 |         </span>
291 |     </div>
292 |     <div class="info">
293 |         
294 |     <ul class="">
295 |     
296 |         <li>
297 |             <span>性别</span>: 
298 |         男
299 |         </li>
300 |     
301 |     
302 |     
303 |     
304 |         <li>
305 |             <span>职业</span>: 
306 |         演员
307 |         </li>
308 |     
309 |     
310 |     
311 |     
312 |         <li>
313 |             <span>imdb编号</span>: 
314 |         <a href="http://www.imdb.com/name/nm7402258" target="_blank">nm7402258</a>
315 |         </li>
316 |     
317 |     </ul>
318 | 
319 |     </div>
320 | </div>
321 | 
322 | 
323 | 
324 | 
325 | 
326 | 
327 | 
328 | 
329 | 
330 | 
331 | <div id="opt-bar" class="mod">
332 |     <div class="ll">
333 | 
334 |             
335 |     
336 |     <form method="POST" action="connect" class="miniform">
337 |         <input type="submit" class="a-btn j " value="收藏" title="" />
338 |     </form>
339 | 
340 | 
341 |         
342 |     
343 |     <span class="rec" id="None-1404079">
344 |     <a href= "#"
345 |         data-type="None"
346 |         data-url="https://movie.douban.com/celebrity/1404079/"
347 |         data-desc=""
348 |         data-title="影人 托马斯·埃尔姆斯 Thomas Elms (来自豆瓣)"
349 |         data-pic="https://img3.doubanio.com/view/celebrity/s_ratio_celebrity/public/pqElHbrENhp0cel_avatar_uploaded1541512139.45.jpeg"
350 |         class="bn-sharing ">
351 |         分享到
352 |     </a> &nbsp;&nbsp;
353 |     </span>
354 | 
355 |     <script>
356 |         if (!window.DoubanShareMenuList) {
357 |             window.DoubanShareMenuList = [];
358 |         }
359 |         var __cache_url = __cache_url || {};
360 | 
361 |         (function(u){
362 |             if(__cache_url[u]) return;
363 |             __cache_url[u] = true;
364 |             window.DoubanShareIcons = 'https://img3.doubanio.com/f/shire/d15ffd71f3f10a7210448fec5a68eaec66e7f7d0/pics/ic_shares.png';
365 | 
366 |             var initShareButton = function() {
367 |                 $.ajax({url:u,dataType:'script',cache:true});
368 |             };
369 | 
370 |             if (typeof Do == 'function' && 'ready' in Do) {
371 |                 Do(
372 |                     'https://img3.doubanio.com/f/shire/8377b9498330a2e6f056d863987cc7a37eb4d486/css/ui/dialog.css',
373 |                     'https://img3.doubanio.com/f/shire/4ea3216519a6183c7bcd4f7d1a6d4fd57ce1a244/js/ui/dialog.js',
374 |                     'https://img3.doubanio.com/f/movie/c4ab132ff4d3d64a83854c875ea79b8b541faf12/js/movie/lib/qrcode.min.js',
375 |                     initShareButton
376 |                 );
377 |             } else if(typeof Douban == 'object' && 'loader' in Douban) {
378 |                 Douban.loader.batch(
379 |                     'https://img3.doubanio.com/f/shire/8377b9498330a2e6f056d863987cc7a37eb4d486/css/ui/dialog.css',
380 |                     'https://img3.doubanio.com/f/shire/4ea3216519a6183c7bcd4f7d1a6d4fd57ce1a244/js/ui/dialog.js',
381 |                     'https://img3.doubanio.com/f/movie/c4ab132ff4d3d64a83854c875ea79b8b541faf12/js/movie/lib/qrcode.min.js'
382 |                 ).done(initShareButton);
383 |             }
384 | 
385 |         })('https://img3.doubanio.com/f/movie/32be6727ed3ad8f6c4a417d8a086355c3e7d1d27/js/movie/lib/sharebutton.js');
386 |     </script>
387 | 
388 | 
389 |     </div>
390 |     <div class="rr">
391 |         
392 | 
393 | 
394 | 
395 | 
396 |         
397 | 
398 | 
399 | 
400 | 
401 | 
402 | <div class="rec-sec">
403 | <span class="rec">
404 |     <script id="movie-share" type="text/x-html-snippet">
405 |         
406 |     <form class="movie-share" action="/j/share" method="POST">
407 |         <div class="clearfix form-bd">
408 |             <div class="input-area">
409 |                 <textarea name="text" class="share-text" cols="72" data-mention-api="https://api.douban.com/shuo/in/complete?alt=xd&amp;callback=?"></textarea>
410 |                 <input type="hidden" name="target-id" value="1404079">
411 |                 <input type="hidden" name="target-type" value="3">
412 |                 <input type="hidden" name="title" value="托马斯·埃尔姆斯 Thomas Elms">
413 |                 <input type="hidden" name="desc" value="千次伤我心 / 秘令">
414 |                 <input type="hidden" name="redir" value=""/>
415 |                 <div class="mentioned-highlighter"></div>
416 |             </div>
417 | 
418 |             <div class="info-area">
419 |                     <img class="media" src="https://img3.doubanio.com/view/celebrity/s_ratio_celebrity/public/pqElHbrENhp0cel_avatar_uploaded1541512139.45.jpg" />
420 |                 <strong>托马斯·埃尔姆斯 Thomas Elms</strong>
421 |                 <p>千次伤我心 / 秘令</p>
422 |                 <p class="error server-error">&nbsp;</p>
423 |             </div>
424 |         </div>
425 |         <div class="form-ft">
426 |             <div class="form-ft-inner">
427 |                 
428 | 
429 | 
430 | 
431 |                 <span class="avail-num-indicator">140</span>
432 |                 <span class="bn-flat">
433 |                     <input type="submit" value="推荐" />
434 |                 </span>
435 |             </div>
436 |         </div>
437 |     </form>
438 |     
439 |     <div id="suggest-mention-tmpl" style="display:none;">
440 |         <ul>
441 |             {{#users}}
442 |             <li id="{{uid}}">
443 |               <img src="{{avatar}}">{{{username}}}&nbsp;<span>({{{uid}}})</span>
444 |             </li>
445 |             {{/users}}
446 |         </ul>
447 |     </div>
448 | 
449 | 
450 |     </script>
451 | 
452 |         
453 |         <a href="/accounts/register?reason=recommend"  class="j a_show_login lnk-sharing" share-id="1404079" data-mode="plain" data-name="托马斯·埃尔姆斯 Thomas Elms" data-type="" data-desc="千次伤我心 / 秘令" data-href="https://movie.douban.com/celebrity/1404079/" data-image="https://img3.doubanio.com/view/celebrity/s_ratio_celebrity/public/pqElHbrENhp0cel_avatar_uploaded1541512139.45.jpg" data-properties="{}" data-redir="" data-text="" data-apikey="" data-curl="" data-count="10" data-object_kind="1043" data-object_id="1404079" data-target_type="rec" data-target_action="0" data-action_props="{&#34;actor_url&#34;:&#34;https:\/\/movie.douban.com\/celebrity\/1404079\/&#34;,&#34;actor_name&#34;:&#34;托马斯·埃尔姆斯 Thomas Elms&#34;}">推荐</a>
454 | </span>
455 | 
456 | 
457 | </div>
458 | 
459 | 
460 | 
461 | 
462 | 
463 |     </div>
464 | </div>
465 | 
466 | 
467 | 
468 | 
469 | 
470 | 
471 | 
472 | 
473 | 
474 | 
475 | 
476 | 
477 | <div id="intro" class="mod">
478 |     <div class="hd">
479 |     <h2>
480 |         影人简介
481 |             &nbsp;&middot;&nbsp;&middot;&nbsp;&middot;&nbsp;&middot;&nbsp;&middot;&nbsp;&middot;
482 |     </h2>
483 | </div>
484 |     <div class="bd">
485 |             　　
486 |     </div>
487 | </div>
488 | 
489 | 
490 | 
491 | <div id="photos" class="mod">
492 |     <div class="hd">
493 |     <h2>
494 |         影人图片
495 |             &nbsp;&middot;&nbsp;&middot;&nbsp;&middot;&nbsp;&middot;&nbsp;&middot;&nbsp;&middot;
496 |             <span class="pl">&nbsp;(
497 |                 
498 |                 <a href="https://movie.douban.com/celebrity/1404079/photos/" target="_self">全部8张</a>&nbsp;&middot;&nbsp;<a href="https://movie.douban.com/celebrity/1404079/photos/upload" target="_self">上传照片</a>
499 |                 ) </span>
500 |     </h2>
501 | </div>
502 |     
503 | <ul class="pic-col5">
504 |         <li>
505 |             <a href="https://movie.douban.com/celebrity/1404079/photo/2544006557/">
506 |                 <img src="https://img1.doubanio.com/view/photo/sqxs/public/p2544006557.jpg">
507 |         </a>
508 |     </li>
509 |         <li>
510 |             <a href="https://movie.douban.com/celebrity/1404079/photo/2544006523/">
511 |                 <img src="https://img3.doubanio.com/view/photo/sqxs/public/p2544006523.jpg">
512 |         </a>
513 |     </li>
514 |         <li>
515 |             <a href="https://movie.douban.com/celebrity/1404079/photo/2544006522/">
516 |                 <img src="https://img3.doubanio.com/view/photo/sqxs/public/p2544006522.jpg">
517 |         </a>
518 |     </li>
519 |         <li>
520 |             <a href="https://movie.douban.com/celebrity/1404079/photo/2544006520/">
521 |                 <img src="https://img3.doubanio.com/view/photo/sqxs/public/p2544006520.jpg">
522 |         </a>
523 |     </li>
524 |         <li class="last">
525 |             <a href="https://movie.douban.com/celebrity/1404079/photo/2544006519/">
526 |                 <img src="https://img1.doubanio.com/view/photo/sqxs/public/p2544006519.jpg">
527 |         </a>
528 |     </li>
529 | </ul>
530 | 
531 | </div>
532 | 
533 | 
534 | 
535 | 
536 | 
537 | 
538 | 
539 | 
540 | 
541 | 
542 | <div id="recent_movies" class="mod">
543 |     <div class="hd">
544 |         
545 |     <h2>
546 |         最近的5部作品（已上映）
547 |             &nbsp;&middot;&nbsp;&middot;&nbsp;&middot;&nbsp;&middot;&nbsp;&middot;&nbsp;&middot;
548 |             <span class="pl">&nbsp;(
549 |                 
550 |                     <a href="https://movie.douban.com/celebrity/1404079/movies?sortby=time&amp;format=pic" target="_self">全部</a>
551 |                 ) </span>
552 |     </h2>
553 | 
554 |     </div>
555 |         <div class="bd">
556 |             <ul class="list-s">
557 |                     <li class="">
558 |                             <h3>2019</h3>
559 |                         <div class="pic">
560 |                             <a href="https://movie.douban.com/subject/30200428/">
561 |                                 <img class=""
562 |                                     alt="秘令 The Order"
563 |                                     src="https://img1.doubanio.com/view/photo/s_ratio_poster/public/p2548831619.jpg"
564 |                                     title="The Order"/>
565 |                             </a>
566 |                         </div>
567 |                         <div class="info">
568 |                             <a href="https://movie.douban.com/subject/30200428/" title="秘令" class="">秘令</a>
569 |                                 <em>5.3</em>
570 |                         </div>
571 |                     </li>
572 |                     <li class="">
573 |                             <h3>2018</h3>
574 |                         <div class="pic">
575 |                             <a href="https://movie.douban.com/subject/26377050/">
576 |                                 <img class=""
577 |                                     alt="千次伤我心 I Still See You"
578 |                                     src="https://img3.doubanio.com/view/photo/s_ratio_poster/public/p2536762001.jpg"
579 |                                     title="I Still See You"/>
580 |                             </a>
581 |                         </div>
582 |                         <div class="info">
583 |                             <a href="https://movie.douban.com/subject/26377050/" title="千次伤我心" class="">千次伤我心</a>
584 |                                 <em>6.4</em>
585 |                         </div>
586 |                     </li>
587 |             </ul>
588 |         </div>
589 | </div>
590 | 
591 | 
592 | 
593 |     
594 | 
595 | <div id="best_movies" class="mod">
596 |     <div class="hd">
597 |     <h2>
598 |         最受好评的5部作品
599 |             &nbsp;&middot;&nbsp;&middot;&nbsp;&middot;&nbsp;&middot;&nbsp;&middot;&nbsp;&middot;
600 |             <span class="pl">&nbsp;(
601 |                 
602 |                     <a href="https://movie.douban.com/celebrity/1404079/movies?sortby=vote&amp;format=pic" target="_self">全部</a>
603 |                 ) </span>
604 |     </h2>
605 | </div>
606 |     <div class="bd">
607 |         <ul class="list-s">
608 |             <li class="">
609 |                 <div class="pic">
610 |                     <a href="https://movie.douban.com/subject/26377050/">
611 |                         <img alt="千次伤我心 I Still See You"
612 |                             src="https://img3.doubanio.com/view/photo/s_ratio_poster/public/p2536762001.jpg"
613 |                             title="I Still See You"
614 |                             class="" />
615 |                     </a>
616 |                 </div>
617 |                 <div class="info">
618 |                     <a href="https://movie.douban.com/subject/26377050/" title="千次伤我心" class="">千次伤我心</a>
619 |                     <em class="">6.4</em>
620 |                         <div class="pl">2018</div>
621 |                 </div>
622 |             </li>
623 |             <li class="">
624 |                 <div class="pic">
625 |                     <a href="https://movie.douban.com/subject/30200428/">
626 |                         <img alt="秘令 The Order"
627 |                             src="https://img1.doubanio.com/view/photo/s_ratio_poster/public/p2548831619.jpg"
628 |                             title="The Order"
629 |                             class="" />
630 |                     </a>
631 |                 </div>
632 |                 <div class="info">
633 |                     <a href="https://movie.douban.com/subject/30200428/" title="秘令" class="">秘令</a>
634 |                     <em class="">5.3</em>
635 |                         <div class="pl">2019</div>
636 |                 </div>
637 |             </li>
638 |         </ul>
639 |     </div>
640 | </div>
641 | 
642 | 
643 | 
644 | 
645 | 
646 | 
647 |             </div>
648 |             <div class="aside">
649 |                 
650 | 
651 | 
652 | 
653 | 
654 | 
655 | 
656 | 
657 | 
658 | 
659 | 
660 | <!-- douban ad begin -->
661 | <div id="dale_movie_celebrity_top_right"></div>
662 | <!-- douban ad end -->
663 | 
664 | 
665 | 
666 | 
667 | <div id="fans" class="mod">
668 |     <div class="hd">
669 |     <h2>
670 |         托马斯·埃尔姆斯的影迷（7）
671 |             &nbsp;&middot;&nbsp;&middot;&nbsp;&middot;&nbsp;&middot;&nbsp;&middot;&nbsp;&middot;
672 |             <span class="pl">&nbsp;(
673 |                 
674 |                     <a href="https://movie.douban.com/celebrity/1404079/fans" target="_self">全部</a>
675 |                 ) </span>
676 |     </h2>
677 | </div>
678 |     
679 | <ul class="list-s">
680 |     <li class="">
681 |         <a class="nbg" href="https://www.douban.com/people/68613203/">
682 |             <img class="" alt="B.L.P" src="https://img1.doubanio.com/icon/user_normal.jpg">
683 |         </a>
684 |         <span><a class="" href="https://www.douban.com/people/68613203/">B.L.P</a></span>
685 |     </li>
686 |     <li class="">
687 |         <a class="nbg" href="https://www.douban.com/people/OkamiForever/">
688 |             <img class="" alt="Ōkami WTF" src="https://img1.doubanio.com/icon/u166972996-8.jpg">
689 |         </a>
690 |         <span><a class="" href="https://www.douban.com/people/OkamiForever/">Ōkami WTF</a></span>
691 |     </li>
692 |     <li class="">
693 |         <a class="nbg" href="https://www.douban.com/people/168929700/">
694 |             <img class="" alt="LYOOYL" src="https://img3.doubanio.com/icon/u168929700-1.jpg">
695 |         </a>
696 |         <span><a class="" href="https://www.douban.com/people/168929700/">LYOOYL</a></span>
697 |     </li>
698 |     <li class="">
699 |         <a class="nbg" href="https://www.douban.com/people/92775477/">
700 |             <img class="" alt="资深低阶路人" src="https://img1.doubanio.com/icon/u92775477-107.jpg">
701 |         </a>
702 |         <span><a class="" href="https://www.douban.com/people/92775477/">资深低阶路人</a></span>
703 |     </li>
704 |     <li class="">
705 |         <a class="nbg" href="https://www.douban.com/people/dannyshawcn/">
706 |             <img class="" alt="咕咕day" src="https://img3.doubanio.com/icon/u53085212-3.jpg">
707 |         </a>
708 |         <span><a class="" href="https://www.douban.com/people/dannyshawcn/">咕咕day</a></span>
709 |     </li>
710 |     <li class="">
711 |         <a class="nbg" href="https://www.douban.com/people/penguinman/">
712 |             <img class="" alt="微尘" src="https://img1.doubanio.com/icon/u1972010-27.jpg">
713 |         </a>
714 |         <span><a class="" href="https://www.douban.com/people/penguinman/">微尘</a></span>
715 |     </li>
716 |     <li class="">
717 |         <a class="nbg" href="https://www.douban.com/people/172781112/">
718 |             <img class="" alt="lifi" src="https://img1.doubanio.com/icon/user_normal.jpg">
719 |         </a>
720 |         <span><a class="" href="https://www.douban.com/people/172781112/">lifi</a></span>
721 |     </li>
722 | </ul>
723 | 
724 | </div>
725 | 
726 | <div id="dale_movie_celebrity_middle_mini"></div>
727 | 
728 |             </div>
729 |             <div class="extra">
730 |                 
731 |     <div id="dale_movie_celebrity_bottom"></div>
732 | 
733 |             </div>
734 |         </div>
735 |     </div>
736 | 
737 |         
738 |     <div id="footer">
739 |             <div class="footer-extra"></div>
740 |         
741 | <span id="icp" class="fleft gray-link">
742 |     &copy; 2005－2019 douban.com, all rights reserved 北京豆网科技有限公司
743 | </span>
744 | 
745 | <a href="https://www.douban.com/hnypt/variformcyst.py" style="display: none;"></a>
746 | 
747 | <span class="fright">
748 |     <a href="https://www.douban.com/about">关于豆瓣</a>
749 |     · <a href="https://www.douban.com/jobs">在豆瓣工作</a>
750 |     · <a href="https://www.douban.com/about?topic=contactus">联系我们</a>
751 |     · <a href="https://www.douban.com/about?policy=disclaimer">免责声明</a>
752 |     
753 |     · <a href="https://help.douban.com/?app=movie" target="_blank">帮助中心</a>
754 |     · <a href="https://www.douban.com/doubanapp/">移动应用</a>
755 |     · <a href="https://www.douban.com/partner/">豆瓣广告</a>
756 | </span>
757 | 
758 |     </div>
759 | 
760 |     </div>
761 |     <!-- COLLECTED JS -->
762 |         
763 |         
764 |     <link rel="stylesheet" type="text/css" href="https://img3.doubanio.com/f/shire/8377b9498330a2e6f056d863987cc7a37eb4d486/css/ui/dialog.css" />
765 |     <link rel="stylesheet" type="text/css" href="https://img3.doubanio.com/f/movie/4aca95d66d37ec0712b3d19973b5d8feb75f2f05/css/movie/mod/reg_login_pop.css" />
766 |     <script type="text/javascript" src="https://img3.doubanio.com/f/shire/77323ae72a612bba8b65f845491513ff3329b1bb/js/do.js" data-cfg-autoload="false"></script>
767 |     <script type="text/javascript" src="https://img3.doubanio.com/f/shire/4ea3216519a6183c7bcd4f7d1a6d4fd57ce1a244/js/ui/dialog.js"></script>
768 |     <script type="text/javascript">
769 |         var HTTPS_DB='https://www.douban.com';
770 | var account_pop={open:function(o,e){e?referrer="?referrer="+encodeURIComponent(e):referrer="?referrer="+window.location.href;var n="",i="",t=448;n="用户登录",i="https://accounts.douban.com/passport/login_popup?source=movie";var r=document.location.protocol+"//"+document.location.hostname,a=dui.Dialog({width:340,title:n,height:t,cls:"account_pop",isHideTitle:!0,modal:!0,content:"<iframe scrolling='no' frameborder='0' width='340' height='"+t+"' src='"+i+"' name='"+r+"'></iframe>"},!0),c=a.node;if(c.undelegate(),c.delegate(".dui-dialog-close","click",function(){var o=$("body");o.find("#login_msk").hide(),o.find(".account_pop").remove()}),$(window).width()<478){var d="";"reg"===o?d=HTTPS_DB+"/accounts/register"+referrer:"login"===o&&(d=HTTPS_DB+"/accounts/login"+referrer),window.location.href=d}else a.open();$(window).bind("message",function(o){"https://accounts.douban.com"===o.originalEvent.origin&&(c.find("iframe").css("height",o.originalEvent.data),c.height(o.originalEvent.data),a.update())})}};Douban&&Douban.init_show_login&&(Douban.init_show_login=function(o){var e=$(o);e.click(function(){var o=e.data("ref")||"";return account_pop.open("login",o),!1})}),Do(function(){$("body").delegate(".pop_register","click",function(o){o.preventDefault();var e=$(this).data("ref")||"";return account_pop.open("reg",e),!1}),$("body").delegate(".pop_login","click",function(o){o.preventDefault();var e=$(this).data("ref")||"";return account_pop.open("login",e),!1})});
771 |     </script>
772 | 
773 |     
774 | <!-- douban ad begin -->
775 | 
776 | 
777 | 
778 | 
779 | 
780 |     
781 | <script type="text/javascript">
782 |     (function (global) {
783 |         var newNode = global.document.createElement('script'),
784 |             existingNode = global.document.getElementsByTagName('script')[0],
785 |             adSource = '//erebor.douban.com/',
786 |             userId = '',
787 |             browserId = 'ywWE5no66Y8',
788 |             criteria = '3:/celebrity/1404079/',
789 |             preview = '',
790 |             debug = false,
791 |             adSlots = ['dale_movie_celebrity_top_right', 'dale_movie_celebrity_bottom', 'dale_movie_celebrity_middle_mini'];
792 | 
793 |         global.DoubanAdRequest = {src: adSource, uid: userId, bid: browserId, crtr: criteria, prv: preview, debug: debug};
794 |         global.DoubanAdSlots = (global.DoubanAdSlots || []).concat(adSlots);
795 | 
796 |         newNode.setAttribute('type', 'text/javascript');
797 |         newNode.setAttribute('src', 'https://img3.doubanio.com/f/adjs/d94309c97a130c8c6be446ca7b35add7cce9b84e/ad.release.js');
798 |         newNode.setAttribute('async', true);
799 |         existingNode.parentNode.insertBefore(newNode, existingNode);
800 |     })(this);
801 | </script>
802 | 
803 | 
804 | 
805 | 
806 | 
807 | 
808 | 
809 | 
810 | 
811 | 
812 | <!-- douban ad end -->
813 | 
814 |     <script type="text/javascript" src="https://img3.doubanio.com/f/shire/1aa9d13e2c2ebdbd027307b5f91550cbd4ec2279/js/lib/jquery.ba-hashchange.min.js"></script>
815 |     <script type="text/javascript">
816 |         $(function(){
817 |             $(window).hashchange();
818 |             if(window.location.href.indexOf("?")>0){
819 |                 if(window.history.pushState){
820 |                     window.history.pushState(null, document.title,  window.location.href.substring(0,window.location.href.indexOf("?")));
821 |                 }
822 |             }
823 |         });
824 |     </script>
825 | 
826 | 
827 |     
828 |   
829 | 
830 | 
831 | 
832 | 
833 | 
834 | 
835 | 
836 | 
837 | 
838 | <script type="text/javascript">
839 | var _paq = _paq || [];
840 | _paq.push(['trackPageView']);
841 | _paq.push(['enableLinkTracking']);
842 | (function() {
843 |     var p=(('https:' == document.location.protocol) ? 'https' : 'http'), u=p+'://fundin.douban.com/';
844 |     _paq.push(['setTrackerUrl', u+'piwik']);
845 |     _paq.push(['setSiteId', '100001']);
846 |     var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
847 |     g.type='text/javascript';
848 |     g.defer=true;
849 |     g.async=true;
850 |     g.src=p+'://img3.doubanio.com/dae/fundin/piwik.js';
851 |     s.parentNode.insertBefore(g,s);
852 | })();
853 | </script>
854 | 
855 | <script type="text/javascript">
856 | var setMethodWithNs = function(namespace) {
857 |   var ns = namespace ? namespace + '.' : ''
858 |     , fn = function(string) {
859 |         if(!ns) {return string}
860 |         return ns + string
861 |       }
862 |   return fn
863 | }
864 | 
865 | var gaWithNamespace = function(fn, namespace) {
866 |   var method = setMethodWithNs(namespace)
867 |   fn.call(this, method)
868 | }
869 | 
870 | var _gaq = _gaq || []
871 |   , accounts = [
872 |       { id: 'UA-7019765-1', namespace: 'douban' }
873 |     , { id: 'UA-7019765-19', namespace: '' }
874 |     ]
875 |   , gaInit = function(account) {
876 |       gaWithNamespace(function(method) {
877 |         gaInitFn.call(this, method, account)
878 |       }, account.namespace)
879 |     }
880 |   , gaInitFn = function(method, account) {
881 |       _gaq.push([method('_setAccount'), account.id]);
882 |       _gaq.push([method('_setSampleRate'), '5']);
883 | 
884 |       
885 |   _gaq.push([method('_addOrganic'), 'google', 'q'])
886 |   _gaq.push([method('_addOrganic'), 'baidu', 'wd'])
887 |   _gaq.push([method('_addOrganic'), 'soso', 'w'])
888 |   _gaq.push([method('_addOrganic'), 'youdao', 'q'])
889 |   _gaq.push([method('_addOrganic'), 'so.360.cn', 'q'])
890 |   _gaq.push([method('_addOrganic'), 'sogou', 'query'])
891 |   if (account.namespace) {
892 |     _gaq.push([method('_addIgnoredOrganic'), '豆瓣'])
893 |     _gaq.push([method('_addIgnoredOrganic'), 'douban'])
894 |     _gaq.push([method('_addIgnoredOrganic'), '豆瓣网'])
895 |     _gaq.push([method('_addIgnoredOrganic'), 'www.douban.com'])
896 |   }
897 | 
898 |       if (account.namespace === 'douban') {
899 |         _gaq.push([method('_setDomainName'), '.douban.com'])
900 |       }
901 | 
902 |         _gaq.push([method('_setCustomVar'), 1, 'responsive_view_mode', 'desktop', 3])
903 | 
904 |         _gaq.push([method('_setCustomVar'), 2, 'login_status', '0', 2]);
905 | 
906 |       _gaq.push([method('_trackPageview')])
907 |     }
908 | 
909 | for(var i = 0, l = accounts.length; i < l; i++) {
910 |   var account = accounts[i]
911 |   gaInit(account)
912 | }
913 | 
914 | 
915 | ;(function() {
916 |     var ga = document.createElement('script');
917 |     ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
918 |     ga.setAttribute('async', 'true');
919 |     document.documentElement.firstChild.appendChild(ga);
920 | })()
921 | </script>
922 | 
923 | 
924 | 
925 | 
926 | 
927 | 
928 | 
929 | 
930 |       
931 |     
932 | 
933 |     <!-- brand38-docker-->
934 | 
935 |   <script>_SPLITTEST=''</script>
936 | </body>
937 | 
938 | </html>
939 | 
940 | 
941 | 


--------------------------------------------------------------------------------
/爬虫与网页分析/网页解析.py:
--------------------------------------------------------------------------------
  1 | # --coding:utf-8--
  2 | # Author:Clark Xu, Hang Shang
  3 | # Time:2019/3/20 16:42
  4 | # # -*- coding: utf-8 -*-
  5 | import requests
  6 | import os
  7 | import time
  8 | import sys
  9 | 
 10 | reload(sys)
 11 | sys.setdefaultencoding('utf8')
 12 | 
 13 | 
 14 | from bs4 import BeautifulSoup
 15 | import json
 16 | 
 17 | import os
 18 | current_path = sys.path[0]
 19 | if 'final_result' in os.listdir(current_path):
 20 |     os.remove('final_result')
 21 | def name_extract(soup1):
 22 |     '''
 23 |     Inputs:
 24 |         soup1 : the source code BeatifulSoup data format
 25 | 
 26 |     Returns:
 27 |         actor name
 28 |     '''
 29 |     tmp_info = soup1.find("h1")
 30 |     return "" if tmp_info == None else soup1.find("h1").text.split(" ")[0]
 31 | 
 32 | def basic_info_extract(soup1):
 33 |     '''extract personal information
 34 |     Inputs:
 35 |         soup1 : the source code BeatifulSoup data format
 36 | 
 37 |     Returns:
 38 |         basic info (补充)
 39 |     '''
 40 | 
 41 |     b = soup1.find_all("div",{"class":"info"})
 42 |     basic_info_dict = {'star': '', 'bd': '', 'bp': '', 'imdb': ''}
 43 |     if len(b) == 0:
 44 |         return ["","","",""]
 45 | 
 46 |     c = b[0].find("ul")
 47 |     if c == None:
 48 |         return ["", "", "", ""]
 49 | 
 50 |     info_text = c.find_all("li")#.text
 51 | 
 52 |     basic_info_list = []
 53 |     for i in range(len(info_text)):
 54 |         if info_text[i].text.replace(u" ", u"") == u"":
 55 |             continue
 56 |         info_text[i] = info_text[i].text.replace(u" ", u"").replace('\n', '')
 57 | 
 58 |         text_element = info_text[i].strip().split(':')
 59 |         # 获取类别
 60 |         info_class = info_text[i].split(":")[0]
 61 |         # if(d[i].text):
 62 |         if info_class == u"星座":
 63 |             basic_info_dict['star'] = info_text[i].split(":")[1]
 64 |         if info_class == u"出生日期":
 65 |             basic_info_dict['bd'] = info_text[i].split(":")[1]
 66 |         if info_class == u"出生地":
 67 |             basic_info_dict['bp'] = info_text[i].split(":")[1]
 68 |         if info_class == u"imdb编号":
 69 |             basic_info_dict['imdb'] = info_text[i].split(":")[1]
 70 | 
 71 | 
 72 |     basic_info_list.append(basic_info_dict['star'])
 73 |     basic_info_list.append(basic_info_dict['bd'])
 74 |     basic_info_list.append(basic_info_dict['bp'])
 75 |     basic_info_list.append(basic_info_dict['imdb'])
 76 | 
 77 |     return basic_info_list
 78 | 
 79 | # celebrity introduction
 80 | def intro_extract(soup1):
 81 |     '''actor information
 82 |     Inputs:
 83 |         soup1 : the source code BeatifulSoup data format
 84 | 
 85 |     Returns:
 86 |         actor info
 87 |     '''
 88 |     intro = soup1.find_all("div", attrs = {"id" : "intro", "class" : "mod"})
 89 |     if len(intro) == 0:
 90 |         return ""
 91 |     brief = intro[0].find_all("div", {"class": "bd"})
 92 |     return 0 if len(brief) == 0 else brief[0].text.replace(" ","").replace('\n', '').replace('\u3000',"")
 93 | 
 94 | def movies_extract(soup1):
 95 |     '''Recently five movies
 96 |     Inputs:
 97 |         soup1 : the source code BeatifulSoup data format
 98 | 
 99 |     Returns:
100 |         five most recently movies/master work
101 |     '''
102 |     master_work=[]
103 |     movie = ""
104 |     recent = soup1.find_all("div",attrs = { "id" : "recent_movies" , "class" : "mod"})
105 |     if len(recent) !=0:
106 |         latest_five = recent[0].find_all("div",{"class":"bd"})
107 |         if len(latest_five) != 0:
108 |             b1 = latest_five[0].find("ul")
109 |             if len(b1) != 0:
110 |                 c1 = b1.find_all("li")
111 |                 for i in range(len(c1)):
112 |                     d1 = c1[i].find_all("div", attrs = {"class":"info"})
113 |                     if len(d1) != 0:
114 |                         e1 = d1[0].find_all("a")
115 |                         if len(e1) != 0:
116 |                             m_name = e1[0].get('title')
117 |                             if e1[0].get('href') != None:
118 |                                 m_id = e1[0].get('href').split("/")[-2]
119 |                             else:
120 |                                 m_id = ''
121 |                             a_1 = {"name": m_name, "douban": m_id}
122 |                             # j_1 = json.dumps(a_1, ensure_ascii=False)
123 |                             master_work.append(a_1)
124 | 
125 |     best = soup1.find_all("div",attrs = {"id":"recent_movies","class":"mod"})
126 |     if len(best) != 0:
127 |         best_five = recent[0].find_all("div",{"class":"bd"})
128 |         if len(best_five) != 0:
129 |             b2=best_five[0].find("ul")
130 |             if len(b2) != 0:
131 |                 c2=b2.find_all("li")
132 |                 for i in range(len(c2)):
133 |                     d2 = c2[i].find_all("div",attrs = {"class":"info"})
134 |                     if len(d2) != 0:
135 |                         e2 = d2[0].find_all("a")
136 |                         if len(e2) != 0:
137 |                             m_name = e2[0].get('title')
138 |                             if e2[0].get('href') != None:
139 |                                 m_id = e2[0].get('href').split("/")[-2]
140 |                             else:
141 |                                 m_id=''
142 |                             a_2 = {"name": m_name, "douban": m_id}
143 |                             # j_1 = json.dumps(a_1, ensure_ascii=False)
144 |                             master_work.append(a_2)
145 | 
146 |     return master_work
147 | 
148 | 
def extract_source(source_code):
    '''Turn one page's source code into a structured record.

    Inputs:
        source_code : the web page source code

    Returns:
        dict with the keys c_name, e_name, star, birthdate, birthplace,
        db_id, intro and movie, filled from the extractor functions
    '''
    soup = BeautifulSoup(source_code)

    # Extractors run in the same order as before.
    name = name_extract(soup)
    # basic_info_extract yields [star, birth date, birth place, imdb number].
    star, birthdate, birthplace, imdb = basic_info_extract(soup)
    intro = intro_extract(soup)
    movies = movies_extract(soup)

    record = {
        "c_name": name,
        "e_name": '',
        "star": star,
        "birthdate": birthdate,
        "birthplace": birthplace,
        # NOTE(review): db_id receives the imdb number here; main() replaces
        # it with the douban id afterwards.
        "db_id": imdb,
        "intro": intro,
        "movie": movies,
    }
    return record
173 | 
174 | 
175 | 
def del_dup(list1):
    '''Merge movie lists by dropping repeated entries.

    Inputs:
        list1 : list of dicts that each have a 'douban' key

    Returns:
        new list holding, in the original order, the first entry seen for
        every distinct 'douban' value
    '''
    seen_ids = set()
    unique_movies = []
    for movie in list1:
        movie_id = movie['douban']
        if movie_id in seen_ids:
            continue
        seen_ids.add(movie_id)
        unique_movies.append(movie)
    return unique_movies
186 | 
def main():
    '''Extract actor structure data.

    Reads the tab-separated index file data/actor_info1; for every record
    whose douban id has a saved page under data/, parses that page with
    extract_source(), overrides the names and id with the index values,
    merges the movie lists, and appends one JSON line to "final_result".
    '''
    path = "data/"
    file_list = set(os.listdir(path))

    # Parse the index file first.
    with open("data/actor_info1", 'r') as f:
        each_info = f.readlines()

    for record in each_info:
        all_info = record.split('\t')
        if len(all_info) < 4:
            # Blank or truncated line (e.g. a trailing newline at the end of
            # the file); indexing it used to raise IndexError.
            continue

        c_name = all_info[0]
        e_name = all_info[1].replace('english_name-', '')
        douban_id = all_info[2].replace('douban_id-', '')
        master_work = all_info[3].replace('master_word-', '').replace('\n', '')

        # Skip records without a name or without a saved page.
        if c_name == "" or douban_id not in file_list:
            continue

        # Entries are split on '|x02'; each entry is "<id>|x01<name>".
        master_work_list = []
        for entry in master_work.split('|x02'):
            if entry == "":
                continue
            pieces = entry.split('|x01')
            if len(pieces) < 2:
                # Entry without a name part: ignore instead of crashing.
                continue
            master_work_list.append({"name": pieces[1], "douban": pieces[0]})

        # The page handle is closed as soon as it has been read
        # (it used to be left open for every record).
        with open(path + douban_id, 'r') as page:
            source = page.read()

        final_info = extract_source(source)
        final_info["c_name"] = c_name
        final_info["e_name"] = e_name
        final_info["db_id"] = douban_id
        final_info["movie"] = del_dup(final_info["movie"] + master_work_list)

        with open("final_result", 'a') as f1:
            f1.write(json.dumps(final_info, ensure_ascii=False) + "\n")
228 | 
229 | 
if __name__ == '__main__':
    # Guarded so that importing this module (e.g. to reuse the extractors)
    # does not start a full extraction run. The parenthesised single-string
    # form prints the same "<timestamp> start/end" text on Python 2 and 3.
    print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + ' start')
    main()
    print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + ' end')
233 | 
234 | 
235 | 
236 | 
237 | 
238 | 
239 | 


--------------------------------------------------------------------------------