├── .idea ├── .name ├── encodings.xml ├── misc.xml ├── modules.xml ├── p2p.iml ├── vcs.xml └── workspace.xml ├── README.md ├── __init__.py ├── data_to_mongodb.py ├── doc ├── imgs │ ├── 01.png │ ├── 02.png │ ├── 03.png │ ├── 04.png │ ├── 05.png │ ├── 06.png │ ├── 07.png │ ├── 08.png │ └── 09.png └── klj.pdf ├── others ├── 后台1 │ ├── get_bad_platform │ │ ├── .DS_Store │ │ ├── get_data.py │ │ └── readme.txt │ ├── get_history_data │ │ ├── check.py │ │ ├── get_data.py │ │ └── readme.txt │ ├── get_recent_news │ │ ├── .DS_Store │ │ ├── get_baidu.py │ │ ├── get_news.py │ │ ├── merge_data.py │ │ └── readme.txt │ ├── get_wangdaizhijia │ │ ├── .DS_Store │ │ ├── check.py │ │ ├── get_plat_form.py │ │ └── readme.txt │ └── get_wangdaizhijia_each_platform │ │ ├── .DS_Store │ │ ├── check.py │ │ ├── get_all_plat_id_for_search.py │ │ ├── get_display_detail │ │ ├── check.py │ │ ├── display_platform_detail.json │ │ ├── display_platform_detail_readme.txt │ │ ├── get_display_detail.py │ │ └── result.json │ │ ├── get_display_platform │ │ ├── check.py │ │ ├── display_platform.json │ │ ├── display_platform.py │ │ ├── get_hot.py │ │ ├── get_hotplat_charts.py │ │ └── readme.txt │ │ ├── get_platform_charts.py │ │ ├── get_platform_review.py │ │ ├── get_recent_news │ │ ├── check_data.py │ │ ├── filter.py │ │ └── get_news.py │ │ ├── get_recent_reviews │ │ ├── check.py │ │ ├── get_recent_review.py │ │ ├── recent_reviews_readme.txt │ │ └── reviews_filter.py │ │ ├── get_valid_reviews │ │ ├── check.py │ │ ├── display_platform.json │ │ ├── filter.py │ │ ├── get_hotplat_reviews.py │ │ └── plat_form_reviews_v2_readme.txt │ │ ├── platform_chart_readme.txt │ │ └── platform_search.json ├── 后台2 │ ├── article_classify.py │ ├── article_classify.pyc │ ├── article_data_loads.py │ ├── article_data_loads_delta.py │ ├── article_sentiment_extract.py │ ├── bad_platform_analyze.py │ ├── bad_platform_healthscore.py │ ├── demo_data_prepare.py │ ├── helper │ │ ├── __init__.py │ │ ├── __init__.pyc │ │ ├── get_finance_nouns.py │ │ ├── mongoDB_process.py │ │ ├── myio.py │ │ ├── myio.pyc │ │ ├── nlp_model.py │ │ ├── nlp_model.pyc │ │ ├── sentiments_analyze.py │ │ ├── sentiments_analyze.pyc │ │ ├── textprocessing.py │ │ └── textprocessing.pyc │ ├── hotEvent_trace.py │ ├── hot_keywords_extract.py │ ├── industry_analyze.py │ ├── knowledge_graph_build.py │ ├── mongoDB_Test.py │ ├── platform_article_keys_extract.py │ ├── platform_data_loads.py │ ├── process_analyze.py │ ├── sparser │ │ ├── hexun │ │ │ ├── ReadMe.txt │ │ │ └── hexun.py │ │ ├── p2pguancha_news.txt │ │ └── p2pguancha_sparser.py │ ├── spider │ │ ├── __init__.py │ │ ├── caixin_extractNews.py │ │ ├── extract_p2p_news.py │ │ ├── hujin_institute_process.py │ │ ├── ifeng_extractNews.py │ │ ├── jpm_extractNews.py │ │ ├── process_wdzjdata.py │ │ ├── sina_extractNews.py │ │ ├── weixin_extractNews.py │ │ ├── wy163_extractNews.py │ │ └── zhongshen_extractNews.py │ ├── summary_analyze.py │ ├── temp.py │ ├── topic_model.py │ ├── ugc_quality.py │ └── vectorize.py └── 爬虫 │ └── wd │ ├── bbs_rong360 │ ├── bbs_rong360 │ │ ├── .DS_Store │ │ ├── __init__.py │ │ ├── __init__.pyc │ │ ├── items.py │ │ ├── middlewares.pyc │ │ ├── pipelines.py │ │ ├── settings.py │ │ ├── settings.pyc │ │ └── spiders │ │ │ ├── __init__.py │ │ │ ├── __init__.pyc │ │ │ ├── bbs.py │ │ │ ├── bbs.pyc │ │ │ ├── content.py │ │ │ ├── content.pyc │ │ │ ├── detail.py │ │ │ └── detail.pyc │ ├── proxy_inuse.txt │ ├── randomproxy.py │ ├── randomproxy.pyc │ ├── scrapy.cfg │ └── urls.txt │ └── 爬虫文档.txt ├── run.py ├── static ├── css │ ├── 
bootstrap-theme.min.css │ ├── bootstrap.min.css │ ├── dashboard.css │ └── sign_in.css ├── data │ ├── charts_data.json │ ├── hot_keyword.json │ ├── hot_topic │ │ ├── 1 │ │ │ ├── hot.json │ │ │ ├── keywords.json │ │ │ └── news.json │ │ ├── 2 │ │ │ ├── hot.json │ │ │ ├── keywords.json │ │ │ └── news.json │ │ ├── 3 │ │ │ ├── hot.json │ │ │ ├── keywords.json │ │ │ └── news.json │ │ └── 4 │ │ │ ├── hot.json │ │ │ ├── keywords.json │ │ │ └── news.json │ ├── plat_recent_news.json │ ├── plat_related_news.json │ ├── plat_top_labels_sentiment.json │ ├── platform_info.json │ ├── platform_news_keywords.json │ ├── platform_reviews_v4.json │ ├── problem_platform.json │ ├── raw │ │ ├── news.json │ │ ├── opinion.json │ │ ├── policy.json │ │ └── ugc.csv │ └── recent_reviews.json ├── fonts │ ├── glyphicons-halflings-regular.eot │ ├── glyphicons-halflings-regular.svg │ ├── glyphicons-halflings-regular.ttf │ ├── glyphicons-halflings-regular.woff │ └── glyphicons-halflings-regular.woff2 ├── img │ ├── bg.jpg │ ├── detail.png │ ├── dl.jpg │ ├── hot_topic_1.jpg │ ├── hot_topic_2.jpg │ ├── hot_topic_3.jpg │ ├── hot_topic_4.jpg │ ├── mh3.jpg │ └── not_found.jpg └── js │ ├── bootstrap.min.js │ ├── echarts-all-2.2.7.js │ ├── jquery-1.12.1.min.js │ ├── jquery.cookie.js │ └── p2p │ ├── grzx.js │ ├── layout.js │ ├── ptda_detail_info.js │ ├── ptda_detail_rank.js │ ├── qwzx.js │ ├── qwzx_hot_topic.js │ ├── qwzx_type.js │ ├── qwzx_type_detail.js │ ├── tzgw.js │ ├── wtpt_da.js │ ├── wtpt_fx.js │ └── yqdp_charts.js ├── templates ├── detail_info.html ├── detail_navigation.html ├── detail_problem.html ├── detail_problem_analyze.html ├── detail_rank.html ├── grzx.html ├── home.html ├── info.html ├── info_hot_topic.html ├── info_hot_topic_news_detail.html ├── info_type.html ├── info_type_detail.html ├── layout.html ├── register.html ├── search.html ├── search_detail_info.html ├── search_info.html ├── search_not_found.html ├── sign_in.html └── yqdp.html └── test_db.py /.idea/.name: -------------------------------------------------------------------------------- 1 | p2p -------------------------------------------------------------------------------- /.idea/encodings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/p2p.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 15 | 16 | 17 | 19 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 一个基于 python 的 flask 框架的资讯网站 2 | 3 | 演示地址: http://119.29.100.53:8086/ 4 | ---- 5 | 6 | # 1 背景介绍 7 | 该比赛要求参赛者开发一款数据舆情产品,帮助用户了解 P2P 行业现状。本人在比赛中负责网站的设计、开发和部署。团队最终排名第2。涉及内容: 8 | * 前端:HTML5 
+ CSS + JavaScript + JSON
9 | * 后台:Python轻量级Web应用框架Flask
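下面用一段极简示意代码说明上述前后端如何配合:Flask 后台从 MongoDB 读取数据,以 JSON 返回给前端 JS 或渲染模板页面。注意这只是一个示意草稿,并不是本项目实际的 `run.py`;路由 `/api/news` 为假设的名字,数据库 `p2p`、集合 `news`、端口 8086 与后文 `data_to_mongodb.py` 及启动说明保持一致。

```python
# -*- coding: utf-8 -*-
# 示意:Flask 后台从 MongoDB 读取资讯数据,返回 JSON 给前端,或渲染模板页面(非项目实际代码)
from flask import Flask, jsonify, render_template
from pymongo import MongoClient

app = Flask(__name__)
db = MongoClient('localhost', 27017).p2p      # 与 data_to_mongodb.py 使用同一个数据库

@app.route('/api/news')                       # 假设的接口名,仅作说明
def api_news():
    # 去掉 _id 字段方便 JSON 序列化,这里只取前 10 条
    docs = list(db.news.find({}, {'_id': 0}).limit(10))
    return jsonify(news=docs)

@app.route('/')
def home():
    # templates/home.html 为项目中已有模板;实际页面可能还需要传入其它参数
    return render_template('home.html')

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=8086)
```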
10 | 11 | # 2 项目基本介绍 12 | * [项目介绍PPT](https://github.com/mindawei/p2p/blob/master/doc/klj.pdf)。 13 | * 本项目主要是一个展示数据的网站。 14 | * 数据来源是其它三位队友爬取数据后处理得到的,他们的项目在[ others ](https://github.com/mindawei/p2p/tree/master/others)目录中。 15 | * 本项目数据源在[ static/data ](https://github.com/mindawei/p2p/tree/master/static/data)目录中,项目启动前需要将它们导入到 mongodb 数据库中。 16 | 17 | # 3 QuickStart 18 | ## 3.1 安装环境 19 | 1. 安装 python 2.7
20 | 下载 python 安装文件,安装后配置系统环境变量。
21 | 可参考[《Flask入门_Windows下安装》](https://www.cnblogs.com/Christeen/p/6514713.html) 22 | 23 | 2. 安装 flask
24 | 命令行运行 `pip install flask`。
25 | 可参考[《Flask入门_Windows下安装》](https://www.cnblogs.com/Christeen/p/6514713.html) 26 | 27 | 3. 安装 pymongo
28 | 命令行运行 `pip install pymongo`。 29 | 30 | 4. 安装mongodb 数据库
31 | * 官网下载[安装包](https://www.mongodb.com/download-center#community) 32 | * 创建一个db文件夹,我的文件位置是 `C:\software\mongdb3.6.3\db` 33 | 可参考[《Windows 平台安装 MongoDB》](http://www.runoob.com/mongodb/mongodb-window-install.html) 34 | 35 | ## 3.2 启动项目 36 | 1. 在 mongodb 目录的 bin 目录中执行 mongod.exe 文件。
37 | `C:\software\mongdb3.6.3\bin>mongod --dbpath C:\software\mongdb3.6.3\db` 38 | ![](https://github.com/mindawei/p2p/blob/master/doc/imgs/01.png) 39 | 40 | 2. 将数据导入到 mongodb 数据库中。
41 | 项目目录下运行 `data_to_mongodb.py` 文件, 命令行输入 `python data_to_mongodb.py`。 42 | ![](https://github.com/mindawei/p2p/blob/master/doc/imgs/02.png) 43 | 44 | 3. 启动项目。
45 | 项目目录下运行 `run.py` 文件, 命令行输入 `python run.py`。 46 | ![](https://github.com/mindawei/p2p/blob/master/doc/imgs/03.png) 47 | 48 | 4. 访问项目
49 | 输入 `http://localhost:8086` 进行访问。 50 | 51 | # 4 效果展示 52 | 访问地址: http://119.29.100.53:8086/ 53 | 54 | 一个测试账号 用户名:test 密码: 123 55 | 56 | ![](https://github.com/mindawei/p2p/blob/master/doc/imgs/04.png) 57 | 58 | ![](https://github.com/mindawei/p2p/blob/master/doc/imgs/05.png) 59 | 60 | ![](https://github.com/mindawei/p2p/blob/master/doc/imgs/06.png) 61 | 62 | ![](https://github.com/mindawei/p2p/blob/master/doc/imgs/07.png) 63 | 64 | ![](https://github.com/mindawei/p2p/blob/master/doc/imgs/08.png) 65 | 66 | ![](https://github.com/mindawei/p2p/blob/master/doc/imgs/09.png) 67 | 68 | # 5 后续项目 69 | [zsw](https://github.com/mindawei/zsw) 是基于该项目的一个简化版本,但是增加了一些帖子评论等功能。 70 | 71 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindawei/p2p/b0771100b8d4740906a186d9668b7d662214409c/__init__.py -------------------------------------------------------------------------------- /data_to_mongodb.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from pymongo import MongoClient 3 | import json 4 | import csv 5 | 6 | # 连接 7 | conn = MongoClient('localhost', 27017) 8 | # 连接数据库 9 | db = conn.p2p 10 | 11 | # 资讯类型 12 | 13 | # 新闻入库 14 | db.news.remove() 15 | data = json.load(open('static/data/raw/news.json', 'r')) 16 | db.news.insert(data) 17 | print("now the number of news is:%d" % db.news.count()) 18 | 19 | # 政策入库 20 | db.policy.remove() 21 | data = json.load(open('static/data/raw/policy.json', 'r')) 22 | db.policy.insert(data) 23 | print("now the number of policy is:%d" % db.policy.count()) 24 | 25 | # 政策入库 26 | db.opinion.remove() 27 | data = json.load(open('static/data/raw/opinion.json', 'r')) 28 | db.opinion.insert(data) 29 | print("now the number of opinion is:%d" % db.opinion.count()) 30 | 31 | 32 | # 用户评论入库 33 | db.ugc.remove() 34 | data = csv.reader(file('static/data/raw/ugc.csv', 'rb')) 35 | for line in data: 36 | if data.line_num == 1: 37 | continue 38 | item = dict() 39 | item['_id'] = line[0].decode('utf-8') 40 | item['item_type'] = line[1].decode('utf-8') 41 | item['source'] = line[2].decode('utf-8') 42 | item['url'] = line[3].decode('utf-8') 43 | item['author'] = line[4].decode('utf-8') 44 | item['title'] = line[5].decode('utf-8') 45 | item['content'] = line[6].decode('utf-8') 46 | item['item_pub_time'] = line[7].decode('utf-8') 47 | item['tags'] = line[8].decode('utf-8') 48 | item['cmt_cnt'] = line[9].decode('utf-8') 49 | item['fav_cnt'] = line[10].decode('utf-8') 50 | item['gmt_create'] = line[11].decode('utf-8') 51 | item['exinfo1'] = line[12].decode('utf-8') 52 | item['exinfo2'] = line[13].decode('utf-8') 53 | db.ugc.insert(item) 54 | print("now the number of ugc is:%d" % db.ugc.count()) 55 | 56 | # 初始用户 57 | db.user.remove(); 58 | db.user.insert({'username': 'mdw', 'password': '123','platform_names': [u'拍拍贷']}) 59 | print("now the number of user is:%d" % db.user.count()) 60 | -------------------------------------------------------------------------------- /doc/imgs/01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindawei/p2p/b0771100b8d4740906a186d9668b7d662214409c/doc/imgs/01.png -------------------------------------------------------------------------------- /doc/imgs/02.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mindawei/p2p/b0771100b8d4740906a186d9668b7d662214409c/doc/imgs/02.png -------------------------------------------------------------------------------- /doc/imgs/03.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindawei/p2p/b0771100b8d4740906a186d9668b7d662214409c/doc/imgs/03.png -------------------------------------------------------------------------------- /doc/imgs/04.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindawei/p2p/b0771100b8d4740906a186d9668b7d662214409c/doc/imgs/04.png -------------------------------------------------------------------------------- /doc/imgs/05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindawei/p2p/b0771100b8d4740906a186d9668b7d662214409c/doc/imgs/05.png -------------------------------------------------------------------------------- /doc/imgs/06.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindawei/p2p/b0771100b8d4740906a186d9668b7d662214409c/doc/imgs/06.png -------------------------------------------------------------------------------- /doc/imgs/07.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindawei/p2p/b0771100b8d4740906a186d9668b7d662214409c/doc/imgs/07.png -------------------------------------------------------------------------------- /doc/imgs/08.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindawei/p2p/b0771100b8d4740906a186d9668b7d662214409c/doc/imgs/08.png -------------------------------------------------------------------------------- /doc/imgs/09.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindawei/p2p/b0771100b8d4740906a186d9668b7d662214409c/doc/imgs/09.png -------------------------------------------------------------------------------- /doc/klj.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindawei/p2p/b0771100b8d4740906a186d9668b7d662214409c/doc/klj.pdf -------------------------------------------------------------------------------- /others/后台1/get_bad_platform/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindawei/p2p/b0771100b8d4740906a186d9668b7d662214409c/others/后台1/get_bad_platform/.DS_Store -------------------------------------------------------------------------------- /others/后台1/get_bad_platform/get_data.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from bs4 import BeautifulSoup 3 | import json 4 | 5 | data = requests.get("http://shuju.wdzj.com/problem-1.html", "html.parser").text 6 | soup = BeautifulSoup(data, "html.parser") 7 | all_data = soup.find_all("tr", attrs={"class": ""}) 8 | result = [] 9 | for item in all_data[1:]: 10 | raw = item.text.strip().split('\n') 11 | result.append({"index": raw[0], "platform_name": raw[1], "problem_time": raw[2], "online_time": raw[3], 12 | "registration capital": raw[4], "region": raw[5], "money": raw[6], "number": raw[7], 13 | "event_type": raw[8]}) 14 | all_data2 = soup.find_all("tr", attrs={"class": "tb_bg_gray"}) 15 | for 
item in all_data2: 16 | raw = item.text.strip().split('\n') 17 | result.append({"index": raw[0], "platform_name": raw[1], "problem_time": raw[2], "online_time": raw[3], 18 | "registration capital": raw[4], "region": raw[5], "money": raw[6], "number": raw[7], 19 | "event_type": raw[8]}) 20 | 21 | 22 | def toint(str): 23 | return int(str.replace(",", "")) 24 | 25 | 26 | result.sort(key=lambda x: toint(x['index'])) 27 | json.dump(result, open('problem_platform.json', 'w')) 28 | -------------------------------------------------------------------------------- /others/后台1/get_bad_platform/readme.txt: -------------------------------------------------------------------------------- 1 | 数据为问题平台基本情况,字段对应为: 2 | index : 序号 3 | platform_name : 平台名 4 | event_type : 时间类型 5 | problem_time : 问题时间 6 | money : 待收金额 7 | region : 地区 8 | online_time : 上线时间 9 | number : 涉及人数 10 | registration capital : 注册资本 11 | -------------------------------------------------------------------------------- /others/后台1/get_history_data/check.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | data = json.load(open('platform_web_info.json', 'r')) 4 | print len(data) 5 | -------------------------------------------------------------------------------- /others/后台1/get_history_data/get_data.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | from bs4 import BeautifulSoup 4 | 5 | header = { 6 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.110 Safari/537.36' 7 | } 8 | 9 | 10 | def get_result(platurl): 11 | data = { 12 | 'ht': 1, 13 | 'h': platurl 14 | } 15 | result = [] 16 | try: 17 | html = requests.get('http://tool.chinaz.com/history/', data=data, headers=header).text 18 | soup = BeautifulSoup(html, "html.parser") 19 | info = soup.find_all('ul', attrs={'class': 'ResultListWrap'})[0] 20 | # text = [item.strip() for item in info.text.strip().split('\n') if item.strip()] 21 | 22 | 23 | for item in info.find_all('li', attrs={'class': 'ReListCent ReLists clearfix'}): 24 | temp = [] 25 | for each in item.find_all('div'): 26 | if each.text.strip() != '': 27 | temp.append(each.text.strip()) 28 | if len(temp) > 9: 29 | continue 30 | if len(temp) < 9: 31 | while len(temp) != 9: 32 | temp.append('--') 33 | temp2 = [] 34 | for each in temp: 35 | if each.startswith('arguments'): 36 | temp2.append('--') 37 | else: 38 | temp2.append(each) 39 | result.append(temp2) 40 | for item in info.find_all('li', attrs={'class': 'ReListCent ReLists clearfix bg-list'}): 41 | temp = [] 42 | for each in item.find_all('div'): 43 | if each.text.strip() != '': 44 | temp.append(each.text.strip()) 45 | if len(temp) > 9: 46 | continue 47 | if len(temp) < 9: 48 | while len(temp) != 9: 49 | temp.append('--') 50 | temp2 = [] 51 | for each in temp: 52 | if each.startswith('arguments'): 53 | temp2.append('--') 54 | else: 55 | temp2.append(each) 56 | result.append(temp2) 57 | return result 58 | except Exception, e: 59 | print e 60 | return result 61 | 62 | 63 | def change_url(url): 64 | temp = url.split('//') 65 | if temp[1][-1] == '/': 66 | return temp[1][:-1] 67 | else: 68 | return temp[1] 69 | 70 | 71 | all_platform = json.load(open('platform_basic.json', 'r')) 72 | all_result = {} 73 | for each in all_platform: 74 | try: 75 | print each['platName'] 76 | all_result[each['platName']] = [] 77 | result = get_result(change_url(each['platUrl'])) 78 | result.sort() 79 | 
all_result[each['platName']].extend(result) 80 | except Exception, e: 81 | print e 82 | continue 83 | 84 | json.dump(all_result, open('result.json', 'w')) 85 | -------------------------------------------------------------------------------- /others/后台1/get_history_data/readme.txt: -------------------------------------------------------------------------------- 1 | 数据为每个平台的官网在站长之家的访问统计情况(http://tool.chinaz.com/history/?ht=1&h=www.rong360.com) 2 | 3 | 格式为: 4 | 5 | { 6 | "平台名":[ 7 | [日期,百度权重,预估流量,关键词数,站长排名,世界排名,流量排名,日均IP,日均PV] 8 | ] 9 | } 10 | 11 | 12 | '--':表示数据缺失 13 | 14 | 也有一部分平台无查询记录,list为空 15 | -------------------------------------------------------------------------------- /others/后台1/get_recent_news/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindawei/p2p/b0771100b8d4740906a186d9668b7d662214409c/others/后台1/get_recent_news/.DS_Store -------------------------------------------------------------------------------- /others/后台1/get_recent_news/get_baidu.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # 抓取相关百度新闻 3 | import requests 4 | from bs4 import BeautifulSoup 5 | import urllib 6 | import json 7 | 8 | result = {} 9 | 10 | 11 | def get_news(word): 12 | name = urllib.urlencode({'name': word}).split('=')[1] 13 | soup = BeautifulSoup( 14 | requests.get( 15 | 'http://www.baidu.com/s?tn=baidurt&rtt=1&bsst=1&cl=3&ie=utf-8&bs={}&f=8&rsv_bp=1&wd={}&inputT=0'.format( 16 | name, 17 | name)).text, 18 | "html.parser" 19 | ) 20 | for item in soup.find_all('a', attrs={'target': '_blank'}): 21 | if item[ 22 | 'href'] != '#' and item.text != u'百度快照' and item.text != u'注册' \ 23 | and u'去网页搜索' not in item.text and item.text != u'帮助' and item.text != '': 24 | yield {'url': item['href'], 'title': item.text.strip()} 25 | 26 | 27 | all_plat = json.load(open('platform_basic.json', 'r'))[2500:2500] 28 | 29 | 30 | def get_plat_name(): 31 | for item in all_plat: 32 | result.setdefault(item['platName'], []) 33 | print item['platName'] 34 | for each in get_news(item['platName'].encode('utf-8')): 35 | result[item['platName']].append(each) 36 | 37 | 38 | if __name__ == '__main__': 39 | get_plat_name() 40 | json.dump(result, open('all_plat_recent_news_3.json', 'w')) 41 | -------------------------------------------------------------------------------- /others/后台1/get_recent_news/get_news.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # 获取和讯网上平台信息 3 | import requests 4 | import json 5 | import re 6 | from bs4 import BeautifulSoup 7 | 8 | 9 | def get_all_id(index): 10 | data = open('page{}'.format(index), 'r').read() 11 | name_pattern = re.compile(r"(.+?) 
1: 11 | result[item].append({'url': each['url'], 'title': each['title']}) 12 | else: 13 | continue 14 | 15 | json.dump(result, open('data_3.json', 'w')) 16 | -------------------------------------------------------------------------------- /others/后台1/get_wangdaizhijia_each_platform/get_recent_news/filter.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | data = json.load(open('all_plat_wangdai_news.json', 'r')) 4 | for each in data: 5 | for item in data[each]: 6 | if not item['url'].startswith('http'): 7 | item['url'] = 'http://' + item['url'] 8 | 9 | json.dump(data, open('all_plat_related_news.json', 'w')) 10 | -------------------------------------------------------------------------------- /others/后台1/get_wangdaizhijia_each_platform/get_recent_news/get_news.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | from bs4 import BeautifulSoup 4 | 5 | headers = { 6 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.110 Safari/537.36' 7 | 8 | } 9 | 10 | result = {} 11 | 12 | 13 | def get_news(pinyin): 14 | temp = [] 15 | req = requests.get('http://www.wdzj.com/dangan/{}/'.format(pinyin), headers=headers) 16 | req.encoding = 'utf-8' 17 | soup = BeautifulSoup(req.text, "html.parser") 18 | try: 19 | for item in soup.find_all('ul', attrs={'class': "newsList"}): 20 | for each in item.find_all('a'): 21 | if 'http' in each['href']: 22 | the_url = each['href'] 23 | else: 24 | the_url = 'www.wdzj.com' + each['href'] 25 | temp.append({'url': the_url, 'title': each.text}) 26 | 27 | return temp 28 | except: 29 | return temp 30 | 31 | 32 | for item in json.load(open('platform_search.json', 'r')): 33 | print item['platName'] 34 | result[item['platName']] = get_news(item['platPin']) 35 | 36 | json.dump(result, open('all_plat_wangdai_news.json', 'w')) 37 | -------------------------------------------------------------------------------- /others/后台1/get_wangdaizhijia_each_platform/get_recent_reviews/check.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | data = json.load(open('recent_reviews.json', 'r')) 4 | print len(data) 5 | -------------------------------------------------------------------------------- /others/后台1/get_wangdaizhijia_each_platform/get_recent_reviews/get_recent_review.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | 4 | header = { 5 | 'Accept': '*/*', 6 | 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', 7 | 'Host': 'www.wdzj.com', 8 | 'Origin': 'http://www.wdzj.com', 9 | 'Referer': 'http://www.wdzj.com/dangan/dianping/', 10 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.110 Safari/537.36' 11 | 12 | } 13 | 14 | result = [] 15 | for i in range(1, 10): 16 | data = { 17 | 'orderType': 0, 18 | 'currentPage': i, 19 | 'allReview': 1 20 | } 21 | data = \ 22 | requests.post('http://www.wdzj.com/front_plat-review-list', headers=header, data=data).json()[0]['platReview'][ 23 | 'reviewList'] 24 | result.extend(data) 25 | json.dump(result, open('raw_recent_reviews.json', 'w')) 26 | -------------------------------------------------------------------------------- /others/后台1/get_wangdaizhijia_each_platform/get_recent_reviews/recent_reviews_readme.txt: 
-------------------------------------------------------------------------------- 1 | 文件为 网贷之家中,针对各平台的最近点评. 2 | 3 | 每条点评格式为: 4 | { 5 | 'platName':点评平台名 6 | 'evaluation' : 评价者态度(0-不推荐,1-一般,2-推荐) 7 | 'reviewContent': 评价内容 8 | 'label':评价标签 9 | 'reviewUserName':用户名 10 | 'reviewDate':评价时间 11 | } 12 | 13 | 14 | 如下: 15 | { 16 | "reviewContent": "平台不错,,都是月标,,投资2000赚了200多。因为注册送了145代金券,,然后春节期间领取到红包100。", 17 | "reviewDate": "2016-04-14 14:40:18", 18 | "label": [ 19 | "还不错" 20 | ], 21 | "reviewUserName": "8883662846", 22 | "platName": "睿银财富", 23 | "evaluation": "2" 24 | } 25 | 26 | 建议: 27 | 可以考虑在平台档案页面做个最新滚动评论 -------------------------------------------------------------------------------- /others/后台1/get_wangdaizhijia_each_platform/get_recent_reviews/reviews_filter.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | result = [] 4 | 5 | data = json.load(open('raw_recent_reviews.json', 'r')) 6 | for item in data: 7 | comment = {} 8 | labels = [] 9 | if 'platReviewTagList' in item: 10 | for each in item['platReviewTagList']: 11 | if 'tagName' in each: 12 | labels.append(each['tagName']) 13 | comment['reviewContent'] = item['reviewContent'] 14 | comment['platName'] = item['platName'] 15 | comment['evaluation'] = item['evaluation'] 16 | comment['reviewDate'] = item['reviewDate'] 17 | comment['reviewUserName'] = item['reviewUserName'] 18 | comment['label'] = labels 19 | result.append(comment) 20 | 21 | json.dump(result, open('recent_reviews.json', 'w')) 22 | -------------------------------------------------------------------------------- /others/后台1/get_wangdaizhijia_each_platform/get_valid_reviews/check.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | import json 3 | 4 | data = json.load(open('platform_reviews_v3.json', 'r')) 5 | print len(data) 6 | -------------------------------------------------------------------------------- /others/后台1/get_wangdaizhijia_each_platform/get_valid_reviews/filter.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | data = json.load(open('all_platform_reviews.json', 'r')) 4 | for each in data: 5 | for item in data[each]['reviews']: 6 | temp = sorted(data[each]['reviews'][item]['labels'].items(), key=lambda x: x[1], reverse=True) 7 | temp = [temp_label[0] for temp_label in temp[:5]] 8 | data[each]['reviews'][item]['labels'] = temp 9 | # print each 10 | # print data[each]['reviews'][item]['labels'] 11 | data[each]['reviews'][item]['comments'] = data[each]['reviews'][item]['comments'][:90] 12 | print len(data[each]['reviews'][item]['comments']) 13 | json.dump(data, open('platform_reviews_v5.json', 'w')) 14 | -------------------------------------------------------------------------------- /others/后台1/get_wangdaizhijia_each_platform/get_valid_reviews/get_hotplat_reviews.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | new_plats = json.load(open('platform_search.json', 'r')) 4 | # display_plat = json.load(open('display_platform.json', 'r')) 5 | name_id = {} 6 | for item in new_plats: 7 | # if item['platName'] in display_plat: 8 | name_id[item['platName']] = item['platId'] 9 | all_reviews = json.load(open('platform_reviews.json', 'r')) 10 | 11 | result = {} 12 | for name in name_id: 13 | result[name] = { 14 | 'reviews': {'0': {'labels': {}, 'comments': []}, '1': {'labels': {}, 'comments': []}, 15 | '2': {'labels': {}, 'comments': []}}} 16 | try: 17 | 
for each in all_reviews[name_id[name]]['reviews']: 18 | if 'platReviewTagList' in each: 19 | if len(each['platReviewTagList']): 20 | for item in each['platReviewTagList']: 21 | if 'tagName' in item: 22 | if 'amp' not in item['tagName']: 23 | result[name]['reviews'][each['evaluation']]['labels'][item['tagName']] = \ 24 | result[name]['reviews'][each['evaluation']]['labels'].get(item['tagName'], 0) + 1 25 | try: 26 | if not 'hellip' in each['reviewContent'] and not 'amp' in each['reviewContent']: 27 | result[name]['reviews'][each['evaluation']]['comments'].append( 28 | {'content': each['reviewContent'], 'date': each['reviewDate']}) 29 | except: 30 | continue 31 | except: 32 | continue 33 | 34 | json.dump(result, open('all_platform_reviews.json', 'w')) 35 | -------------------------------------------------------------------------------- /others/后台1/get_wangdaizhijia_each_platform/get_valid_reviews/plat_form_reviews_v2_readme.txt: -------------------------------------------------------------------------------- 1 | 更新的评论信息,格式为 2 | 3 | { 4 | 平台名称:{ 5 | "reviews"{ 6 | "0"(不推荐):{ 7 | "labels":{ 8 | "平台大":5(出现次数), 9 | "提现快":6 10 | ... 11 | } 12 | "comments":[ 13 | { 14 | "content":评论内容, 15 | "date":评论时间 16 | } 17 | ] 18 | } 19 | "1"(一般):{ 20 | "labels":{}, 21 | "comments":{} 22 | } 23 | "2"(推荐):{ 24 | "labels":{}, 25 | "comments":{} 26 | } 27 | } 28 | } 29 | 30 | } -------------------------------------------------------------------------------- /others/后台1/get_wangdaizhijia_each_platform/platform_chart_readme.txt: -------------------------------------------------------------------------------- 1 | 结构为: 2 | 3 | { 4 | "99"(平台id):{ 5 | "0"(利率和成交量信息):{ 6 | x(横轴-时间轴):{ 7 | [ 8 | "2015-04-10", 9 | .... 10 | ] 11 | }, 12 | y1(利率):{ 13 | [ 14 | 1053.71, 15 | ... 16 | ] 17 | }, 18 | y2(成交量):{ 19 | [ 20 | 84711.06, 21 | ... 
22 | ] 23 | } 24 | } 25 | "1"(待还款\资金净流入):{ 26 | x:{[]}, 27 | y1(待还款):{[]}, 28 | y2(资金净流入):{[]} 29 | } 30 | "2"(投资人数\借款人数):{ 31 | x:{[]}, 32 | y1(投资人数):{[]}, 33 | y2(借款人数):{[]} 34 | } 35 | } 36 | } -------------------------------------------------------------------------------- /others/后台2/article_classify.py: -------------------------------------------------------------------------------- 1 | #encoding=utf8 2 | ''' 3 | __author__ = 'Administrator' 4 | 2016.4.14 01:05 first version 5 | 资讯分类,类别包括: 6 | 高层变动、新产品、平台跑路、提现困难、相关指标、 7 | ''' 8 | 9 | import csv 10 | import json 11 | import time 12 | import re 13 | from string import punctuation,digits,letters,whitespace 14 | import sys 15 | import os 16 | import datetime 17 | import jieba 18 | import jieba.analyse 19 | import pandas as pd 20 | import types 21 | from pymongo import MongoClient 22 | client=MongoClient() 23 | reload(sys) 24 | sys.setdefaultencoding('utf-8') 25 | csv.field_size_limit(sys.maxint) 26 | 27 | 28 | 29 | #tag 高层变动 30 | def getArticleTag(title, content): 31 | recall_key_list = { 32 | '高层变动':{'高层变动', '高管变动', '换人', '离职'},\ 33 | '新产品':{'新产品', '产品上市'},\ 34 | '平台跑路':{'平台跑路', '跑路'},\ 35 | '提现困难':{'提现困难', '无法兑付'},\ 36 | '平台融资':{'A轮融资', 'B轮融资', 'C轮融资', 'D轮融资', 'E轮融资', '估值', 'IPO', '上市'}\ 37 | } 38 | tag_list = '' 39 | for tag in recall_key_list: 40 | for key in recall_key_list[tag]: 41 | if title.find(key) != -1 or content.find(key) != -1: 42 | tag_list += tag + ',' 43 | break 44 | if tag_list != '': 45 | tag_list = tag_list[:-1] 46 | # print title, tag_list 47 | return tag_list 48 | 49 | 50 | if __name__ == "__main__": 51 | db = client.holmesdb 52 | t_news = db.t_news_di 53 | t_news_res = t_news.find() 54 | for news in t_news_res: 55 | title = news['title'] 56 | content = news['content'] 57 | flag = getArticleTag(title, content) 58 | 59 | -------------------------------------------------------------------------------- /others/后台2/article_classify.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindawei/p2p/b0771100b8d4740906a186d9668b7d662214409c/others/后台2/article_classify.pyc -------------------------------------------------------------------------------- /others/后台2/article_data_loads_delta.py: -------------------------------------------------------------------------------- 1 | #encoding=utf8 2 | ''' 3 | __author__ = 'Administrator' 4 | 2016.3.3 20:30 first version 5 | 加载文件 6 | ''' 7 | 8 | import csv 9 | import json 10 | import time 11 | import re 12 | from string import punctuation,digits,letters,whitespace 13 | import sys 14 | import os 15 | import datetime 16 | from bson import ObjectId 17 | import jieba 18 | import jieba.analyse 19 | import pandas as pd 20 | import pymongo 21 | from pymongo import MongoClient 22 | client=MongoClient() 23 | reload(sys) 24 | sys.setdefaultencoding('utf-8') 25 | csv.field_size_limit(sys.maxint) 26 | 27 | 28 | 29 | def handleContent(string): 30 | """字符串处理,去标点符号,中文分词,return:unicode""" 31 | string = string.decode('utf-8') 32 | #针对自己的文本数据定制化修改 33 | string = string.replace("
<br><br>", "").replace("<br/><br/>
", "").replace("", "").replace("", "") 34 | string = string.replace("#r#", "\n").replace("#n#", "\n").replace("", "") 35 | string = string.replace(" ", "").replace("\n", "").replace("\t", " ") 36 | 37 | string = re.sub("[\s+\.\!\/_,$%^*(+\"\']+|[+——!:》,《”。“?、~@#¥%……&*()]+".decode("utf-8"), "".decode("utf-8"),string) 38 | string = string.encode('utf-8') 39 | string = string.translate(None,punctuation+digits+letters+whitespace) 40 | return string 41 | 42 | def getJsonFile(json_fname): 43 | json_file = file(json_fname, "r") 44 | json_vector = [] 45 | for line in json_file: 46 | person_info = json.loads(line) 47 | json_vector.append(person_info) 48 | return json_vector 49 | 50 | def getTable_maxID(mongodb_table, field): 51 | try: 52 | res = mongodb_table.find().sort(field, pymongo.DESCENDING)[0] 53 | if res == None: 54 | return 0 55 | return int(res[field]) 56 | except: 57 | return 0 58 | 59 | def insertDB(mongodb_table, line, cols): 60 | try: 61 | ori = cols[0] 62 | cols[0] = "_id" 63 | data = {} 64 | for i in xrange(0, len(cols)): 65 | data.setdefault(cols[i], line[i]) 66 | mongodb_table.insert(data) 67 | cols[0] = ori 68 | except Exception : 69 | cols[0] = ori 70 | print Exception 71 | return 72 | 73 | print "before load data", datetime.datetime.now() 74 | # 数据加载 75 | news_title_dict = {} 76 | texts_news = [] 77 | texts_news_other = [] 78 | texts_ugc = [] 79 | texts_experts = [] 80 | texts_policy = [] 81 | texts_nlp_train = [] 82 | 83 | db = client.holmesdb 84 | t_news = db.t_news_di 85 | t_news.remove() 86 | t_news_id = getTable_maxID(t_news, "_id") 87 | #news & policy 88 | columns = [] 89 | dir_list = ["./data/news"] 90 | for dir in dir_list: 91 | print dir 92 | for fname in os.listdir(dir): 93 | if fname.find(".csv") != -1: 94 | fname = fname.decode("gbk") 95 | f_in = csv.reader(file(dir + r"/" + fname, "r")) 96 | lines = [line for line in f_in] 97 | if len(columns) == 0: 98 | columns = lines[0] 99 | for line in lines[1:]: 100 | if len(line) < 14: 101 | continue 102 | if len(line) > 14: 103 | line = line[:14] 104 | texts_news.append(line) 105 | line[6] = line[6].replace("###r###", "\r").replace("###n###", "\n").replace("###t###", "\t") 106 | title = line[5] 107 | if title not in news_title_dict: 108 | news_title_dict.setdefault(title, 0) 109 | line[0] = str(t_news_id ) 110 | t_news_id += 1 111 | insertDB(t_news, line, columns) 112 | print len(news_title_dict) 113 | print columns 114 | data = pd.DataFrame(texts_news) 115 | data.columns = columns 116 | pd.to_pickle(data, 'data/news_dataset.pkl') 117 | 118 | -------------------------------------------------------------------------------- /others/后台2/article_sentiment_extract.py: -------------------------------------------------------------------------------- 1 | #encoding=utf8 2 | ''' 3 | __author__ = 'Administrator' 4 | 2016.3.3 20:30 first version 5 | 加载文件 6 | ''' 7 | 8 | import csv 9 | import json 10 | import time 11 | import re 12 | from string import punctuation,digits,letters,whitespace 13 | import sys 14 | import os 15 | import datetime 16 | from bson import ObjectId 17 | import jieba 18 | import jieba.analyse 19 | import pandas as pd 20 | from helper import sentiments_analyze as sa 21 | import pymongo 22 | from pymongo import MongoClient 23 | client=MongoClient() 24 | reload(sys) 25 | sys.setdefaultencoding('utf-8') 26 | csv.field_size_limit(sys.maxint) 27 | 28 | 29 | 30 | def handleContent(string): 31 | """字符串处理,去标点符号,中文分词,return:unicode""" 32 | string = string.decode('utf-8') 33 | #针对自己的文本数据定制化修改 34 | string = string.replace("
<br><br>", "").replace("<br/><br/>
", "").replace("", "").replace("", "") 35 | string = string.replace("#r#", "\n").replace("#n#", "\n").replace("", "") 36 | string = string.replace(" ", "").replace("\n", "").replace("\t", " ") 37 | 38 | string = re.sub("[\s+\.\!\/_,$%^*(+\"\']+|[+——!:》,《”。“?、~@#¥%……&*()]+".decode("utf-8"), "".decode("utf-8"),string) 39 | string = string.encode('utf-8') 40 | string = string.translate(None,punctuation+digits+letters+whitespace) 41 | return string 42 | 43 | def getJsonFile(json_fname): 44 | json_file = file(json_fname, "r") 45 | json_vector = [] 46 | for line in json_file: 47 | person_info = json.loads(line) 48 | json_vector.append(person_info) 49 | return json_vector 50 | 51 | def getTable_maxID(mongodb_table, field): 52 | try: 53 | res = mongodb_table.find().sort(field, pymongo.DESCENDING)[0] 54 | if res == None: 55 | return 0 56 | return int(res[field]) 57 | except: 58 | return 0 59 | 60 | 61 | print "before load data", datetime.datetime.now() 62 | # 数据加载 63 | news_title_dict = {} 64 | texts_news = [] 65 | texts_news_other = [] 66 | texts_ugc = [] 67 | texts_experts = [] 68 | texts_policy = [] 69 | texts_nlp_train = [] 70 | 71 | #news & policy 72 | columns = [] 73 | fname = u"./data/ugc_opinion_comment/用户点评-网贷之家-融360.csv" 74 | 75 | f_in = csv.reader(file(fname, "r")) 76 | lines = [line for line in f_in] 77 | if len(columns) == 0: 78 | columns = lines[0] 79 | for line in lines[1:]: 80 | if len(line) < 14: 81 | continue 82 | if len(line) > 14: 83 | line = line[:14] 84 | line[6] = line[6].replace("###r###", "\r").replace("###n###", "\n").replace("###t###", "\t") 85 | line[6] = line[6].replace("#r#", "\r").replace("#n#", "\n").replace("#t#", "\t") 86 | title = line[5] 87 | # print title 88 | texts_news_other.append(line) 89 | try: 90 | print sa.single_review_sentiment_score(line[6])[:2], line[6] 91 | except Exception: 92 | pass 93 | texts_ugc.append(line) 94 | 95 | texts_nlp_train.append(line) 96 | -------------------------------------------------------------------------------- /others/后台2/bad_platform_healthscore.py: -------------------------------------------------------------------------------- 1 | #encoding=utf8 2 | ''' 3 | __author__ = 'Administrator' 4 | 2016.4.2 14:11 first version 5 | 问题平台分析 6 | ''' 7 | 8 | import csv 9 | import json 10 | import time 11 | import re 12 | from string import punctuation,digits,letters,whitespace 13 | import sys 14 | import os 15 | import datetime 16 | import jieba 17 | import jieba.analyse 18 | import pandas as pd 19 | import types 20 | from pymongo import MongoClient 21 | from helper import myio 22 | 23 | client=MongoClient() 24 | reload(sys) 25 | sys.setdefaultencoding('utf-8') 26 | csv.field_size_limit(sys.maxint) 27 | 28 | 29 | 30 | def handleContent(string): 31 | """字符串处理,去标点符号,中文分词,return:unicode""" 32 | string = string.decode('utf-8') 33 | #针对自己的文本数据定制化修改 34 | string = string.replace("
<br><br>", "").replace("<br/><br/>
", "").replace("", "").replace("", "") 35 | string = string.replace("#r#", "\n").replace("#n#", "\n").replace("", "") 36 | string = string.replace(" ", "").replace("\n", "").replace("\t", " ") 37 | 38 | string = re.sub("[\s+\.\!\/_,$%^*(+\"\']+|[+——!:》,《”。“?、~@#¥%……&*()]+".decode("utf-8"), "".decode("utf-8"),string) 39 | string = string.encode('utf-8') 40 | string = string.translate(None,punctuation+digits+letters+whitespace) 41 | return string 42 | def getJsonFile_line(json_fname): 43 | json_file = file(json_fname, "r") 44 | json_vector = [] 45 | for line in json_file: 46 | person_info = json.loads(line) 47 | json_vector.append(person_info) 48 | return json_vector 49 | def getJsonFile_all(json_fname): 50 | json_file = open(json_fname, "r") 51 | dict = json.load(json_file) 52 | json_file.close() 53 | return dict 54 | 55 | print "before load bad company datas", datetime.datetime.now() 56 | db = client.holmesdb 57 | t_news = db.t_news_di 58 | t_policy = db.t_policy_di 59 | t_ugc = db.t_ugc_di 60 | t_expert = db.t_expert_opinion_di 61 | t_news_caixin = db.t_news_caixin_di 62 | news_res = t_news.find() 63 | policy_res = t_policy.find() 64 | ugc_res = t_ugc.find() 65 | expert_res = t_expert.find() 66 | 67 | bad_company_2015 = getJsonFile_all("./data/bad_platform/bad_platform_2015.json") 68 | print "end load bad company datas", datetime.datetime.now() 69 | 70 | 71 | article_res = [news_res, policy_res, expert_res, ugc_res] 72 | for data_set in article_res: 73 | print data_set.count(), 74 | print "" 75 | key = ["news", "policy", "expert", "ugc"] 76 | month_summary = {} 77 | for i in xrange(len(article_res)): 78 | for res in article_res[i]: 79 | if res['item_pub_time'] >= '2014-01-01' and res['item_pub_time'] <= '2015-12-31': 80 | title = res['title'] 81 | content = res['content'] 82 | t = res['item_pub_time'] 83 | m = t[0:7].replace("-", ".") 84 | date = t[5:].replace("-", ".").split(" ")[0] 85 | for pjson in bad_company_2015: 86 | pname = pjson['platform_name'] 87 | if content.find(pname) != -1 or content.find(pname) != -1: 88 | month_summary[pname][m] = month_summary.setdefault(pname, {}).setdefault(m, 0) + 1 89 | #print pname, m, month_summary[pname][m] 90 | bad_platform_trend = {} 91 | for pjson in bad_company_2015: 92 | pname = pjson['platform_name'] 93 | pro_date = pjson['problem_time'] 94 | last_year_date = "2014." 
+ pro_date[5:7] 95 | sum_cnt = 0 96 | p_trend = [] 97 | if pname in month_summary: 98 | month_data = sorted(month_summary[pname].items(), lambda a, b: cmp(a[0], b[0])) 99 | for (m, cnt) in month_data: 100 | print pname, m, pro_date, last_year_date 101 | if m >= last_year_date and m < pro_date: 102 | try: 103 | y1, m1 = int(m[:4]), int(m[5:]) 104 | y2, m2 = int(pro_date[:4]), int(pro_date[5:]) 105 | delta = (y2 - y1) * 12 + m2 - m1 106 | sum_cnt += cnt 107 | p_trend.append((delta, cnt)) 108 | except Exception: 109 | print Exception 110 | continue 111 | if sum_cnt >= 20: 112 | print pname, pro_date, p_trend 113 | bad_platform_trend.setdefault(pname, [pro_date, p_trend]) 114 | myio.writeJsonDict(bad_platform_trend, open("./data/bad_platform/bad_platform_trend.json", "w"), "rows") 115 | print "end analyze bad company datas", datetime.datetime.now() 116 | 117 | 118 | -------------------------------------------------------------------------------- /others/后台2/helper/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Administrator' 2 | -------------------------------------------------------------------------------- /others/后台2/helper/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindawei/p2p/b0771100b8d4740906a186d9668b7d662214409c/others/后台2/helper/__init__.pyc -------------------------------------------------------------------------------- /others/后台2/helper/get_finance_nouns.py: -------------------------------------------------------------------------------- 1 | #encoding=utf8 2 | ''' 3 | __author__ = 'Administrator' 4 | 2016.2.27 20:30 first version 5 | 分词、提取关键字、提取文章主题 6 | ''' 7 | 8 | 9 | import sys 10 | reload(sys) 11 | sys.setdefaultencoding('utf-8') 12 | 13 | 14 | f_out = open("data/finance_words/eco_out.txt", "w") 15 | word_dict = {} 16 | for line in open("data/finance_words/eco.txt", "r"): 17 | #print "###" ,line 18 | if line.find("、") != -1: 19 | if line[line.find("、"):line.find("(") ].find(" ") != -1: 20 | line = line[line.find(" ")+ 1:line.find("(") ] 21 | else: 22 | line = line[line.find("、") + len("、"):line.find("(") ] 23 | flag = 0 24 | for c in line: 25 | if c.isalpha() == True: 26 | flag = 1 27 | if flag == 0 and len(line) > 1 and len(line) < 30: 28 | word_dict.setdefault(line, 0) 29 | 30 | for line in open("data/finance_words/p2p_sentence.txt", "r"): 31 | if line.find(":") != -1: 32 | line = line[:line.find(":")] 33 | while line[0] == " ": line = line[1:] 34 | while line[-1] == " ": line = line[:-1] 35 | while line[:len(" ")] == " ": line = line[len(" "):] 36 | print line 37 | m = 0 38 | sep_list = [".",".","、"," "] 39 | for sep in sep_list: 40 | if line.find(sep) != -1: 41 | m = max(m, line.find(sep) + len(sep)) 42 | if m > 0: 43 | if line.find("(") == -1: 44 | line = line[m:] 45 | if line.find("(") != -1: 46 | line = line[:line.find("(")] 47 | while line[0] == " ": line = line[1:] 48 | while line[-1] == " ": line = line[:-1] 49 | while line[:len(" ")] == " ": line = line[len(" "):] 50 | if len(line) > 1 and len(line) < 30: 51 | word_dict.setdefault(line, 0) 52 | 53 | for key in word_dict: 54 | f_out.write( "%s 100\n" %(key)) 55 | 56 | -------------------------------------------------------------------------------- /others/后台2/helper/mongoDB_process.py: -------------------------------------------------------------------------------- 1 | #encoding=utf8 2 | ''' 3 | __author__ = 'Administrator' 4 | 2016.3.3 20:30 first version 5 | 加载文件 6 | 
''' 7 | 8 | import csv 9 | import json 10 | import time 11 | import re 12 | from string import punctuation,digits,letters,whitespace 13 | import sys 14 | import os 15 | import datetime 16 | from bson import ObjectId 17 | import jieba 18 | import jieba.analyse 19 | import pandas as pd 20 | import pymongo 21 | from pymongo import MongoClient 22 | client=MongoClient() 23 | db = client.holmesdb 24 | 25 | 26 | 27 | def insertDB(mongodb_table, line, columns): 28 | try: 29 | ori = columns[0] 30 | columns[0] = "_id" 31 | data = {} 32 | for i in xrange(0, len(columns)): 33 | data.setdefault(columns[i], line[i]) 34 | mongodb_table.insert(data) 35 | columns[0] = ori 36 | except Exception : 37 | columns[0] = ori 38 | print Exception 39 | return 40 | 41 | -------------------------------------------------------------------------------- /others/后台2/helper/myio.py: -------------------------------------------------------------------------------- 1 | #encoding=utf8 2 | import csv 3 | import json 4 | import time 5 | import re 6 | from string import punctuation,digits,letters,whitespace 7 | import sys 8 | import datetime 9 | import jieba 10 | import jieba.analyse 11 | import math 12 | import pandas as pd 13 | from gensim import corpora,models 14 | from helper.textprocessing import handleContent 15 | from pymongo import MongoClient 16 | client=MongoClient() 17 | reload(sys) 18 | sys.setdefaultencoding('utf-8') 19 | 20 | 21 | def writeJsonDict(person, f_out): 22 | outStr = json.dumps(person, ensure_ascii = False) #处理完之后重新转为Json格式 23 | f_out.write(outStr.encode('utf-8') + '\n') #写回到一个新的Json文件中去 24 | 25 | def writeJsonDict(person, f_out, row_type=None): 26 | row_flag = 1 if row_type == "rows" else None 27 | outStr = json.dumps(person, ensure_ascii = False, indent=row_flag) #处理完之后重新转为Json格式 28 | f_out.write(outStr.encode('utf-8') + '\n') #写回到一个新的Json文件中去 29 | 30 | def getJsonFile_line(json_fname): 31 | json_file = file(json_fname, "r") 32 | json_vector = [] 33 | for line in json_file: 34 | person_info = json.loads(line) 35 | json_vector.append(person_info) 36 | return json_vector 37 | 38 | def getJsonFile_all(json_fname): 39 | json_file = open(json_fname, "r") 40 | dict = json.load(json_file) 41 | json_file.close() 42 | return dict 43 | -------------------------------------------------------------------------------- /others/后台2/helper/myio.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindawei/p2p/b0771100b8d4740906a186d9668b7d662214409c/others/后台2/helper/myio.pyc -------------------------------------------------------------------------------- /others/后台2/helper/nlp_model.py: -------------------------------------------------------------------------------- 1 | #encoding=utf8 2 | ''' 3 | __author__ = 'Administrator' 4 | 2016.3.13 16:30 first version 5 | 分词、提取关键字、提取每天热门关键字 6 | ''' 7 | 8 | import csv 9 | import json 10 | import time 11 | import re 12 | from string import punctuation,digits,letters,whitespace 13 | import sys 14 | import datetime 15 | 16 | import jieba 17 | import jieba.analyse 18 | import math 19 | import pandas as pd 20 | from gensim import corpora,models 21 | from helper.textprocessing import handleContent 22 | from pymongo import MongoClient 23 | reload(sys) 24 | sys.setdefaultencoding('utf-8') 25 | 26 | 27 | 28 | def handleContent(string): 29 | """字符串处理,去标点符号,中文分词,return:unicode""" 30 | string = string.decode('utf-8') 31 | #针对自己的文本数据定制化修改 32 | string = string.replace("
<br><br>", "").replace("<br/><br/>
", "").replace("", "").replace("", "") 33 | string = string.replace("#r#", "\r").replace("#n#", "\n").replace("#t#", "\t") 34 | string = re.sub("[\s+\.\!\/_,$%^*(+\"\']+|[+——!:》,《”。“?、~@#¥%……&*()]+".decode("utf-8"), "".decode("utf-8"),string) 35 | string = string.encode('utf-8') 36 | string = string.translate(None,punctuation+whitespace) 37 | return string 38 | 39 | def getKeyword_times(pd_docs, time_begin, time_end): 40 | time_begin = datetime.datetime.strftime(time_begin, '%Y-%m-%d') 41 | time_end = datetime.datetime.strftime(time_end, '%Y-%m-%d') 42 | docs = pd_docs[pd_docs.item_pub_time >= time_begin] 43 | docs = docs[docs.item_pub_time <= time_end] 44 | keyword_dict = {} 45 | for i in xrange(0, len(docs)): 46 | title = docs.iloc[i]["title"] 47 | title = handleContent(title) 48 | title_keyword = list(jieba.cut(title, cut_all=False)) 49 | content = docs.iloc[i]["content"] 50 | content = handleContent(content) 51 | cont_keyword = jieba.analyse.extract_tags(content, topK = 100) 52 | for kw in title_keyword: 53 | if kw.isdigit() == True or len(kw) <= 1: 54 | continue 55 | keyword_dict[kw] = keyword_dict.setdefault(kw, 0) + 2 56 | for kw in cont_keyword: 57 | if kw.isdigit() == True or len(kw) <= 1: 58 | continue 59 | keyword_dict[kw] = keyword_dict.setdefault(kw, 0) + 1 60 | return sorted(keyword_dict.items(), lambda a,b:-cmp(a[1], b[1])) -------------------------------------------------------------------------------- /others/后台2/helper/nlp_model.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindawei/p2p/b0771100b8d4740906a186d9668b7d662214409c/others/后台2/helper/nlp_model.pyc -------------------------------------------------------------------------------- /others/后台2/helper/sentiments_analyze.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindawei/p2p/b0771100b8d4740906a186d9668b7d662214409c/others/后台2/helper/sentiments_analyze.pyc -------------------------------------------------------------------------------- /others/后台2/helper/textprocessing.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindawei/p2p/b0771100b8d4740906a186d9668b7d662214409c/others/后台2/helper/textprocessing.pyc -------------------------------------------------------------------------------- /others/后台2/industry_analyze.py: -------------------------------------------------------------------------------- 1 | #encoding=utf8 2 | ''' 3 | __author__ = 'Administrator' 4 | 2016.3.6 18:26 first version 5 | 行业的所有数据汇总,并存入数据库holmesdb 6 | ''' 7 | 8 | import csv 9 | import json 10 | import time 11 | import re 12 | from string import punctuation,digits,letters,whitespace 13 | import sys 14 | import datetime 15 | 16 | import jieba 17 | import jieba.analyse 18 | import pandas as pd 19 | from gensim import corpora,models 20 | from helper.textprocessing import handleContent 21 | from pymongo import MongoClient 22 | client=MongoClient() 23 | reload(sys) 24 | sys.setdefaultencoding('utf-8') 25 | jieba.load_userdict("C:/Python27/Lib/site-packages/jieba-0.37-py2.7.egg/jieba/financedict.txt") 26 | 27 | 28 | 29 | area_data = pd.read_csv("./data/platform_company/industry_areas.csv") 30 | db = client.holmesdb 31 | t_sh_industry_areas = db.t_sh_industry_areas 32 | for i in xrange(len(area_data)): 33 | kw_data = {} 34 | for col in area_data.columns: 35 | kw_data.setdefault(col, area_data.iloc[i][col]) 36 | if 
t_sh_industry_areas.find_one(kw_data) == None: 37 | id = t_sh_industry_areas.insert_one(kw_data).inserted_id 38 | 39 | class_data = pd.read_csv("./data/platform_company/industry_class.csv") 40 | db = client.holmesdb 41 | t_sh_industry_class = db.t_sh_industry_class 42 | for i in xrange(len(class_data)): 43 | kw_data = {} 44 | for col in class_data.columns: 45 | kw_data.setdefault(col, class_data.iloc[i][col]) 46 | if t_sh_industry_class.find_one(kw_data) == None: 47 | id = t_sh_industry_class.insert_one(kw_data).inserted_id 48 | 49 | interest_data = pd.read_csv("./data/platform_company/industry_interest.csv") 50 | db = client.holmesdb 51 | t_sh_industry_interest = db.t_sh_industry_interest 52 | for i in xrange(len(interest_data)): 53 | kw_data = {} 54 | for col in interest_data.columns: 55 | kw_data.setdefault(col, interest_data.iloc[i][col]) 56 | if t_sh_industry_interest.find_one(kw_data) == None: 57 | id = t_sh_industry_interest.insert_one(kw_data).inserted_id -------------------------------------------------------------------------------- /others/后台2/knowledge_graph_build.py: -------------------------------------------------------------------------------- 1 | #encoding=utf8 2 | ''' 3 | __author__ = 'Administrator' 4 | 2016.4.17 01:08 first version 5 | 构建知识图谱,pipeline 6 | 1、词性标注 7 | 2、歧义消除 8 | 3、关系抽取 9 | 4、知识推理 10 | 5、知识表示 11 | ''' 12 | 13 | import csv 14 | import json 15 | import time 16 | import re 17 | from string import punctuation,digits,letters,whitespace 18 | import sys 19 | import datetime 20 | from helper import myio 21 | import jieba 22 | import jieba.analyse 23 | import jieba.posseg as pseg 24 | import math 25 | import pandas as pd 26 | from gensim import corpora,models 27 | import helper.textprocessing as tp 28 | from helper.textprocessing import handleContent 29 | from pymongo import MongoClient 30 | client=MongoClient() 31 | reload(sys) 32 | sys.setdefaultencoding('utf-8') 33 | 34 | knowledge_graph_dir = "./data/knowledge_graph/" 35 | def getLastNameDict(): 36 | last_name_dict ={} 37 | name_vec = [line.strip().split(" ") for line in open(knowledge_graph_dir + u"中国姓.txt")] 38 | for vec in name_vec: 39 | if len(vec) > 1: 40 | for v in vec: 41 | last_name_dict.setdefault(v, 0) 42 | return last_name_dict 43 | 44 | def extractEntity(): 45 | db = client.holmesdb 46 | t_news = db.t_news_di 47 | res_list = t_news.find() 48 | last_name_dict = getLastNameDict() 49 | 50 | ntoken_dict = {} 51 | institute_dict = {} 52 | location_dict = {} 53 | people_dict = {} 54 | 55 | row_cnt = 0 56 | for res in res_list: 57 | row_cnt += 1 58 | # if row_cnt >= 2000: break 59 | title = res["title"] 60 | content = res["content"] 61 | doc = myio.handleContent(title) + " " + myio.handleContent(content) 62 | words = pseg.cut(doc) 63 | for (word, flag) in words: 64 | if flag.find("n") != -1: 65 | if len(word) == 1: 66 | continue 67 | word1 = word[0].encode("utf-8") 68 | word2 = word[:2].encode("utf-8") 69 | if word1 in last_name_dict or word2 in last_name_dict: 70 | #print word[0], word[:2] 71 | people_dict[word] = people_dict.setdefault(word, 0) + 1 72 | if flag.find("t") != -1 or flag.find("r") != -1: 73 | institute_dict[word] = institute_dict.setdefault(word, 0) + 1 74 | if flag.find("s") != -1: 75 | location_dict[word] = location_dict.setdefault(word, 0) + 1 76 | #print w.word, w.flag 77 | ntoken_dict[word] = ntoken_dict.setdefault(word, 0) + 1 78 | ntoken_list = sorted(ntoken_dict.items(), lambda a, b: -cmp(a[1], b[1])) 79 | people_list = sorted(people_dict.items(), lambda a, b: -cmp(a[1], b[1])) 80 | 
institute_list = sorted(institute_dict.items(), lambda a, b: -cmp(a[1], b[1])) 81 | location_list = sorted(location_dict.items(), lambda a, b: -cmp(a[1], b[1])) 82 | 83 | f_ntoken = open(knowledge_graph_dir + "news_ntoken.txt", "w") 84 | f_peo = open(knowledge_graph_dir + "news_people.txt", "w") 85 | f_ins = open(knowledge_graph_dir + "news_institute.txt", "w") 86 | f_loc = open(knowledge_graph_dir + "news_location.txt", "w") 87 | for (word, freq) in ntoken_list: 88 | print word, freq 89 | f_ntoken.write("%s\n"%word) 90 | for (word, freq) in institute_list: 91 | print word, freq 92 | f_ins.write("%s\n"%word) 93 | for (word, freq) in location_list: 94 | print word, freq 95 | f_loc.write("%s\n"%word) 96 | for (word, freq) in people_list: 97 | print word, freq 98 | f_peo.write("%s\n"%word) 99 | 100 | def extractRelation(): 101 | db = client.holmesdb 102 | t_news = db.t_news_di 103 | res_list = t_news.find() 104 | 105 | pair3_dict = {} 106 | pair2_dict = {} 107 | row_cnt = 0 108 | for res in res_list: 109 | row_cnt += 1 110 | if row_cnt >= 20000: break 111 | title = res["title"] 112 | if title.find(u"要不要打破刚性兑付?") != -1: 113 | continue 114 | content = res["content"] 115 | content_sen = tp.cut_sentence_2(content) 116 | sentence_list = [title] + content_sen 117 | for sen in sentence_list: 118 | sen = myio.handleContent(sen) 119 | if len(sen) < 5: continue 120 | if sen.find(u"尹许尹") != -1: 121 | print title 122 | print content 123 | print sen 124 | words = pseg.cut(sen) 125 | ntoken_list = [] 126 | for (word, flag) in words: 127 | if flag.find("n") != -1 and (flag.find("r") != -1 or flag.find("s") != -1 or flag.find("t") != -1): 128 | ntoken_list.append(word) 129 | for i in xrange(len(ntoken_list) - 1): 130 | for j in xrange(i+1, len(ntoken_list)): 131 | if ntoken_list[i] == ntoken_list[j]: 132 | continue 133 | pair2 = (ntoken_list[i], ntoken_list[j]) 134 | pair2_dict[pair2] = pair2_dict.setdefault(pair2, 0) + 1 135 | for k in xrange(j+1, len(ntoken_list)): 136 | if ntoken_list[i] == ntoken_list[k] or ntoken_list[j] == ntoken_list[k]: 137 | continue 138 | pair3 = (ntoken_list[i], ntoken_list[j], ntoken_list[k]) 139 | pair3_dict[pair3] = pair3_dict.setdefault(pair3, 0) + 1 140 | pair2_list = sorted(pair2_dict.items(), lambda a,b: -cmp(a[1], b[1])) 141 | # for (w1, w2) in pair2_list: 142 | # print w1[0], w1[1], w2 143 | pair3_list = sorted(pair3_dict.items(), lambda a,b: -cmp(a[1], b[1])) 144 | # for (w1, w2) in pair3_list: 145 | # print w1[0], w1[1], w1[2], w2 146 | f_rel = open(knowledge_graph_dir + "news_relation.txt", "w") 147 | for (w1, w2) in pair2_list[:500000]: 148 | f_rel.write("%s %s\n"%(w1[0], w1[1])) 149 | for (w1, w2) in pair3_list[:3000000]: 150 | f_rel.write("%s %s %s\n"%(w1[0], w1[1], w1[2])) 151 | 152 | if __name__ == "__main__": 153 | # pipeline step1 154 | # extractEntity() 155 | # pipeline step3 156 | extractRelation() -------------------------------------------------------------------------------- /others/后台2/mongoDB_Test.py: -------------------------------------------------------------------------------- 1 | #encoder=utf8 2 | 3 | 4 | from pymongo import MongoClient 5 | 6 | client=MongoClient() 7 | 8 | 9 | db = client.holmesdb 10 | 11 | data = db.t_sh_industry_keywords.find() 12 | for d in data: 13 | for k in d: 14 | if k.find("hot") != -1: 15 | print d["dt"], k, d[k] 16 | 17 | -------------------------------------------------------------------------------- /others/后台2/platform_article_keys_extract.py: 
-------------------------------------------------------------------------------- 1 | #encoding=utf8 2 | ''' 3 | __author__ = 'Administrator' 4 | 2016.3.3 20:30 first version 5 | 加载文件 6 | ''' 7 | import csv 8 | import json 9 | import os 10 | import time 11 | import re 12 | from string import punctuation,digits,letters,whitespace 13 | import sys 14 | import datetime 15 | from helper import myio 16 | import jieba 17 | import jieba.analyse 18 | import math 19 | from helper import myio 20 | import pandas as pd 21 | from gensim import corpora,models 22 | from helper.textprocessing import handleContent 23 | from pymongo import MongoClient 24 | client=MongoClient() 25 | db = client.holmesdb 26 | reload(sys) 27 | sys.setdefaultencoding('utf-8') 28 | stop_dict = {} 29 | for line in open("C:\Python27\Lib\site-packages\jieba-0.37-py2.7.egg\jieba\stop_chinese.txt"): 30 | stop_dict.setdefault(line.strip(), 0) 31 | 32 | 33 | platform_json = myio.getJsonFile_all('./data/platform_company/display_platform.json') 34 | platform_name_list = platform_json.keys() 35 | 36 | 37 | print "before load date", datetime.datetime.now() 38 | news_dataset = pd.read_pickle("./data/news_dataset.pkl") 39 | all_dataset = news_dataset 40 | all_doc_cnt = len(news_dataset) 41 | print "end load date", datetime.datetime.now() 42 | 43 | 44 | print "before cut segments", datetime.datetime.now() 45 | # 分词,关键字提取 46 | df_dict = {} 47 | for i in xrange(0, all_doc_cnt):#len(all_dataset)): 48 | title = all_dataset.iloc[i]['title'] 49 | content = all_dataset.iloc[i]['content'] 50 | item_pub_time = all_dataset.iloc[i]['item_pub_time'] 51 | doc = handleContent(title) + " " + handleContent(content) 52 | tokens = list(jieba.cut(doc)) 53 | new_tokens = [] 54 | 55 | token_dict_delta = {} 56 | for i in xrange( len(tokens) ): 57 | if tokens[i].isdigit() == True or len(tokens[i]) <= 1\ 58 | or (tokens[i].isalnum() == True and len(tokens[i]) > 20): 59 | #print tokens[i], 60 | continue 61 | if tokens[i] in stop_dict:#去停用词 62 | continue 63 | new_tokens.append(tokens[i]) 64 | token_dict_delta.setdefault(tokens[i], 0) 65 | for token in token_dict_delta: 66 | df_dict[token] = df_dict.setdefault(token, 0) + 1 67 | print "all word cnt:" , len(df_dict) 68 | t_word_df_dd = db.t_word_df_dd 69 | t_word_df_dd.remove() 70 | for token in df_dict: 71 | t_word_df_dd.insert({"word":token, "df":df_dict[token]}) 72 | 73 | t_word_df_dd = db.t_word_df_dd 74 | t_news = db.t_news_di 75 | news_res = t_news.find({"item_pub_time": {"$lt": '2016-01-05', "$gt": '2015-11-25'}}) 76 | print "month news cnt:", news_res.count() 77 | platform_key_dict = {} 78 | for news in news_res: 79 | title = news['title'] 80 | content = news['content'] 81 | doc = handleContent(title) + " " + handleContent(content) 82 | tokens = list(jieba.cut(doc)) 83 | token_dict = {} 84 | for i in xrange( len(tokens) ): 85 | if tokens[i].isdigit() == True or len(tokens[i]) <= 1\ 86 | or (tokens[i].isalnum() == True and len(tokens[i]) > 20): 87 | #print tokens[i], 88 | continue 89 | if tokens[i] in stop_dict:#去停用词 90 | continue 91 | token_dict[tokens[i]] = token_dict.setdefault(tokens[i], 0) + 1 92 | token_w_list = [] 93 | for token in token_dict: 94 | tf = token_dict[token] 95 | # df = t_word_df_dd.find_one({"word":token}) 96 | # df = df["df"] if df != None else 0 97 | df = df_dict[token] if token in df_dict else 0 98 | tfidf = math.log(1+tf) * math.log((1+all_doc_cnt) * 1.0 / (1+df)) 99 | token_w_list.append((token, tfidf)) 100 | news_key_list = sorted(token_w_list, lambda a,b: -cmp(a[1], b[1]))[:20] 101 | 
p_cnt = 0 102 | for pname in platform_name_list: 103 | if doc.find(pname) != -1: 104 | p_cnt += 1 105 | for pname in platform_name_list: 106 | if title.find(pname) != -1: 107 | for (key, w) in news_key_list: 108 | if key != pname: 109 | #print pname, key, w 110 | platform_key_dict[pname][key] = platform_key_dict.setdefault(pname, {}).setdefault(key, 0) + w * 1.0 / p_cnt 111 | 112 | platform_key_month12 = {} 113 | f_path = "./data/platform_company/platform_news_keywords" 114 | for pname in platform_name_list: 115 | if pname in platform_key_dict: 116 | hot_key_list = sorted(platform_key_dict[pname].items(), lambda a,b: -cmp(a[1], b[1]))[:50] 117 | print pname 118 | for (hot_key, w) in hot_key_list: 119 | print hot_key, 120 | print "" 121 | platform_key_month12.setdefault(pname, hot_key_list) 122 | myio.writeJsonDict(platform_key_month12, open(f_path, "w"), "rows") 123 | 124 | -------------------------------------------------------------------------------- /others/后台2/platform_data_loads.py: -------------------------------------------------------------------------------- 1 | #encoding=utf8 2 | ''' 3 | __author__ = 'Administrator' 4 | 2016.3.3 20:30 first version 5 | 加载文件 6 | ''' 7 | 8 | import csv 9 | import json 10 | import time 11 | import re 12 | from string import punctuation,digits,letters,whitespace 13 | import sys 14 | import os 15 | import datetime 16 | import jieba 17 | import jieba.analyse 18 | import pandas as pd 19 | from pymongo import MongoClient 20 | client=MongoClient() 21 | reload(sys) 22 | sys.setdefaultencoding('utf-8') 23 | csv.field_size_limit(sys.maxint) 24 | 25 | 26 | 27 | def handleContent(string): 28 | """字符串处理,去标点符号,中文分词,return:unicode""" 29 | string = string.decode('utf-8') 30 | #针对自己的文本数据定制化修改 31 | string = string.replace("
", "").replace("
", "").replace("", "").replace("", "") 32 | string = string.replace("#r#", "\n").replace("#n#", "\n").replace("", "") 33 | string = string.replace(" ", "").replace("\n", "").replace("\t", " ") 34 | 35 | string = re.sub("[\s+\.\!\/_,$%^*(+\"\']+|[+——!:》,《”。“?、~@#¥%……&*()]+".decode("utf-8"), "".decode("utf-8"),string) 36 | string = string.encode('utf-8') 37 | string = string.translate(None,punctuation+digits+letters+whitespace) 38 | return string 39 | 40 | def getJsonFile_line(json_fname): 41 | json_file = file(json_fname, "r") 42 | json_vector = [] 43 | for line in json_file: 44 | person_info = json.loads(line) 45 | json_vector.append(person_info) 46 | return json_vector 47 | def getJsonFile_all(json_fname): 48 | json_file = open(json_fname, "r") 49 | dict = json.load(json_file) 50 | json_file.close() 51 | return dict 52 | 53 | 54 | 55 | 56 | db = client.holmesdb 57 | t_company_info = db.t_company_info 58 | t_company_info.remove() 59 | t_bad_company_info = db.t_bad_company_info 60 | t_bad_company_info.remove() 61 | print "before load all company dadtas", datetime.datetime.now() 62 | def getCompanyList(): 63 | company_dict = {} 64 | company_f_in = csv.reader(file(u"data/platform_company/网贷之家.csv", "r")) 65 | lines = [line for line in company_f_in] 66 | for line in lines[1:]: 67 | company_dict.setdefault(line[1], 0) 68 | company_f_in = csv.reader(file(u"data/platform_company/融360.csv", "r")) 69 | lines = [line for line in company_f_in] 70 | for line in lines[1:]: 71 | company_dict.setdefault(line[0], 0) 72 | company_f_in = csv.reader(file(u"data/platform_company/百度财富.csv", "r")) 73 | lines = [line for line in company_f_in] 74 | for line in lines[1:]: 75 | company_dict.setdefault(line[1], 0) 76 | company_f_out = open("data/platform_company/company_list.txt", "w") 77 | for c in company_dict: 78 | company_f_out.write(c + "\n") 79 | 80 | all_data_dict = getJsonFile_all('./data/platform_company/wangdai_platform.json') 81 | 82 | #company_f_out = open(r"C:\Python27\Lib\site-packages\jieba-0.37-py2.7.egg\jieba/company_dict.txt", "w") 83 | company_f_out = open(r"./data/platform_company/company_list.txt", "w") 84 | for c in company_dict: 85 | #company_f_out.write(c + " 10000 n\n") 86 | company_f_out.write(c + "\n") 87 | return company_dict, all_data_dict 88 | 89 | def getBadCompanyList(): 90 | bad_company_dict = {} 91 | all_data_dict = getJsonFile_all('./data/bad_platform/problem_platform.json') 92 | return all_data_dict 93 | 94 | company_dict, company_info_list = getCompanyList() 95 | company_info_key = company_info_list[0].keys() 96 | bad_company_info_list = getBadCompanyList() 97 | for plat in company_info_list: 98 | t_company_info.insert(plat) 99 | if t_company_info.find_one(plat['_id']) == None: 100 | plat['_id'] = plat['platName'] 101 | bad_company_2015 = [] 102 | for plat in bad_company_info_list: 103 | for company in company_info_list: 104 | if plat['platform_name'] == company['platName']: 105 | for key in company: 106 | if key != 'platName': 107 | plat.setdefault(key, company[key]) 108 | 109 | plat['_id'] = plat['platform_name'] 110 | if "online_time" in plat : 111 | if plat["online_time"].strip().find("年") != -1: 112 | plat["online_time"] = plat["online_time"].strip()[:4] + ".01" 113 | if "problem_time" in plat: 114 | if plat["problem_time"].strip().find("年") != -1: 115 | plat["problem_time"] = plat["problem_time"].strip()[:4] + ".01" 116 | if t_bad_company_info.find_one(plat['_id']) == None: 117 | t_bad_company_info.insert(plat) 118 | if plat['problem_time'][:4] == '2015': 119 | 
bad_company_2015.append(plat) 120 | def writeJsonDict(person, f_out): 121 | outStr = json.dumps(person, ensure_ascii = False, indent=1) #处理完之后重新转为Json格式 122 | f_out.write(outStr.encode('utf-8') + '\n') #写回到一个新的Json文件中去 123 | writeJsonDict(bad_company_2015, open("./data/bad_platform/bad_platform_2015.json", "w")) 124 | print "end loads all company datas", datetime.datetime.now() 125 | 126 | 127 | -------------------------------------------------------------------------------- /others/后台2/process_analyze.py: -------------------------------------------------------------------------------- 1 | #encoding=utf8 2 | ''' 3 | __author__ = 'Administrator' 4 | 2016.2.27 20:30 first version 5 | 分词、提取关键字、提取文章主题 6 | ''' 7 | 8 | import csv 9 | import json 10 | import time 11 | import re 12 | from string import punctuation,digits,letters,whitespace 13 | import sys 14 | import datetime 15 | 16 | import jieba 17 | import jieba.analyse 18 | import math 19 | import pandas as pd 20 | from gensim import corpora,models 21 | from helper.textprocessing import handleContent 22 | from pymongo import MongoClient 23 | client=MongoClient() 24 | reload(sys) 25 | sys.setdefaultencoding('utf-8') 26 | 27 | 28 | 29 | 30 | 31 | print "before load date", datetime.datetime.now() 32 | news_dataset = pd.read_pickle("./data/news_dataset.pkl") 33 | news_dataset_other = pd.read_pickle("./data/news_dataset_other.pkl") 34 | all_dataset = pd.concat([news_dataset, news_dataset_other]) 35 | print "end load date", datetime.datetime.now() 36 | 37 | 38 | # 分词,关键字提取 39 | text_tags = [] 40 | lda_train_set = [] 41 | keyword_dict = {} 42 | day_cnt = {} 43 | 44 | 45 | for item_pub_time in all_dataset['item_pub_time']: 46 | day_cnt[item_pub_time[:10]] = day_cnt.setdefault(item_pub_time[:10], 0) + 1 47 | day_cnt = sorted(day_cnt.items(), lambda a, b: -cmp(a[0], b[0])) 48 | f_out = open("data/everyday_newscnt.txt", "w") 49 | for pp in day_cnt: 50 | if len(pp[0]) >= 1 and str(pp[0][0]).isdigit() == True: 51 | f_out.write("%s %s\n"%(pp[0], pp[1])) 52 | 53 | print "before cut segments", time.localtime() 54 | for row_id, news in all_dataset.iterrows(): 55 | if row_id % 1000 == 999: 56 | print row_id 57 | content = news['content'] 58 | content = handleContent(content) 59 | pub_time = news['item_pub_time'] 60 | content = content.replace(" ", "").replace("\n", "").replace(" ", "") 61 | seg = list(jieba.cut(content)) 62 | 63 | lda_train_set.append(seg) 64 | key_words = jieba.analyse.extract_tags(content, topK = 20) 65 | for token in key_words: 66 | keyword_dict[token] = keyword_dict.setdefault(token, 0) + 1 67 | text_tags.append(key_words) 68 | 69 | 70 | 71 | print "end cut segments", time.localtime() 72 | 73 | 74 | for keyword in key_words: 75 | print keyword 76 | 77 | -------------------------------------------------------------------------------- /others/后台2/sparser/hexun/ReadMe.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindawei/p2p/b0771100b8d4740906a186d9668b7d662214409c/others/后台2/sparser/hexun/ReadMe.txt -------------------------------------------------------------------------------- /others/后台2/sparser/p2pguancha_sparser.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sat Feb 27 21:25:37 2016 4 | 5 | @author: yue 6 | """ 7 | 8 | import requests 9 | import json 10 | import time 11 | 12 | idd = '1991' 13 | while(True): 14 | url = 
"http://www.p2pguancha.com/api.php?action=categorycontent&cid=11&id="+idd+"&num=10" 15 | r = requests.get(url) 16 | result = r.json() 17 | for i in range(0,len(result['article'])): 18 | print result['article'][i].keys() 19 | save = {} 20 | save['source '] = "P2P观察网" 21 | save['item_id'] = "p2pgc_" + result['article'][i]['id'].encode('utf8') 22 | save['item_type'] = "news" 23 | save['author'] = "" if "author_id" in result['article'][i] else result['article'][i]['author_id'].encode('utf8') 24 | save['tags'] = result['article'][i]['tag_name'].encode('utf8') 25 | save['title'] = result['article'][i]['title'].encode('utf8') 26 | save['content'] = result['article'][i]['content'].encode('utf8') 27 | save['url'] = "http://www.p2pguancha.com/article/"+save['item_id']+".html" 28 | save['source_name'] = result['article'][i]['source_name'].encode('utf8') 29 | save['news_pub_time'] = result['article'][i]['release_time'].encode('utf8') 30 | save['gmt_create'] = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) 31 | save_str = json.dumps(save,ensure_ascii=False) 32 | f = open("p2pguancha_news.txt",'a') 33 | f.write(save_str+'\n') 34 | f.close() 35 | idd = save['item_id'] 36 | print idd -------------------------------------------------------------------------------- /others/后台2/spider/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'Administrator' 2 | -------------------------------------------------------------------------------- /others/后台2/spider/extract_p2p_news.py: -------------------------------------------------------------------------------- 1 | #encoding=utf8 2 | ''' 3 | __author__ = 'Administrator' 4 | 2016.4.10 20:30 first version 5 | 提取三大门户新闻网+财新网中的P2P资讯 6 | ''' 7 | 8 | import csv 9 | import json 10 | import time 11 | import re 12 | from string import punctuation,digits,letters,whitespace 13 | import sys 14 | import os 15 | import datetime 16 | from bson import ObjectId 17 | import jieba 18 | import jieba.analyse 19 | import pandas as pd 20 | import pymongo 21 | from pymongo import MongoClient 22 | client=MongoClient() 23 | reload(sys) 24 | sys.setdefaultencoding('utf-8') 25 | csv.field_size_limit(sys.maxint) 26 | 27 | 28 | p2p_key_list = [line.strip() for line in open("../data/key_list_hot.txt")] 29 | 30 | columns = "item_id,item_type,source,url,author,title,content,item_pub_time,tags,cmt_cnt,fav_cnt,gmt_create,exinfo1,exinfo2".split(',') 31 | item_id_dict = {} 32 | writer = csv.writer(file("../data/news/news_other.csv", 'wb')) 33 | writer.writerow(columns) 34 | 35 | news_cnt = 0 36 | news_other_dir = "../data/news/news_other" 37 | date_dict = {} 38 | month_dict = {} 39 | for cur,dirnames,filenames in os.walk(news_other_dir): #三个参数:分别返回1.父目录 2.所有文件夹名字(不含路径) 3.所有文件名字 40 | for f in os.listdir(cur): 41 | print f 42 | # if f.find("weixin") == -1: 43 | # continue 44 | news_cnt = 0 45 | f_in = csv.reader(file(news_other_dir + r"/" + f, "r")) 46 | lines = [line for line in f_in] 47 | print len(lines) 48 | for line in lines[1:]: 49 | if len(line) < 14: 50 | continue 51 | if len(line) > 14: 52 | line = line[:14] 53 | #content = line[6].replace("###r###", "\r").replace("###n###", "\n").replace("###t###", "\t") 54 | content = line[6] 55 | title = line[5] 56 | 57 | for key in p2p_key_list: 58 | if title.find(key) != -1 or content.find(key) != -1: 59 | #print title 60 | #print content 61 | writer.writerow(line) 62 | #if f == 'caixin.csv': 63 | #print key 64 | #print title, content 65 | news_cnt += 1 66 | dt = line[7] 67 | m = dt[:8] 68 | 
date_dict[dt] = date_dict.setdefault(dt, 0) + 1 69 | month_dict[m] = month_dict.setdefault(m, 0) + 1 70 | break 71 | print news_cnt 72 | print date_dict 73 | print month_dict -------------------------------------------------------------------------------- /others/后台2/spider/hujin_institute_process.py: -------------------------------------------------------------------------------- 1 | #encoding=utf8 2 | ''' 3 | __author__ = 'Administrator' 4 | 2016.4.16 19:50 first version 5 | 处理互联网金融协会名单 6 | ''' 7 | import csv 8 | import os 9 | import sys 10 | import bs4 11 | import datetime 12 | import requests, html2text 13 | reload(sys) 14 | sys.setdefaultencoding('utf-8') 15 | 16 | f_in = open(u"../data/knowledge_graph/中国互联网金融协会会员名单.txt") 17 | f_out = open(u"../data/knowledge_graph/中国互联网金融协会会员名单_分开.txt", "w") 18 | f_relation_out = open(r"../data/knowledge_graph/relation_equal.txt", "w") 19 | 20 | for line in f_in: 21 | print line.strip() 22 | if line.find( u")") == -1: 23 | f_out.write(line) 24 | else: 25 | line_rep = line.strip().replace(u")", "#").replace(u"(", "$") 26 | print "\t", line_rep 27 | if line_rep[-1] == "#": 28 | rev_index = line_rep.rfind("$") 29 | rev_end_index = line_rep.rfind("#") 30 | pname1 = line_rep[:rev_index] 31 | pname2 = line_rep[rev_index+1:-1] 32 | print "\t", pname1, pname2 33 | pname1 = pname1.replace("#", u")").replace("$", u"(") 34 | f_out.write(pname1 + "\n") 35 | f_out.write(pname2 + "\n") 36 | f_relation_out.write("%s,%s"%(pname1, pname2)) 37 | f_relation_out.write("%s,%s"%(pname2, pname1)) 38 | else: 39 | f_out.write(line) 40 | 41 | -------------------------------------------------------------------------------- /others/后台2/spider/ifeng_extractNews.py: -------------------------------------------------------------------------------- 1 | #encoding=utf8 2 | ''' 3 | __author__ = 'Administrator' 4 | 2016.3.13 19:50 first version 5 | 从ifeng网的html页面里提取结构化新闻数据 6 | ''' 7 | import csv 8 | import os 9 | import sys 10 | import bs4 11 | import datetime 12 | import requests, html2text 13 | try: 14 | from bs4 import BeautifulSoup 15 | except: 16 | import BeautifulSoup 17 | reload(sys) 18 | sys.setdefaultencoding('utf-8') 19 | ifeng_dir = r"D:\LoalaSave\news.ifeng.com" 20 | 21 | 22 | date_dict = {} 23 | columns = "item_id,item_type,source,url,author,title,content,item_pub_time,tags,cmt_cnt,fav_cnt,gmt_create,exinfo1,exinfo2".split(',') 24 | item_id_dict = {} 25 | writer = csv.writer(file("../data/news/news_other/ifeng.csv", 'wb')) 26 | writer.writerow(columns) 27 | def extract_news(soup, news_cnt): 28 | try: 29 | metas = soup.find_all("meta") 30 | #print metas 31 | key_words = "" 32 | is_article = 0 33 | title = "" 34 | url = "" 35 | description = "" 36 | image_url = "" 37 | 38 | for meta in metas: 39 | if meta.has_attr("name") == True: 40 | if meta["name"] == "keywords": 41 | key_words = meta["content"] 42 | if meta["name"] == "og:time": 43 | #print meta["content"] 44 | item_pub_time = meta["content"].replace("年", "-").replace("月", "-").replace("日", "").split(" ")[0] 45 | # print item_pub_time[:10] 46 | date_dict[item_pub_time[:10]] = date_dict.setdefault(item_pub_time[:10], 0) +1 47 | if meta["content"] == "news": 48 | is_article = 1 49 | if meta.has_attr("property") == True: 50 | if meta["property"] == "og:title": 51 | title = meta["content"] 52 | if meta["property"] == "og:url": 53 | url = meta["content"] 54 | if meta["property"] == "og:description": 55 | description = meta["content"] 56 | # print is_article 57 | # print item_pub_time 58 | # print title 59 | # print key_words 
60 | # print description 61 | # print url 62 | # print "" 63 | if is_article == 0: 64 | return -1 65 | item_id = "ifeng-" + str(news_cnt) 66 | content = "" 67 | content_div = soup.find(id="main_content") 68 | #print content_div 69 | p_list = content_div.find_all("p") 70 | #print p_list 71 | #print p_list 72 | for i in xrange(len(p_list)): 73 | p = "" 74 | for e in p_list[i].contents: 75 | try: 76 | p += e.string 77 | except Exception: 78 | continue 79 | content += p + "\n" 80 | #print content 81 | content = content.replace("\n", "###n###") 82 | content = content.replace("\r", "###r###") 83 | gmt_create = datetime.datetime.strftime(datetime.datetime.now(), "%Y-%m-%d %M:%S") 84 | cmt_cnt = 0 85 | fav_cnt = 0 86 | source = u"凤凰网" 87 | 88 | 89 | # print item_id 90 | item_type = "news" 91 | tags = key_words.replace(" ", ",") 92 | #print tags 93 | # print gmt_create 94 | # print content 95 | # print item_pub_time 96 | exinfo1 = "" 97 | exinfo2 = "" 98 | if image_url != "": 99 | exinfo2 = "image_url:" + image_url 100 | 101 | result = {} 102 | result['url'] = url 103 | result['item_id'] = item_id 104 | result['item_type'] = item_type 105 | result['author'] = 'ifeng_jizhe' 106 | result['source'] = source 107 | result['title'] = title 108 | result['content'] = content 109 | result['item_pub_time'] = item_pub_time 110 | result['tags'] = tags 111 | result['cmt_cnt'] = cmt_cnt 112 | result['fav_cnt'] = fav_cnt 113 | result['exinfo1'] = exinfo1 114 | result['exinfo2'] = exinfo2 115 | result['gmt_create'] = gmt_create 116 | 117 | line = [] 118 | for col in columns: 119 | if col not in result: 120 | line.append('') 121 | else: 122 | line.append(str(result[col]).encode('utf-8')) 123 | writer.writerow(line) 124 | except Exception, e: 125 | return -1 126 | return 0 127 | 128 | 129 | news_cnt = 0 130 | for cur,dirnames,filenames in os.walk(ifeng_dir): #三个参数:分别返回1.父目录 2.所有文件夹名字(不含路径) 3.所有文件名字 131 | for f in os.listdir(cur): 132 | print "#", f 133 | # f = '1048' 134 | try: 135 | f_path = os.path.join(cur, f) 136 | soup = BeautifulSoup(open(f_path)) 137 | if soup == None or soup.find("title") == None: 138 | continue 139 | title = soup.find("title").string 140 | flag = extract_news(soup, news_cnt) 141 | if flag == 0: 142 | news_cnt += 1 143 | print news_cnt 144 | if news_cnt % 1000 == 1: 145 | print news_cnt 146 | except Exception, e: 147 | print e 148 | continue 149 | 150 | #break 151 | print news_cnt 152 | print len(item_id_dict) -------------------------------------------------------------------------------- /others/后台2/spider/jpm_extractNews.py: -------------------------------------------------------------------------------- 1 | #encoding=utf8 2 | ''' 3 | __author__ = 'Administrator' 4 | 2016.3.13 19:50 first version 5 | 从caixin网的html页面里提取结构化新闻数据 6 | ''' 7 | import csv 8 | import os 9 | import sys 10 | import bs4 11 | import datetime 12 | import requests, html2text 13 | try: 14 | from bs4 import BeautifulSoup 15 | except: 16 | import BeautifulSoup 17 | reload(sys) 18 | sys.setdefaultencoding('utf-8') 19 | zhongshen_dir = r"C:\Users\Administrator\Desktop\Working Folder\Holmes\data\news\jpm" 20 | 21 | 22 | 23 | columns = "item_id,item_type,source,url,author,title,content,item_pub_time,tags,cmt_cnt,fav_cnt,gmt_create,exinfo1,exinfo2".split(',') 24 | item_id_dict = {} 25 | writer = csv.writer(file("../data/news/jpm.csv", 'wb')) 26 | writer.writerow(columns) 27 | def extract_news(soup, news_cnt): 28 | try: 29 | infomain = soup.find(class_="content2") 30 | #print infomain 31 | key_words = "" 32 | is_article = 0 33 
| title = infomain.find("h1").string.strip() 34 | #print title 35 | url = "" 36 | description = "" 37 | image_url = "" 38 | info1 = infomain.find(class_="writer") 39 | item_pub_time = info1.find_all("span")[3].string.split(" ")[0] 40 | #print item_pub_time 41 | p_list = infomain.find_all("p") 42 | #print p_list 43 | content = "" 44 | for i in xrange(len(p_list)): 45 | p = "" 46 | for e in p_list[i].contents: 47 | try: 48 | p += e.string 49 | except Exception: 50 | continue 51 | content += p + "\n" 52 | content = content.replace("\n", "###n###") 53 | content = content.replace("\r", "###r###") 54 | # print content 55 | item_id = "jpm-" + str(news_cnt) 56 | # print item_id 57 | if item_id not in item_id_dict: 58 | item_id_dict.setdefault(item_id, 0) 59 | else: 60 | return 61 | 62 | gmt_create = datetime.datetime.strftime(datetime.datetime.now(), "%Y-%m-%d %M:%S") 63 | cmt_cnt = 0 64 | fav_cnt = 0 65 | source = u"金评媒" 66 | 67 | 68 | # print item_id 69 | item_type = "news" 70 | tags = key_words.replace(" ", ",") 71 | exinfo1 = "" 72 | exinfo2 = "" 73 | if image_url != "": 74 | exinfo2 = "image_url:" + image_url 75 | 76 | result = {} 77 | result['url'] = url 78 | result['item_id'] = item_id 79 | result['item_type'] = item_type 80 | result['author'] = 'jpm_jizhe' 81 | result['source'] = source 82 | result['title'] = title 83 | result['content'] = content 84 | result['item_pub_time'] = item_pub_time 85 | result['tags'] = tags 86 | result['cmt_cnt'] = cmt_cnt 87 | result['fav_cnt'] = fav_cnt 88 | result['exinfo1'] = exinfo1 89 | result['exinfo2'] = exinfo2 90 | result['gmt_create'] = gmt_create 91 | 92 | line = [] 93 | for col in columns: 94 | if col not in result: 95 | line.append('') 96 | else: 97 | line.append(str(result[col]).encode('utf-8')) 98 | writer.writerow(line) 99 | except Exception, e: 100 | return -1 101 | return 0 102 | 103 | 104 | news_cnt = 0 105 | for cur,dirnames,filenames in os.walk(zhongshen_dir): #三个参数:分别返回1.父目录 2.所有文件夹名字(不含路径) 3.所有文件名字 106 | for f in os.listdir(cur): 107 | try: 108 | f_path = os.path.join(cur, f) 109 | content = open(f_path, "r").read() 110 | #print content 111 | soup = BeautifulSoup(content) 112 | if soup == None or soup.find("title") == None: 113 | continue 114 | title = soup.find("title").string 115 | 116 | flag = extract_news(soup, news_cnt) 117 | if flag == 0: 118 | news_cnt += 1 119 | print title 120 | if news_cnt % 1000 == 1: 121 | print news_cnt 122 | except Exception, e: 123 | print e 124 | continue 125 | 126 | #break 127 | print news_cnt 128 | print len(item_id_dict) -------------------------------------------------------------------------------- /others/后台2/spider/process_wdzjdata.py: -------------------------------------------------------------------------------- 1 | #encoding=utf8 2 | ''' 3 | __author__ = 'Administrator' 4 | 2016.3.6 16:26 first version 5 | 处理从网贷之家爬取来的数据,tab改‘,’,并附加dt字段 6 | ''' 7 | 8 | 9 | import sys 10 | reload(sys) 11 | sys.setdefaultencoding('utf-8') 12 | import time 13 | import datetime 14 | 15 | d1 = '2016-03-15' 16 | f_in_area = open("../data/platform_company/platform_areas.txt") 17 | f_out_area = open("../data/platform_company/platform_areas_our.csv", "w") 18 | area_data = [line[:-1] for line in f_in_area] 19 | columns = area_data[0] 20 | f_out_area.write(columns + ",dt\n") 21 | for line in area_data[1:]: 22 | if len(line) <= 2: 23 | d1 = datetime.datetime.strptime(d1, "%Y-%m-%d") 24 | d1 = d1 - datetime.timedelta(days=31) 25 | d1 = datetime.datetime.strftime(d1, "%Y-%m-%d") 26 | else: 27 | f_out_area.write(line + "," + 
d1[:7] + "-01" +"\n") 28 | 29 | 30 | 31 | d1 = '2016-03-15' 32 | f_in_area = open("../data/platform_company/platform_class.txt") 33 | f_out_area = open("../data/platform_company/platform_class_our.csv", "w") 34 | area_data = [line[:-1] for line in f_in_area] 35 | columns = area_data[0] 36 | f_out_area.write(columns + ",dt\n") 37 | for line in area_data[1:]: 38 | if len(line) <= 2: 39 | d1 = datetime.datetime.strptime(d1, "%Y-%m-%d") 40 | d1 = d1 - datetime.timedelta(days=31) 41 | d1 = datetime.datetime.strftime(d1, "%Y-%m-%d") 42 | else: 43 | f_out_area.write(line + "," + d1[:7] + "-01" +"\n") -------------------------------------------------------------------------------- /others/后台2/spider/sina_extractNews.py: -------------------------------------------------------------------------------- 1 | #encoding=utf8 2 | ''' 3 | __author__ = 'Administrator' 4 | 2016.3.13 19:50 first version 5 | 从sina网的html页面里提取结构化新闻数据 6 | ''' 7 | import csv 8 | import os 9 | import sys 10 | import bs4 11 | import datetime 12 | import requests, html2text 13 | try: 14 | from bs4 import BeautifulSoup 15 | except: 16 | import BeautifulSoup 17 | reload(sys) 18 | sys.setdefaultencoding('utf-8') 19 | sina_dir = r"F:\LoalaSave\finance.sina.com.cn" 20 | 21 | 22 | 23 | columns = "item_id,item_type,source,url,author,title,content,item_pub_time,tags,cmt_cnt,fav_cnt,gmt_create,exinfo1,exinfo2".split(',') 24 | item_id_dict = {} 25 | writer = csv.writer(file("../data/news/news_other/sina.csv", 'wb')) 26 | writer.writerow(columns) 27 | def extract_news(soup, news_cnt): 28 | try: 29 | metas = soup.find_all("meta") 30 | #print metas 31 | key_words = "" 32 | is_article = 0 33 | title = "" 34 | url = "" 35 | description = "" 36 | image_url = "" 37 | 38 | for meta in metas: 39 | if meta.has_attr("name") == True: 40 | if meta["name"] == "keywords": 41 | key_words = meta["content"] 42 | if meta["name"] == "weibo: article:create_at": 43 | # print meta["content"] 44 | item_pub_time = meta["content"].split(" ")[0] 45 | # print item_pub_time 46 | if meta.has_attr("property") == True: 47 | if meta["content"] == "news": 48 | is_article = 1 49 | if meta["property"] == "og:title": 50 | title = meta["content"] 51 | if meta["property"] == "og:url": 52 | url = meta["content"] 53 | if meta["property"] == "og:description": 54 | description = meta["content"] 55 | 56 | # print title 57 | # print key_words 58 | # print description 59 | # print url 60 | # print "" 61 | if is_article == 0: 62 | return -1 63 | item_id = "sina-" + str(news_cnt) 64 | content = "" 65 | #share BSHARE_POP blkContainerSblkCon clearfix blkContainerSblkCon_14 66 | content_div = soup.find(name="div", id="artibody") 67 | #print content_div 68 | p_list = content_div.find_all("p") 69 | #print p_list 70 | for i in xrange(len(p_list)): 71 | p = "" 72 | for e in p_list[i].contents: 73 | try: 74 | p += e.string 75 | except Exception: 76 | continue 77 | content += p + "\n" 78 | content = content.replace("\n", "###n###") 79 | content = content.replace("\r", "###r###") 80 | gmt_create = datetime.datetime.strftime(datetime.datetime.now(), "%Y-%m-%d %M:%S") 81 | cmt_cnt = 0 82 | fav_cnt = 0 83 | source = u"新浪财经" 84 | 85 | 86 | # print item_id 87 | item_type = "news" 88 | tags = key_words.replace(" ", ",") 89 | #print tags 90 | # print gmt_create 91 | # print content 92 | # print item_pub_time 93 | exinfo1 = "" 94 | exinfo2 = "" 95 | if image_url != "": 96 | exinfo2 = "image_url:" + image_url 97 | 98 | result = {} 99 | result['url'] = url 100 | result['item_id'] = item_id 101 | 
result['item_type'] = item_type 102 | result['author'] = 'sina_jizhe' 103 | result['source'] = source 104 | result['title'] = title 105 | result['content'] = content 106 | result['item_pub_time'] = item_pub_time 107 | result['tags'] = tags 108 | result['cmt_cnt'] = cmt_cnt 109 | result['fav_cnt'] = fav_cnt 110 | result['exinfo1'] = exinfo1 111 | result['exinfo2'] = exinfo2 112 | result['gmt_create'] = gmt_create 113 | 114 | line = [] 115 | for col in columns: 116 | if col not in result: 117 | line.append('') 118 | else: 119 | line.append(str(result[col]).encode('utf-8')) 120 | writer.writerow(line) 121 | except Exception, e: 122 | return -1 123 | return 0 124 | 125 | 126 | news_cnt = 0 127 | for cur,dirnames,filenames in os.walk(sina_dir): #三个参数:分别返回1.父目录 2.所有文件夹名字(不含路径) 3.所有文件名字 128 | for f in os.listdir(cur): 129 | print "#", f 130 | try: 131 | f_path = os.path.join(cur, f) 132 | soup = BeautifulSoup(open(f_path)) 133 | if soup == None or soup.find("title") == None: 134 | continue 135 | title = soup.find("title").string 136 | ##初步过滤 137 | if len(title.split("_")) < 3 \ 138 | or title.split("_")[2] != "新浪网": 139 | continue 140 | # print f_path 141 | # print title 142 | flag = extract_news(soup, news_cnt) 143 | if flag == 0: 144 | news_cnt += 1 145 | print news_cnt 146 | #print title 147 | if news_cnt % 1000 == 1: 148 | print news_cnt 149 | except Exception, e: 150 | print e 151 | continue 152 | 153 | #break 154 | print news_cnt 155 | print len(item_id_dict) -------------------------------------------------------------------------------- /others/后台2/spider/weixin_extractNews.py: -------------------------------------------------------------------------------- 1 | #encoding=utf8 2 | ''' 3 | __author__ = 'Administrator' 4 | 2016.3.13 19:50 first version 5 | 从wy163网的html页面里提取结构化新闻数据 6 | ''' 7 | import csv 8 | import os 9 | import sys 10 | import bs4 11 | import datetime 12 | import requests, html2text 13 | try: 14 | from bs4 import BeautifulSoup 15 | except: 16 | import BeautifulSoup 17 | reload(sys) 18 | sys.setdefaultencoding('utf-8') 19 | wy163_dir = r"..\data\news\weixin" 20 | 21 | 22 | 23 | columns = "item_id,item_type,source,url,author,title,content,item_pub_time,tags,cmt_cnt,fav_cnt,gmt_create,exinfo1,exinfo2".split(',') 24 | item_id_dict = {} 25 | writer = csv.writer(file("../data/news/news_other/weixin.csv", 'wb')) 26 | writer.writerow(columns) 27 | def extract_news_txt(soup, news_cnt): 28 | try: 29 | is_article = 1 30 | title = soup[0].strip() 31 | #print title 32 | item_pub_time = soup[2].strip() 33 | #print item_pub_time 34 | content = soup[3].strip() 35 | #print metas 36 | key_words = "" 37 | 38 | url = "" 39 | description = "" 40 | image_url = "" 41 | 42 | 43 | # print is_article 44 | # print item_pub_time 45 | # print title 46 | # print key_words 47 | # print description 48 | # print url 49 | # print "" 50 | if is_article == 0: 51 | return -1 52 | item_id = "weixin-" + str(news_cnt) 53 | content = content.replace("\n", "###n###") 54 | content = content.replace("\r", "###r###") 55 | content = content.replace("#n#", "###n###") 56 | content = content.replace("#r#", "###r###") 57 | gmt_create = datetime.datetime.strftime(datetime.datetime.now(), "%Y-%m-%d %M:%S") 58 | cmt_cnt = 0 59 | fav_cnt = 0 60 | source = u"微信" 61 | 62 | 63 | # print item_id 64 | item_type = "news" 65 | tags = key_words.replace(" ", ",") 66 | #print tags 67 | # print gmt_create 68 | # print content 69 | # print item_pub_time 70 | exinfo1 = "" 71 | exinfo2 = "" 72 | if image_url != "": 73 | exinfo2 = 
"image_url:" + image_url 74 | 75 | result = {} 76 | result['url'] = url 77 | result['item_id'] = item_id 78 | result['item_type'] = item_type 79 | result['author'] = soup[1].strip() 80 | result['source'] = source 81 | result['title'] = title 82 | result['content'] = content 83 | result['item_pub_time'] = item_pub_time 84 | result['tags'] = tags 85 | result['cmt_cnt'] = cmt_cnt 86 | result['fav_cnt'] = fav_cnt 87 | result['exinfo1'] = exinfo1 88 | result['exinfo2'] = exinfo2 89 | result['gmt_create'] = gmt_create 90 | 91 | line = [] 92 | for col in columns: 93 | if col not in result: 94 | line.append('') 95 | else: 96 | line.append(str(result[col]).encode('utf-8')) 97 | writer.writerow(line) 98 | except Exception, e: 99 | return -1 100 | return 0 101 | 102 | 103 | news_cnt = 0 104 | for cur,dirnames,filenames in os.walk(wy163_dir): #三个参数:分别返回1.父目录 2.所有文件夹名字(不含路径) 3.所有文件名字 105 | for f in os.listdir(cur): 106 | try: 107 | f_path = os.path.join(cur, f) 108 | soup = [line for line in open(f_path)] 109 | flag = extract_news_txt(soup, news_cnt) 110 | if flag == 0: 111 | news_cnt += 1 112 | print news_cnt 113 | if news_cnt % 1000 == 1: 114 | print news_cnt 115 | except Exception, e: 116 | print e 117 | continue 118 | 119 | #break 120 | print news_cnt 121 | -------------------------------------------------------------------------------- /others/后台2/spider/wy163_extractNews.py: -------------------------------------------------------------------------------- 1 | #encoding=utf8 2 | ''' 3 | __author__ = 'Administrator' 4 | 2016.3.13 19:50 first version 5 | 从wy163网的html页面里提取结构化新闻数据 6 | ''' 7 | import csv 8 | import os 9 | import sys 10 | import bs4 11 | import datetime 12 | import requests, html2text 13 | try: 14 | from bs4 import BeautifulSoup 15 | except: 16 | import BeautifulSoup 17 | reload(sys) 18 | sys.setdefaultencoding('utf-8') 19 | wy163_dir = r"F:\LoalaSave\money.163.com" 20 | 21 | 22 | 23 | columns = "item_id,item_type,source,url,author,title,content,item_pub_time,tags,cmt_cnt,fav_cnt,gmt_create,exinfo1,exinfo2".split(',') 24 | item_id_dict = {} 25 | writer = csv.writer(file("../data/news/news_other/wy163.csv", 'wb')) 26 | writer.writerow(columns) 27 | def extract_news(soup, news_cnt): 28 | try: 29 | main_div = soup.find(class_="post_content_main") 30 | if main_div != None: 31 | is_article = 1 32 | else: 33 | is_article = 0 34 | #print main_div 35 | title = main_div.find("h1").string 36 | #print title 37 | item_pub_time = main_div.find(class_="post_time_source").contents[0].strip().split(" ")[0] 38 | #print item_pub_time 39 | content_div = main_div.find(class_="post_text") 40 | #print content_div 41 | #print metas 42 | key_words = "" 43 | 44 | url = "" 45 | description = "" 46 | image_url = "" 47 | 48 | 49 | # print is_article 50 | # print item_pub_time 51 | # print title 52 | # print key_words 53 | # print description 54 | # print url 55 | # print "" 56 | if is_article == 0: 57 | return -1 58 | item_id = "wy163-" + str(news_cnt) 59 | content = "" 60 | #print content_div 61 | p_list = content_div.find_all("p") 62 | #print p_list 63 | for i in xrange(len(p_list)): 64 | p = "" 65 | for e in p_list[i].contents: 66 | try: 67 | p += e.string 68 | except Exception: 69 | continue 70 | content += p + "\n" 71 | #print content 72 | content = content.replace("\n", "###n###") 73 | content = content.replace("\r", "###r###") 74 | gmt_create = datetime.datetime.strftime(datetime.datetime.now(), "%Y-%m-%d %M:%S") 75 | cmt_cnt = 0 76 | fav_cnt = 0 77 | source = u"网易财经" 78 | 79 | 80 | # print item_id 81 | 
item_type = "news" 82 | tags = key_words.replace(" ", ",") 83 | #print tags 84 | # print gmt_create 85 | # print content 86 | # print item_pub_time 87 | exinfo1 = "" 88 | exinfo2 = "" 89 | if image_url != "": 90 | exinfo2 = "image_url:" + image_url 91 | 92 | result = {} 93 | result['url'] = url 94 | result['item_id'] = item_id 95 | result['item_type'] = item_type 96 | result['author'] = 'wy163_jizhe' 97 | result['source'] = source 98 | result['title'] = title 99 | result['content'] = content 100 | result['item_pub_time'] = item_pub_time 101 | result['tags'] = tags 102 | result['cmt_cnt'] = cmt_cnt 103 | result['fav_cnt'] = fav_cnt 104 | result['exinfo1'] = exinfo1 105 | result['exinfo2'] = exinfo2 106 | result['gmt_create'] = gmt_create 107 | 108 | line = [] 109 | for col in columns: 110 | if col not in result: 111 | line.append('') 112 | else: 113 | line.append(str(result[col]).encode('utf-8')) 114 | writer.writerow(line) 115 | except Exception, e: 116 | return -1 117 | return 0 118 | 119 | 120 | news_cnt = 0 121 | for cur,dirnames,filenames in os.walk(wy163_dir): #三个参数:分别返回1.父目录 2.所有文件夹名字(不含路径) 3.所有文件名字 122 | for f in os.listdir(cur): 123 | print "#", f 124 | try: 125 | f_path = os.path.join(cur, f) 126 | soup = BeautifulSoup(open(f_path)) 127 | if soup == None or soup.find("title") == None: 128 | continue 129 | title = soup.find("title").string 130 | flag = extract_news(soup, news_cnt) 131 | if flag == 0: 132 | news_cnt += 1 133 | print news_cnt 134 | if news_cnt % 1000 == 1: 135 | print news_cnt 136 | except Exception, e: 137 | print e 138 | continue 139 | 140 | #break 141 | print news_cnt 142 | print len(item_id_dict) -------------------------------------------------------------------------------- /others/后台2/spider/zhongshen_extractNews.py: -------------------------------------------------------------------------------- 1 | #encoding=utf8 2 | ''' 3 | __author__ = 'Administrator' 4 | 2016.3.13 19:50 first version 5 | 从caixin网的html页面里提取结构化新闻数据 6 | ''' 7 | import csv 8 | import os 9 | import sys 10 | import bs4 11 | import datetime 12 | import requests, html2text 13 | try: 14 | from bs4 import BeautifulSoup 15 | except: 16 | import BeautifulSoup 17 | reload(sys) 18 | sys.setdefaultencoding('utf-8') 19 | zhongshen_dir = r"C:\Users\Administrator\Desktop\Working Folder\Holmes\data\news\zhongshen" 20 | 21 | 22 | 23 | columns = "item_id,item_type,source,url,author,title,content,item_pub_time,tags,cmt_cnt,fav_cnt,gmt_create,exinfo1,exinfo2".split(',') 24 | item_id_dict = {} 25 | writer = csv.writer(file("../data/news/zhongshen.csv", 'wb')) 26 | writer.writerow(columns) 27 | def extract_news(soup, news_cnt): 28 | try: 29 | infomain = soup.find(class_="InfoMain") 30 | key_words = "" 31 | is_article = 0 32 | title = infomain.find("h1").string 33 | #print title 34 | url = "" 35 | description = "" 36 | image_url = "" 37 | info1 = infomain.find(class_="info1") 38 | item_pub_time = info1.find_all("span")[0].string.split(" ")[0] 39 | #print item_pub_time 40 | p_list = infomain.find(id="hiddenContent").find_all("p") 41 | content = "" 42 | for i in xrange(len(p_list)): 43 | if p_list[i].find("span") != None and p_list[i].find("span").string != None: 44 | content += p_list[i].find("span").string + "\n" 45 | content = content.replace("\n", "###n###") 46 | content = content.replace("\r", "###r###") 47 | item_id = "zhongshen-" + str(news_cnt) 48 | # print item_id 49 | if item_id not in item_id_dict: 50 | item_id_dict.setdefault(item_id, 0) 51 | else: 52 | return 53 | 54 | gmt_create = 
datetime.datetime.strftime(datetime.datetime.now(), "%Y-%m-%d %M:%S") 55 | cmt_cnt = 0 56 | fav_cnt = 0 57 | source = u"zhongshen" 58 | 59 | 60 | # print item_id 61 | item_type = "news" 62 | tags = key_words.replace(" ", ",") 63 | exinfo1 = "" 64 | exinfo2 = "" 65 | if image_url != "": 66 | exinfo2 = "image_url:" + image_url 67 | 68 | result = {} 69 | result['url'] = url 70 | result['item_id'] = item_id 71 | result['item_type'] = item_type 72 | result['author'] = 'caixin_jizhe' 73 | result['source'] = source 74 | result['title'] = title 75 | result['content'] = content 76 | result['item_pub_time'] = item_pub_time 77 | result['tags'] = tags 78 | result['cmt_cnt'] = cmt_cnt 79 | result['fav_cnt'] = fav_cnt 80 | result['exinfo1'] = exinfo1 81 | result['exinfo2'] = exinfo2 82 | result['gmt_create'] = gmt_create 83 | 84 | line = [] 85 | for col in columns: 86 | if col not in result: 87 | line.append('') 88 | else: 89 | line.append(str(result[col]).encode('utf-8')) 90 | writer.writerow(line) 91 | except Exception, e: 92 | return -1 93 | return 0 94 | 95 | 96 | news_cnt = 0 97 | for cur,dirnames,filenames in os.walk(zhongshen_dir): #三个参数:分别返回1.父目录 2.所有文件夹名字(不含路径) 3.所有文件名字 98 | for f in os.listdir(cur): 99 | try: 100 | f_path = os.path.join(cur, f) 101 | content = open(f_path, "r").read() 102 | #print content 103 | if content.find("AjaxPage_Click_NEWSID") == -1: 104 | continue 105 | soup = BeautifulSoup(content) 106 | if soup == None or soup.find("title") == None: 107 | continue 108 | title = soup.find("title").string 109 | 110 | ##初步过滤 111 | if len(title.split("-")) < 2: 112 | continue 113 | # print f_path 114 | # print title 115 | flag = extract_news(soup, news_cnt) 116 | if flag == 0: 117 | news_cnt += 1 118 | print title 119 | if news_cnt % 1000 == 1: 120 | print news_cnt 121 | except Exception, e: 122 | print e 123 | continue 124 | 125 | #break 126 | print news_cnt 127 | print len(item_id_dict) -------------------------------------------------------------------------------- /others/后台2/summary_analyze.py: -------------------------------------------------------------------------------- 1 | #encoding=utf8 2 | ''' 3 | __author__ = 'Administrator' 4 | 2016.3.6 18:26 first version 5 | 舆情大盘数据汇总,并存入数据库holmesdb 6 | ''' 7 | 8 | import csv 9 | import json 10 | import time 11 | import re 12 | from string import punctuation,digits,letters,whitespace 13 | import sys 14 | import datetime 15 | 16 | import jieba 17 | import jieba.analyse 18 | import pandas as pd 19 | from gensim import corpora,models 20 | from helper.textprocessing import handleContent 21 | from pymongo import MongoClient 22 | client=MongoClient() 23 | reload(sys) 24 | sys.setdefaultencoding('utf-8') 25 | jieba.load_userdict("C:/Python27/Lib/site-packages/jieba-0.37-py2.7.egg/jieba/financedict.txt") 26 | 27 | 28 | 29 | db = client.holmesdb 30 | t_news = db.t_news_di 31 | t_policy = db.t_policy_di 32 | t_ugc = db.t_ugc_di 33 | t_expert = db.t_expert_opinion_di 34 | t_news_caixin = db.t_news_caixin_di 35 | 36 | news_res = t_news.find() 37 | policy_res = t_policy.find() 38 | ugc_res = t_ugc.find() 39 | expert_res = t_expert.find() 40 | article_res = [news_res, policy_res, ugc_res, expert_res] 41 | key = ["news", "policy", "ugc", "expert"] 42 | month_summary = {} 43 | month12_day_summary = {} 44 | source_summary = {} 45 | for i in xrange(4): 46 | for res in article_res[i]: 47 | if res['item_pub_time'] >= '2015-01-01' and res['item_pub_time'] <= '2015-12-31': 48 | title = res['title'] 49 | content = res['content'] 50 | t = res['item_pub_time'] 51 
| m = t[5:7] 52 | date = t[5:].replace("-", ".").split(" ")[0] 53 | #print m, date 54 | month_summary[m][key[i]] = month_summary.setdefault(m , {}).setdefault(key[i], 0) + 1 55 | if date >= '12.01' and date <= '12.31': 56 | month12_day_summary[date][key[i]] = month12_day_summary.setdefault(date , {}).setdefault(key[i], 0) + 1 57 | source = res['source'] 58 | if source == "和讯P2P政策": 59 | source = "和讯P2P" 60 | if source == 'zhongshen': 61 | source = '中申网' 62 | source_summary[source] = source_summary.setdefault(source, 0) + 1 63 | 64 | print month_summary 65 | print month12_day_summary 66 | for source in source_summary: 67 | print source, source_summary[source] 68 | 69 | def writeJsonDict(person, f_out): 70 | outStr = json.dumps(person, ensure_ascii = False) #处理完之后重新转为Json格式 71 | f_out.write(outStr.encode('utf-8') + '\n') #写回到一个新的Json文件中去 72 | 73 | print "begin save datas", datetime.datetime.now() 74 | writeJsonDict(month_summary, open("./data/summary/month_summary.json", "w")) 75 | writeJsonDict(month12_day_summary, open("./data/summary/month12_day_summary.json", "w")) 76 | writeJsonDict(source_summary, open("./data/summary/source_summary.json", "w")) 77 | 78 | pos_weight = [0.37, 0.43, 0.42, 0.45, 0.43, 0.44, 0.42, 0.4, 0.39, 0.387,\ 79 | 0.38, 0.378, 0.376, 0.365, 0.33, 0.274, 0.25, 0.26, 0.35, 0.42,\ 80 | 0.41, 0.47, 0.43, 0.46, 0.47, 0.43, 0.44, 0.45, 0.463, 0.456, \ 81 | 0.465 82 | ] 83 | sa_month12_day_summary = {} 84 | for dt in month12_day_summary: 85 | cnt = month12_day_summary[dt]["ugc"] 86 | cnt_pos = int(cnt * pos_weight[int(dt[3:])-1]) 87 | cnt_nag = int(cnt - cnt_pos) 88 | sa_month12_day_summary.setdefault(dt, {"pos":cnt_pos, "nag":cnt_nag}) 89 | writeJsonDict(sa_month12_day_summary, open("./data/summary/sa_month12_day_summary.json", "w")) 90 | 91 | print "end save datas", datetime.datetime.now() -------------------------------------------------------------------------------- /others/后台2/temp.py: -------------------------------------------------------------------------------- 1 | #encoding=utf8 2 | ''' 3 | __author__ = 'Administrator' 4 | 2016.3.31测试 5 | ''' 6 | import sys 7 | import os 8 | import json 9 | reload(sys) 10 | sys.setdefaultencoding('utf-8') 11 | 12 | 13 | def getJsonFile_all(json_fname): 14 | json_file = open(json_fname, "r") 15 | dict = json.load(json_file) 16 | json_file.close() 17 | return dict 18 | def writeJsonDict(person, f_out): 19 | outStr = json.dumps(person, ensure_ascii = False) #处理完之后重新转为Json格式 20 | f_out.write(outStr.encode('utf-8') + '\n') #写回到一个新的Json文件中去 21 | 22 | 23 | # json_list = [] 24 | # for cur, dir, fname_list in os.walk("./data/_temp"): 25 | # for f in fname_list: 26 | # print f 27 | # if f[0] == 'a': 28 | # continue 29 | # f_path = os.path.join(cur, f) 30 | # json_data = getJsonFile_all(f_path) 31 | # date_dict = {} 32 | # for data in json_data: 33 | # d = data["item_pub_time"].split(" ")[0].replace("-", ".") 34 | # date_dict[d] = date_dict.setdefault(d, 0) + 1 35 | # print sorted(date_dict.items(), lambda a,b: cmp(a[0], b[0])) 36 | # new_path = os.path.join(cur, "ana_" + f) 37 | # writeJsonDict(date_dict, open(new_path, "w")) 38 | 39 | 40 | #encoding=utf8 41 | ''' 42 | __author__ = 'Administrator' 43 | 2016.4.17 01:08 first version 44 | 构建知识图谱,pipeline 45 | 1、词性标注 46 | 2、歧义消除 47 | 3、关系抽取 48 | 4、知识推理 49 | 5、知识表示 50 | ''' 51 | 52 | import csv 53 | import json 54 | import time 55 | import re 56 | from string import punctuation,digits,letters,whitespace 57 | import sys 58 | import datetime 59 | from helper import myio 60 | import jieba 61 | 
import jieba.analyse 62 | import jieba.posseg as pseg 63 | import math 64 | import pandas as pd 65 | from gensim import corpora,models 66 | from helper.textprocessing import handleContent 67 | from pymongo import MongoClient 68 | client=MongoClient() 69 | reload(sys) 70 | sys.setdefaultencoding('utf-8') 71 | 72 | knowledge_graph_dir = "./data/knowledge_graph/" 73 | def getLastNameDict(): 74 | last_name_dict ={} 75 | name_vec = [line.strip().split(" ") for line in open(knowledge_graph_dir + u"中国姓.txt")] 76 | for vec in name_vec: 77 | if len(vec) > 1: 78 | for v in vec: 79 | last_name_dict.setdefault(v, 0) 80 | return last_name_dict 81 | 82 | def extractEntity(): 83 | db = client.holmesdb 84 | t_news = db.t_news_di 85 | res_list = t_news.find() 86 | last_name_dict = getLastNameDict() 87 | 88 | ntoken_dict = {} 89 | people_dict = {} 90 | row_cnt = 0 91 | for res in res_list: 92 | row_cnt += 1 93 | title = res["title"] 94 | content = res["content"] 95 | doc = myio.handleContent(title) + " " + myio.handleContent(content) 96 | words = pseg.cut(doc) 97 | for (word, flag) in words: 98 | if flag.find("n") != -1: 99 | print word, flag 100 | word1 = word[0].encode("utf-8") 101 | word2 = word[:2].encode("utf-8") 102 | if word1 in last_name_dict or word2 in last_name_dict: 103 | #print word[0], word[:2] 104 | people_dict[word] = people_dict.setdefault(word, 0) + 1 105 | else: 106 | #print w.word, w.flag 107 | ntoken_dict[word] = ntoken_dict.setdefault(word, 0) + 1 108 | ntoken_list = sorted(ntoken_dict.items(), lambda a, b: -cmp(a[1], b[1])) 109 | people_list = sorted(people_dict.items(), lambda a, b: -cmp(a[1], b[1])) 110 | 111 | 112 | if __name__ == "__main__": 113 | #pipeline step1 114 | extractEntity() -------------------------------------------------------------------------------- /others/后台2/topic_model.py: -------------------------------------------------------------------------------- 1 | #encoding=utf8 2 | ''' 3 | __author__ = 'Administrator' 4 | 2016.2.27 20:30 first version 5 | 训练主题模型 6 | ''' 7 | import csv 8 | import json 9 | import time 10 | import re 11 | from string import punctuation,digits,letters,whitespace 12 | import sys 13 | import datetime 14 | import jieba 15 | import jieba.analyse 16 | import pandas as pd 17 | from gensim import corpora,models 18 | from helper.textprocessing import handleContent 19 | 20 | reload(sys) 21 | sys.setdefaultencoding('utf-8') 22 | jieba.load_userdict("C:/Python27/Lib/site-packages/jieba-0.37-py2.7.egg/jieba/financedict.txt") 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | print "before load date", datetime.datetime.now() 32 | 33 | news_dataset = pd.read_pickle("./data/news_dataset.pkl") 34 | # news_dataset_other = pd.read_pickle("./data/news_dataset_other.pkl") 35 | # all_dataset = pd.concat([news_dataset, news_dataset_other]) 36 | all_dataset = news_dataset 37 | print "end load date", datetime.datetime.now() 38 | 39 | 40 | text_tags = [] 41 | lda_train_set = [] 42 | 43 | 44 | 45 | ## 8000 articles 5mins 46 | print "before cut segments", datetime.datetime.now() 47 | # 分词,关键字提取 48 | 49 | for content in all_dataset['content']: 50 | content = handleContent(content) 51 | seg = list(jieba.cut(content)) 52 | lda_train_set.append(seg) 53 | 54 | print "end cut segments", datetime.datetime.now() 55 | 56 | 57 | print "before LDA", datetime.datetime.now() 58 | # LDA主题模型 59 | dic = corpora.Dictionary(lda_train_set) 60 | corpus = [dic.doc2bow(text) for text in lda_train_set] 61 | tfidf = models.TfidfModel(corpus) 62 | tfidf.save("./data/tfidf_dict.model") 63 | 
corpus_tfidf = tfidf[corpus] 64 | 65 | # 8000 article 2mins 66 | lda = models.LdaModel(corpus_tfidf, id2word = dic, num_topics = 200) 67 | lda.save("./data/lda.model") 68 | corpus_lda = lda[corpus_tfidf] 69 | 70 | for i in range(0, lda.num_topics): 71 | print i, lda.print_topic(i) 72 | 73 | for p in corpus_lda: 74 | print p 75 | 76 | print "end LDA", datetime.datetime.now() 77 | 78 | 79 | topic_doc_dict = {} 80 | for i in xrange(0 , len(corpus_lda)): 81 | cnt = 0 82 | for pp in sorted(corpus_lda[i], lambda a,b: -cmp(a[1], b[1])): 83 | cnt += 1 84 | if cnt >= 2: break 85 | topic_id, weight = pp[0], pp[1], 86 | topic_doc_dict[topic_id][i] = topic_doc_dict.setdefault(topic_id, {}).setdefault(i, 0) + weight 87 | 88 | for topic_id in topic_doc_dict: 89 | tag_set = {} 90 | for doc_id in topic_doc_dict[topic_id]: 91 | for tag in text_tags[doc_id]: 92 | tag_set[tag] = tag_set.setdefault(tag, 0) + topic_doc_dict[topic_id][doc_id] 93 | print topic_id, len(tag_set), 94 | for tag in sorted(tag_set.items(), lambda a,b: -cmp(a[1], b[1])): 95 | print("%s %s" %(tag[0], tag[1])), 96 | print "" 97 | 98 | -------------------------------------------------------------------------------- /others/后台2/vectorize.py: -------------------------------------------------------------------------------- 1 | #encoding=utf8 2 | ''' 3 | __author__ = 'Administrator' 4 | 2016.3.3 22:30 first version 5 | 将单词、文章、用户向量化,包括word2vec, doc2vec 6 | ''' 7 | import logging 8 | import re 9 | import sys 10 | import datetime 11 | import gensim 12 | from gensim.models.doc2vec import TaggedDocument 13 | import jieba 14 | import pandas as pd 15 | from gensim.models import Word2Vec, Doc2Vec 16 | from helper.textprocessing import handleContent, cut_sentence_2 17 | reload(sys) 18 | sys.setdefaultencoding('utf-8') 19 | 20 | stop_dict = {} 21 | for line in open("C:\Python27\Lib\site-packages\jieba-0.37-py2.7.egg\jieba\stop_chinese.txt"): 22 | stop_dict.setdefault(line.strip(), 0) 23 | #logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO) 24 | 25 | ##Load documents 26 | print "before load date", datetime.datetime.now() 27 | 28 | news_dataset = pd.read_pickle("./data/news_dataset.pkl") 29 | news_dataset_other = pd.read_pickle("./data/news_dataset_other.pkl") 30 | all_dataset = pd.concat([news_dataset, news_dataset_other]) 31 | 32 | print "end load date", datetime.datetime.now() 33 | 34 | 35 | print "before word2vec", datetime.datetime.now() 36 | documents = [] 37 | sentences = [] 38 | for i in xrange(0, len(all_dataset)): 39 | title = all_dataset.iloc[i]['title'] 40 | content = all_dataset.iloc[i]['content'] 41 | doc = handleContent(title) + " " + handleContent(content) 42 | tokens = list(jieba.cut(doc)) 43 | new_tokens = [] 44 | 45 | for i in xrange( len(tokens) ): 46 | if tokens[i].isdigit() == True or len(tokens[i]) <= 1\ 47 | or (tokens[i].isalnum() == True and len(tokens[i]) > 20): 48 | #print tokens[i], 49 | continue 50 | if tokens[i] in stop_dict:#去停用词 51 | continue 52 | #u'数正', '下险企 53 | #if tokens[i] in[u'融系',u'办则',u'部是', u'若仅', u'虽同', u'或苏', u'由十']: 54 | #print tokens[i], title, content 55 | 56 | new_tokens.append(tokens[i]) 57 | # for token in new_tokens: 58 | # print token, 59 | # print "" 60 | # print len(tokens),len(new_tokens) 61 | documents.append(new_tokens) 62 | # content = content.replace("#r#", "\r").replace("#n#", "\n").replace("#t#", "\t") 63 | # sentence_list = [title] + cut_sentence_2(content) 64 | # for i in xrange(len(sentence_list)): 65 | # sentence_list[i] = 
handleContent(sentence_list[i]) 66 | sentences.append(doc) 67 | 68 | 69 | ## train a word2vec model 70 | num_features = 200 # Word vector dimensionality 71 | min_word_count = 1 # Minimum word count 72 | num_workers = 4 # Number of threads to run in parallel 73 | context = 10 # Context window size 74 | downsampling = 1e-5 # Downsample setting for frequent words 75 | 76 | print "Training Word2Vec model...", datetime.datetime.now() 77 | model = Word2Vec(documents, \ 78 | workers=num_workers,\ 79 | size=num_features,\ 80 | min_count=min_word_count,\ 81 | window=context, \ 82 | sample=downsampling,\ 83 | seed=1) 84 | 85 | model.init_sims(replace=True) 86 | model.save('./data/word2vec.model') 87 | print "here" 88 | for pp in model.most_similar(["陆金所".decode("utf8")],topn=30): 89 | print pp[0], pp[1], "\t", 90 | print "" 91 | for pp in model.most_similar(["P2P".decode("utf8")],topn=30): 92 | print pp[0], pp[1], "\t", 93 | print "" 94 | for pp in model.most_similar(["网贷".decode("utf8")],topn=30): 95 | print pp[0], pp[1], "\t", 96 | print "" 97 | for pp in model.most_similar(["e租宝".decode("utf8")],topn=30): 98 | print pp[0], pp[1], "\t", 99 | print "" 100 | print "end word2vec", datetime.datetime.now() 101 | 102 | 103 | print "before doc2vec", datetime.datetime.now() 104 | class DocIterator(object): 105 | def __init__(self, documents): 106 | self.documents = documents 107 | 108 | def __iter__(self): 109 | for i in xrange(len(self.documents)): 110 | words = self.documents[i] 111 | tags = [i] 112 | yield TaggedDocument(words, tags) 113 | 114 | ## train a doc2vec model 115 | print "Training DocVec model..." 116 | model = Doc2Vec(DocIterator(documents), \ 117 | size=100, \ 118 | window=8, \ 119 | min_count=5,\ 120 | workers=4) 121 | model.init_sims(replace=True) 122 | model.save('./data/doc2vec.model') 123 | print "end doc2vec", datetime.datetime.now() 124 | -------------------------------------------------------------------------------- /others/爬虫/wd/bbs_rong360/bbs_rong360/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindawei/p2p/b0771100b8d4740906a186d9668b7d662214409c/others/爬虫/wd/bbs_rong360/bbs_rong360/.DS_Store -------------------------------------------------------------------------------- /others/爬虫/wd/bbs_rong360/bbs_rong360/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindawei/p2p/b0771100b8d4740906a186d9668b7d662214409c/others/爬虫/wd/bbs_rong360/bbs_rong360/__init__.py -------------------------------------------------------------------------------- /others/爬虫/wd/bbs_rong360/bbs_rong360/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindawei/p2p/b0771100b8d4740906a186d9668b7d662214409c/others/爬虫/wd/bbs_rong360/bbs_rong360/__init__.pyc -------------------------------------------------------------------------------- /others/爬虫/wd/bbs_rong360/bbs_rong360/items.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define here the models for your scraped items 4 | # 5 | # See documentation in: 6 | # http://doc.scrapy.org/en/latest/topics/items.html 7 | 8 | import scrapy 9 | 10 | 11 | class BbsRong360Item(scrapy.Item): 12 | # define the fields for your item here like: 13 | # name = scrapy.Field() 14 | pass 15 | 
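# A minimal sketch of how the fields could be declared here, based on the CSV header
# that content.py / detail.py write by hand and on the schema described in 爬虫文档.txt;
# the class name ForumPostItem is illustrative only and is not used elsewhere in the project.
class ForumPostItem(scrapy.Item):
    item_id = scrapy.Field()        # thread id taken from the URL
    item_type = scrapy.Field()      # "opinion" for the opening post, "reply" for replies
    source = scrapy.Field()         # site name, e.g. 融360
    url = scrapy.Field()            # thread URL
    author = scrapy.Field()
    title = scrapy.Field()
    content = scrapy.Field()        # post body, with \r \n \t escaped as #r# #n# #t#
    item_pub_time = scrapy.Field()  # publish time, yyyy-mm-dd
    tags = scrapy.Field()           # comma-separated tags
    cmt_cnt = scrapy.Field()        # comment count
    fav_cnt = scrapy.Field()        # like / favourite count
    gmt_create = scrapy.Field()     # time this record was created
    exinfo1 = scrapy.Field()        # reserved; holds the replied-to id for replies
    exinfo2 = scrapy.Field()        # reserved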
-------------------------------------------------------------------------------- /others/爬虫/wd/bbs_rong360/bbs_rong360/middlewares.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindawei/p2p/b0771100b8d4740906a186d9668b7d662214409c/others/爬虫/wd/bbs_rong360/bbs_rong360/middlewares.pyc -------------------------------------------------------------------------------- /others/爬虫/wd/bbs_rong360/bbs_rong360/pipelines.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define your item pipelines here 4 | # 5 | # Don't forget to add your pipeline to the ITEM_PIPELINES setting 6 | # See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html 7 | 8 | 9 | class BbsRong360Pipeline(object): 10 | def process_item(self, item, spider): 11 | return item 12 | -------------------------------------------------------------------------------- /others/爬虫/wd/bbs_rong360/bbs_rong360/settings.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Scrapy settings for bbs_rong360 project 4 | # 5 | # For simplicity, this file contains only settings considered important or 6 | # commonly used. You can find more settings consulting the documentation: 7 | # 8 | # http://doc.scrapy.org/en/latest/topics/settings.html 9 | # http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html 10 | # http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html 11 | 12 | BOT_NAME = 'bbs_rong360' 13 | 14 | SPIDER_MODULES = ['bbs_rong360.spiders'] 15 | NEWSPIDER_MODULE = 'bbs_rong360.spiders' 16 | 17 | DOWNLOAD_HANDLERS = {'s3': None,} 18 | 19 | # COOKIES_ENABLED = False 20 | 21 | # DOWNLOAD_DELAY = 3 22 | 23 | # Crawl responsibly by identifying yourself (and your website) on the user-agent 24 | #USER_AGENT = 'bbs_rong360 (+http://www.yourdomain.com)' 25 | 26 | # Configure maximum concurrent requests performed by Scrapy (default: 16) 27 | #CONCURRENT_REQUESTS=32 28 | 29 | # Configure a delay for requests for the same website (default: 0) 30 | # See http://scrapy.readthedocs.org/en/latest/topics/settings.html#download-delay 31 | # See also autothrottle settings and docs 32 | # DOWNLOAD_DELAY=3 33 | # The download delay setting will honor only one of: 34 | #CONCURRENT_REQUESTS_PER_DOMAIN=16 35 | #CONCURRENT_REQUESTS_PER_IP=16 36 | 37 | # Disable cookies (enabled by default) 38 | #COOKIES_ENABLED=False 39 | 40 | # Disable Telnet Console (enabled by default) 41 | #TELNETCONSOLE_ENABLED=False 42 | 43 | # Override the default request headers: 44 | #DEFAULT_REQUEST_HEADERS = { 45 | # 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 46 | # 'Accept-Language': 'en', 47 | #} 48 | 49 | # Enable or disable spider middlewares 50 | # See http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html 51 | #SPIDER_MIDDLEWARES = { 52 | # 'bbs_rong360.middlewares.MyCustomSpiderMiddleware': 543, 53 | #} 54 | 55 | # Enable or disable downloader middlewares 56 | # See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html 57 | #DOWNLOADER_MIDDLEWARES = { 58 | # 'bbs_rong360.middlewares.MyCustomDownloaderMiddleware': 543, 59 | #} 60 | 61 | # Enable or disable extensions 62 | # See http://scrapy.readthedocs.org/en/latest/topics/extensions.html 63 | #EXTENSIONS = { 64 | # 'scrapy.telnet.TelnetConsole': None, 65 | #} 66 | 67 | # Configure item pipelines 68 | # See 
http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html 69 | #ITEM_PIPELINES = { 70 | # 'bbs_rong360.pipelines.SomePipeline': 300, 71 | #} 72 | 73 | # Enable and configure the AutoThrottle extension (disabled by default) 74 | # See http://doc.scrapy.org/en/latest/topics/autothrottle.html 75 | # NOTE: AutoThrottle will honour the standard settings for concurrency and delay 76 | # AUTOTHROTTLE_ENABLED=True 77 | # The initial download delay 78 | # AUTOTHROTTLE_START_DELAY=5 79 | # The maximum download delay to be set in case of high latencies 80 | #AUTOTHROTTLE_MAX_DELAY=60 81 | # Enable showing throttling stats for every response received: 82 | #AUTOTHROTTLE_DEBUG=False 83 | 84 | # Enable and configure HTTP caching (disabled by default) 85 | # See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings 86 | #HTTPCACHE_ENABLED=True 87 | #HTTPCACHE_EXPIRATION_SECS=0 88 | #HTTPCACHE_DIR='httpcache' 89 | #HTTPCACHE_IGNORE_HTTP_CODES=[] 90 | #HTTPCACHE_STORAGE='scrapy.extensions.httpcache.FilesystemCacheStorage' 91 | -------------------------------------------------------------------------------- /others/爬虫/wd/bbs_rong360/bbs_rong360/settings.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindawei/p2p/b0771100b8d4740906a186d9668b7d662214409c/others/爬虫/wd/bbs_rong360/bbs_rong360/settings.pyc -------------------------------------------------------------------------------- /others/爬虫/wd/bbs_rong360/bbs_rong360/spiders/__init__.py: -------------------------------------------------------------------------------- 1 | # This package will contain the spiders of your Scrapy project 2 | # 3 | # Please refer to the documentation for information on how to create and manage 4 | # your spiders. 
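#
# Sketch of the crawl flow, inferred from bbs.py, detail.py and content.py in this
# package: the three spiders form a small pipeline and are meant to be run in order:
#   1. "bbs"     walks the list pages http://bbs.rong360.com/forum-55-<page>.html
#                (pages 1-171) and appends every thread URL to urls.txt
#   2. "detail"  reads urls.txt and stores each thread's raw body as html/<id>.html
#   3. "content" parses the files under html/ and writes one CSV row per post
#                (opening post as "opinion", replies as "reply") to 融360.csv
# A typical invocation from the project directory would be:
#   scrapy crawl bbs && scrapy crawl detail && scrapy crawl content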
5 | -------------------------------------------------------------------------------- /others/爬虫/wd/bbs_rong360/bbs_rong360/spiders/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindawei/p2p/b0771100b8d4740906a186d9668b7d662214409c/others/爬虫/wd/bbs_rong360/bbs_rong360/spiders/__init__.pyc -------------------------------------------------------------------------------- /others/爬虫/wd/bbs_rong360/bbs_rong360/spiders/bbs.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import scrapy 4 | 5 | class BbsSpider(scrapy.Spider): 6 | name = "bbs" 7 | start_urls = [] 8 | 9 | def __init__(self): 10 | for page in range(1, 171+1): 11 | self.start_urls.append("http://bbs.rong360.com/forum-55-%d.html"%page) 12 | 13 | def parse(self, response): 14 | urls = response.xpath('//tbody[contains(@id, "normalthread")]/tr/td[@class="icn"]/a/@href').extract() 15 | for url in urls: 16 | df = open("urls.txt", "a") 17 | df.write(url+"\n") 18 | df.close() 19 | 20 | -------------------------------------------------------------------------------- /others/爬虫/wd/bbs_rong360/bbs_rong360/spiders/bbs.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindawei/p2p/b0771100b8d4740906a186d9668b7d662214409c/others/爬虫/wd/bbs_rong360/bbs_rong360/spiders/bbs.pyc -------------------------------------------------------------------------------- /others/爬虫/wd/bbs_rong360/bbs_rong360/spiders/content.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import sys 4 | reload(sys) 5 | sys.setdefaultencoding("utf-8") 6 | 7 | import os 8 | import json 9 | import scrapy 10 | 11 | class ContentSpider(scrapy.Spider): 12 | name = "content" 13 | start_urls = [] 14 | 15 | def __init__(self): 16 | df = open("融360.csv", "w") 17 | df.write("item_id,item_type,source,url,author,title,content,item_pub_time,tags,cmt_cnt,fav_cnt,gmt_create,exinfo1,exinfo2\n") 18 | df.close() 19 | for f in os.listdir("html"): 20 | self.start_urls.append("file:///Users/ziaoang/Documents/p2p/bbs_rong360/html/" + f) 21 | 22 | def parse(self, response): 23 | id = response.url.split("/")[-1].replace(".html","") 24 | title = response.xpath('//span[@id="thread_subject"]/text()').extract()[0] 25 | 26 | df = open("融360.csv", "a") 27 | tids = response.xpath('//div[re:test(@id, "post_\d+$")]/@id').extract() 28 | for i in range(len(tids)): 29 | try: 30 | tid = tids[i].replace("post_","") 31 | time = response.xpath('//em[@id="authorposton%s"]/text()'%tid).extract()[0].replace("发表于 ","") 32 | content = "".join(response.xpath('//td[@id="postmessage_%s"]//text()'%tid).extract()) 33 | content = '"' + content.strip().replace("\r","#r#").replace("\n","#n#").replace("\t","#t#") + '"' 34 | url = "http://bbs.rong360.com/thread-%s-1.html"%id 35 | if i == 0: 36 | df.write("%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n"%(id,"opinion","融360",url,"",title,content,time,"","","","","","")) 37 | else: 38 | df.write("%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n"%(id,"reply","融360",url,"","",content,time,"","","","","","")) 39 | except: 40 | pass 41 | df.close() 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /others/爬虫/wd/bbs_rong360/bbs_rong360/spiders/content.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindawei/p2p/b0771100b8d4740906a186d9668b7d662214409c/others/爬虫/wd/bbs_rong360/bbs_rong360/spiders/content.pyc -------------------------------------------------------------------------------- /others/爬虫/wd/bbs_rong360/bbs_rong360/spiders/detail.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import sys 4 | reload(sys) 5 | sys.setdefaultencoding("utf-8") 6 | 7 | import os 8 | import json 9 | import scrapy 10 | 11 | class DetailSpider(scrapy.Spider): 12 | name = "detail" 13 | start_urls = [] 14 | 15 | def __init__(self): 16 | df = open("融360.csv", "w") 17 | df.write("item_id,item_type,source,url,author,title,content,item_pub_time,tags,cmt_cnt,fav_cnt,gmt_create,exinfo1,exinfo2\n") 18 | df.close() 19 | for line in open("urls.txt"): 20 | self.start_urls.append(line.strip()) 21 | # break 22 | 23 | def parse(self, response): 24 | id = response.url.split("/")[-1].replace("thread-","").replace(".html","") 25 | df = open("html/%s.html"%id, "w") 26 | df.write(response.body) 27 | df.close() 28 | 29 | ''' 30 | id = response.url.split("/")[-1].replace("thread-","").replace(".html","") 31 | title = response.xpath('//span[@id="thread_subject"]/text()').extract()[0] 32 | 33 | df = open("融360.csv", "a") 34 | tids = response.xpath('//div[re:test(@id, "post_\d+$")]/@id').extract() 35 | for i in range(len(tids)): 36 | tid = tids[i].replace("post_","") 37 | time = response.xpath('//em[@id="authorposton%s"]/text()'%tid).extract()[0].replace("发表于 ","") 38 | content = "".join(response.xpath('//td[@id="postmessage_%s"]//text()'%tid).extract()) 39 | content = '"' + content.strip().replace("\r","#r#").replace("\n","#n#").replace("\t","#t#") + '"' 40 | if i == 0: 41 | df.write("%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n"%(id,"opinion","融360",response.url,"",title,content,time,"","","","","","")) 42 | else: 43 | df.write("%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n"%(id,"reply","融360",response.url,"","",content,time,"","","","","","")) 44 | df.close() 45 | ''' 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /others/爬虫/wd/bbs_rong360/bbs_rong360/spiders/detail.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindawei/p2p/b0771100b8d4740906a186d9668b7d662214409c/others/爬虫/wd/bbs_rong360/bbs_rong360/spiders/detail.pyc -------------------------------------------------------------------------------- /others/爬虫/wd/bbs_rong360/proxy_inuse.txt: -------------------------------------------------------------------------------- 1 | 119.188.94.145:80 2 | 125.123.81.153:3128 3 | 182.89.6.100:8123 4 | 222.82.161.217:8090 5 | 106.1.59.149:8123 6 | 111.176.154.126:3128 7 | 182.246.38.56:8090 8 | 110.72.35.111:8123 9 | 110.72.39.46:8123 10 | 118.193.48.114:4444 11 | 110.73.9.191:8123 12 | 106.2.111.207:80 13 | 27.9.156.128:8090 14 | 180.213.179.43:8090 15 | 121.31.145.239:8123 16 | 171.39.1.124:8123 17 | 171.37.164.247:8123 18 | 182.90.50.55:8123 19 | 171.39.96.123:8123 20 | 171.37.133.164:8123 21 | -------------------------------------------------------------------------------- /others/爬虫/wd/bbs_rong360/randomproxy.py: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2013 by Aivars Kalvans 2 | # 3 | # Permission is hereby granted, free of charge, to 
any person obtaining a copy 4 | # of this software and associated documentation files (the "Software"), to deal 5 | # in the Software without restriction, including without limitation the rights 6 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | # copies of the Software, and to permit persons to whom the Software is 8 | # furnished to do so, subject to the following conditions: 9 | # 10 | # The above copyright notice and this permission notice shall be included in 11 | # all copies or substantial portions of the Software. 12 | # 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | # THE SOFTWARE. 20 | 21 | import re 22 | import random 23 | import base64 24 | 25 | 26 | class RandomProxy(object): 27 | def __init__(self, settings): 28 | self.proxy_list = settings.get('PROXY_LIST') 29 | fin = open(self.proxy_list) 30 | 31 | self.proxies = {} 32 | for line in fin.readlines(): 33 | self.proxies[line.strip()] = '' 34 | 35 | fin.close() 36 | 37 | @classmethod 38 | def from_crawler(cls, crawler): 39 | return cls(crawler.settings) 40 | 41 | def process_request(self, request, spider): 42 | # Don't overwrite with a random one (server-side state for IP) 43 | if 'proxy' in request.meta: 44 | return 45 | 46 | proxy_address = random.choice(self.proxies.keys()) 47 | proxy_user_pass = self.proxies[proxy_address] 48 | 49 | request.meta['proxy'] = proxy_address 50 | if proxy_user_pass: 51 | basic_auth = 'Basic ' + base64.encodestring(proxy_user_pass) 52 | request.headers['Proxy-Authorization'] = basic_auth 53 | 54 | def process_exception(self, request, exception, spider): 55 | proxy = request.meta['proxy'] 56 | try: 57 | del self.proxies[proxy] 58 | except ValueError: 59 | pass -------------------------------------------------------------------------------- /others/爬虫/wd/bbs_rong360/randomproxy.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindawei/p2p/b0771100b8d4740906a186d9668b7d662214409c/others/爬虫/wd/bbs_rong360/randomproxy.pyc -------------------------------------------------------------------------------- /others/爬虫/wd/bbs_rong360/scrapy.cfg: -------------------------------------------------------------------------------- 1 | # Automatically created by: scrapy startproject 2 | # 3 | # For more information about the [deploy] section see: 4 | # https://scrapyd.readthedocs.org/en/latest/deploy.html 5 | 6 | [settings] 7 | default = bbs_rong360.settings 8 | 9 | [deploy] 10 | #url = http://localhost:6800/ 11 | project = bbs_rong360 12 | -------------------------------------------------------------------------------- /others/爬虫/wd/爬虫文档.txt: -------------------------------------------------------------------------------- 1 | 爬虫文档 2 | 3 | 基本爬取方法 4 | ======== 5 | 1. 订制列表界面爬虫 6 | 该界面包含信息实体的概述信息,包括标题,作者,发表时间等,订制爬虫专门爬取对应详细界面的url 7 | 2. 订制详细信息界面爬虫 8 | 该界面包含信息实体的详细信息,包括标题,作者,发表时间,内容正文,评论信息,订制详细信息界面爬虫爬取对应的详细信息 9 | 10 | 进阶爬虫方法 11 | ======== 12 | 1. 抓取网站app后台获取数据API 13 | 下载要爬取网站的app版本,试用并对网络流进行抓包,分析出网站后台获取数据API 14 | 2. 
模拟网站app进行API请求获取数据 15 | 根据分析出来的API和相应的数据请求格式模拟网站app进行数据请求抓包,获取数据 16 | 17 | 代码框架 18 | ======== 19 | scrapy + xpath + beautifulsoup 20 | 21 | 爬取主要字段 22 | ======== 23 | 新闻类每条新闻一行记录,保存格式: 24 | item_id string or int, 资讯id 25 | Item_type string, “news”、”industry”、”policy”、”opinion”、”reply” 26 | source string, 网站名称,包括金融之家、和讯网、P2P观察网 27 | url string, 新闻链接 28 | author string, 29 | title string, 30 | content string, 31 | item_pub_time datetime, 发布时间 yyyy-mm-dd 32 | tags sring, 新闻在页面的标签,用’,’分隔 33 | cmt_cnt int, 评论数 34 | fav_cnt int, 点赞或者收藏数 35 | gmt_create datetime, 该记录创建时间,yyyy-mm-dd mm:ss 36 | exinfo1 string, 保留字段,如果是reply的话,保留被回复的id 37 | exinfo2 string 38 | 39 | 平台&公司类每个平台一行记录,保存格式: 40 | platform_id string or int, 平台id 41 | platform_name string, 平台名字 42 | platform_type string, 平台类型”信用贷、企业贷车贷 、房贷 、债权流转、优选理财、票据抵押、其他” 43 | platform_status string, 平台状态”runing、close、issue” 44 | company string, 所属公司 45 | need_invest double, 最少需要投资的金额 46 | prospect_earn string, 预期收益,格式”xx%~xx%”,四舍五入 47 | Risk_weight int, 风险系数1,2,3,4,5 48 | source string, 来源 49 | Source_url string, 来源链接 50 | gmt_create datetime, 该记录创建时间,yyyy-mm-dd mm:ss 51 | exinfo1 string, 保留字段 52 | exinfo2 string 53 | 54 | 爬取网站列表 55 | ======== 56 | 新闻类 57 | http://news.jrzj.com/p2p 58 | 金融之家 59 | http://www.wdzj.com/news/hangye 60 | 网贷之家-行业(3300篇) 61 | http://www.wdzj.com/news/pingtai 62 | 网贷之家-平台(1100篇) 63 | http://p2p.hexun.com 64 | 和讯网 65 | http://www.p2pguancha.com 66 | P2P观察网 67 | http://www.caixin.com/ 68 | 财新网(作为预料参与NLP模型的训练) 69 | 70 | 71 | 国家政策 72 | http://p2p.hexun.com/zc 73 | 和讯网政策版块 74 | http://www.wdzj.com/news/zhengce 75 | 网贷之家政策版块(340篇) 76 | 77 | P2P平台&公司 78 | http://shuju.wdzj.com/platdata-1.html 79 | 网贷之家平台数据,按平台数据格式保存,不按文档里的格式 80 | http://www.rong360.com/licai-p2p/pingtai/rating 81 | 融360,P2P平台评级 82 | http://caifu.baidu.com/wealth 83 | 百度财富 有300左右个P2P公司信息 84 | http://licai.p2peye.com/lcdt 85 | p2p理财 86 | http://www.p2peye.com/platform/search/h0i0c0x0r0t0s0b0p1.html 87 | 网贷天眼网贷平台汇总 (4189个) 88 | 89 | 用户评论、观点类 90 | http://licai.p2peye.com/investshare 91 | 用户分享 92 | http://bbs.wdzj.com 93 | 网贷人论坛 94 | http://zhihu.com 95 | 知乎,相对专业的评论,应该较长 96 | http://bbs.rong360.com/forum-55-1.html 97 | 融360 P2P论坛,整个版块抓取 98 | 99 | 爬虫代码 100 | ======== 101 | 附件 102 | 103 | -------------------------------------------------------------------------------- /static/css/dashboard.css: -------------------------------------------------------------------------------- 1 | /* 2 | * Base structure 3 | */ 4 | 5 | /* Move down content because we have a fixed navbar that is 50px tall */ 6 | body { 7 | padding-top: 50px; 8 | } 9 | 10 | 11 | /* 12 | * Global add-ons 13 | */ 14 | 15 | .sub-header { 16 | padding-bottom: 10px; 17 | border-bottom: 1px solid #eee; 18 | } 19 | 20 | /* 21 | * Top navigation 22 | * Hide default border to remove 1px line. 23 | */ 24 | .navbar-fixed-top { 25 | border: 0; 26 | } 27 | 28 | /* 29 | * Sidebar 30 | */ 31 | 32 | /* Hide for mobile, show later */ 33 | .sidebar { 34 | display: none; 35 | } 36 | @media (min-width: 768px) { 37 | .sidebar { 38 | position: fixed; 39 | top: 51px; 40 | bottom: 0; 41 | left: 0; 42 | z-index: 1000; 43 | display: block; 44 | padding: 20px; 45 | overflow-x: hidden; 46 | overflow-y: auto; /* Scrollable contents if viewport is shorter than content. 
*/ 47 | background-color: #f5f5f5; 48 | border-right: 1px solid #eee; 49 | } 50 | } 51 | 52 | /* Sidebar navigation */ 53 | .nav-sidebar { 54 | margin-right: -21px; /* 20px padding + 1px border */ 55 | margin-bottom: 20px; 56 | margin-left: -20px; 57 | } 58 | .nav-sidebar > li > a { 59 | padding-right: 20px; 60 | padding-left: 20px; 61 | } 62 | .nav-sidebar > .active > a, 63 | .nav-sidebar > .active > a:hover, 64 | .nav-sidebar > .active > a:focus { 65 | color: #fff; 66 | background-color: #428bca; 67 | } 68 | 69 | 70 | /* 71 | * Main content 72 | */ 73 | 74 | .main { 75 | padding: 20px; 76 | } 77 | @media (min-width: 768px) { 78 | .main { 79 | padding-right: 40px; 80 | padding-left: 40px; 81 | } 82 | } 83 | .main .page-header { 84 | margin-top: 0; 85 | } 86 | 87 | 88 | /* 89 | * Placeholder dashboard ideas 90 | */ 91 | 92 | .placeholders { 93 | margin-bottom: 30px; 94 | text-align: center; 95 | } 96 | .placeholders h4 { 97 | margin-bottom: 0; 98 | } 99 | .placeholder { 100 | margin-bottom: 20px; 101 | } 102 | .placeholder img { 103 | display: inline-block; 104 | border-radius: 50%; 105 | } 106 | -------------------------------------------------------------------------------- /static/css/sign_in.css: -------------------------------------------------------------------------------- 1 | body{ 2 | background-image: url("../img/dl.jpg"); 3 | } 4 | 5 | .sign_in_input { 6 | width: 100%; 7 | padding: 10px; 8 | font-size: 16px; 9 | height: auto; 10 | } -------------------------------------------------------------------------------- /static/data/hot_keyword.json: -------------------------------------------------------------------------------- 1 | {"dt": "2015-12-29", "month_hot_keywords": "e租宝:1.82543890332;宜人贷:0.874153359485;上市:0.746187357849;征求意见:0.730134647048;早报:0.681997022146;速报:0.681997022146;办法:0.65499315201;大大:0.610310583229;纽交所:0.609133857773;调查:0.592044634463;细则:0.591111017562;观察:0.581325950509;责任编辑:0.573899253426;来源:0.571275453897;经营:0.565265533198;中介机构:0.558812590894;美元:0.54405237903;登陆:0.543869515869;事件:0.530250975761;明年:0.52910574318;出借:0.525476762064;负面:0.525476762064;部门:0.520103820183;涉嫌:0.517892025776;报道:0.514530365141;三农:0.507585376415;存管:0.495620382607;合规:0.493234167371;活动:0.488883075296;清单:0.488827135502;信息:0.469753716953;第一股:0.465056291569;自己:0.465056291569;三板:0.463431791396;集团:0.459247500902;非法:0.458295818058;相关:0.450114072287;管理:0.448932761089;行为:0.448142819022;披露:0.447954406301;消息:0.447919840475;用户:0.446841980888;官网:0.446441747711;资本:0.445457354661;公开:0.438605088285;自融:0.436377762919;众筹:0.4348652715;中国:0.434501210367;旗下:0.433217055772;暂行办法:0.422769414488;开展:0.415197868207;之家:0.414457731708;爆料:0.41315088778;公告:0.412741424861;发布:0.411078425733;员工:0.409387060047;IPO:0.408123093166;我们:0.404047002981;或者:0.403879713313;暂停:0.403555227849;要求:0.402890841729;有限公司:0.40272320975;规范:0.401665895414;总部:0.4013465767;日报:0.401091747582;累计:0.400396419349;社会:0.397409011407;集资:0.396826830097;网络:0.396387069107;有关:0.391586051426;理财:0.389551862839;工作:0.388852803927;资产:0.387630540481;健康:0.387615260427;冻结:0.386870255234;如果:0.381635810468;防范:0.376002959141;促进:0.376002959141;互联网:0.37359091714;经济:0.372414661979;影响:0.372246364599;明确:0.371144932164;这个:0.366669700253;计划:0.366669700253;今年:0.362866066284;不能:0.361665530802;被查:0.361665530802;租赁:0.361201632796;经侦:0.358563748023;叶子:0.358213199291;制度:0.357709877673;代销:0.357324198204;之前:0.35723653541;了解:0.357153398169;来看:0.356916955951;上海:0.352456233467;美国:0.351113237814;借贷:0.349701473881;百度:0.348537829528;可能:0.347948601982", "day_hot_keywords": 
"中介机构:0.444331081007;征求意见:0.410639456923;细则:0.32659403459;暂行办法:0.323185939615;出借:0.317699616172;活动:0.211808213709;清单:0.211313883595;办法:0.20966181388;备案:0.182314620765;应当:0.175445184133;会同:0.17503951676;明确:0.174617280325;公安部:0.174023198254;金融监管:0.170591933492;负面:0.168777921092;禁止:0.168459646537;义务:0.167560918974;部门:0.154429737787;行为:0.154042337243;网络:0.152746162409;责任编辑:0.149741908033;中介:0.1492348664;披露:0.148921695081;信息化:0.148110360336;保本:0.144652324122;管理:0.14100822943;保息:0.136732512914;事后:0.134645782123;起草:0.133525222266;规定:0.132254971826;评估:0.12906546907;要求:0.128138967708;不得:0.128083612071;加强:0.127507825644;信息:0.125173488687;底线:0.12515939901;信息安全:0.124302284467;有关:0.123225051757;职责:0.121862486526;地方:0.121665300276;承担:0.121665300276;赵然:0.121181203911;意见:0.120656534782;经营:0.119458751105;合规:0.119007303935;原则:0.117281954276;健康:0.1157456173;保护:0.112964444567;从事:0.112956297444;办公室:0.111872056021;数据库:0.111872056021;存管:0.111206457535;公开:0.110031788639;HZ002:0.107716625699;报送:0.106629675711;自负:0.106629675711;现向:0.106629675711;注册地:0.106629675711;孙立欣:0.106629675711;风险管理:0.106629675711;HF017:0.106629675711;制度:0.104274871908;银监会:0.103750758415;责任:0.1001439167;利好:0.0998942338374;引导:0.0994418275739;实行:0.0994418275739;解读:0.0992811300538;借贷:0.0971273471917;实施:0.0962764607769;投融资:0.0962764607769;规范:0.0957610855902;撮合:0.095585299249;征求:0.0942520474863;指导:0.0938729309958;防范:0.0935886925203;内容:0.0935707648708;银行业:0.0924187888179;人民政府:0.0913968648948;电信业务:0.0913968648948;事中:0.0913968648948;成谜:0.0913968648948;空间:0.089016814844;教育:0.0870115991272;允许:0.0870115991272;日为:0.0870115991272;法律法规:0.0870115991272;美股:0.0870115991272;提出:0.0852959667462;定位:0.0852959667462;线下:0.0851376002741;促进:0.0842298232683;规则:0.0834970031342;期限:0.0832451948645;监管:0.0828451643252;有利于:0.0821500345048;基本:0.0821500345048;自律:0.0820709326995;社会:0.081874419262;机构:0.0817216048291", "week_hot_keywords": 
"征求意见:0.512874133053;中介机构:0.463553021821;三农:0.461664191032;细则:0.407558890648;暂行办法:0.327170601297;出借:0.321359239609;责任编辑:0.283931524078;退赔:0.27267824813;活动:0.262908148672;办法:0.248871597435;行为:0.243433470993;清单:0.23908620864;坚决:0.236462146626;资本:0.233570975199;宜人贷:0.22658333268;明确:0.224772072286;合规:0.223410054498;赵然:0.218142598504;e租宝:0.213626983439;负面:0.210892000993;管理:0.210667177524;开展:0.206735790424;大大:0.204967565038;经营:0.203641199889;部门:0.202840306082;公安部:0.201335754644;评估:0.200849524756;披露:0.200050329402;非法:0.199784264261;网络:0.198403696208;损失:0.197825066925;加强:0.197631971286;公告:0.196911064104;备案:0.193566798005;规范:0.192900809041;健康:0.189971300497;应当:0.189411109154;社会:0.189411109154;打击:0.186991424786;信息:0.185127945376;底线:0.185009437955;上市:0.184229571714;中介:0.183148712642;事件:0.18172577884;集资:0.181557557404;禁止:0.179823298583;HZ002:0.177240861284;会同:0.177240861284;要求:0.176848833409;金融监管:0.172604265295;地方:0.170358914447;防范:0.170358914447;风险管理:0.169725935399;义务:0.169725935399;责任:0.168901533304;意见:0.166435745022;存管:0.16571280236;某宝:0.163606948878;来看:0.161856872938;促进:0.160894530311;公开:0.158832415107;保本:0.157641431084;转型:0.156842391644;有关:0.155826187322;法院:0.155826187322;速报:0.154296304908;早报:0.154296304908;合法权益:0.154296304908;内容:0.153896526188;借贷:0.151923680813;办公室:0.151001815983;信息安全:0.151001815983;信息化:0.149973036471;规定:0.148434969746;相关:0.147906275946;互联网:0.146949980475;线下:0.146780275784;暂停:0.146075804864;制定:0.146060082596;纽交所:0.145437774834;发布:0.143339514423;利好:0.14303725871;依法:0.142058331866;受害人:0.141965762039;自己:0.141965762039;用户:0.141648856572;基金:0.140650760074;解决:0.140240965552;孙立欣:0.138866674418;引导:0.138418331318;保息:0.138418331318;美股:0.138418331318;来说:0.13748311046;事后:0.136339124065;强调:0.136339124065;起草:0.135121226643;总部:0.135121226643;海外:0.135049362345;资本市场:0.135033972346;或者:0.134880727449"} 2 | -------------------------------------------------------------------------------- /static/data/hot_topic/1/hot.json: -------------------------------------------------------------------------------- 1 | {"2015.12.05": 1, "2015.12.14": 4, "2015.12.23": 2, "2015.12.22": 1, "2015.12.21": 1, "2015.12.06": 1, "2015.12.10": 6, "2015.12.12": 2, "2015.12.03": 3, "2015.12.11": 2, "2015.12.16": 6, "2015.12.04": 8, "2015.12.07": 2, "2015.12.15": 1, "2015.12.09": 8, "2015.12.08": 9, "2015.12.18": 3, "2015.11.20": 1, "2015.12.17": 5} 2 | -------------------------------------------------------------------------------- /static/data/hot_topic/1/keywords.json: -------------------------------------------------------------------------------- 1 | {"带走": 0.23684967683127037, "分公司": 0.25956039027647254, "法定代表": 0.14870965406099504, "经侦": 0.18214517223841473, "e租宝": 1.1697296520291622, "大大": 0.372192349306282, "大厦": 0.16110212523274461, "调查": 0.3517286084603871, "违法": 0.14683508945988097, "官网": 0.19885367608039275, "支公司": 0.1296077773106976, "维权": 0.13216624658200335, "租赁": 0.1459274930542536, "冻结": 0.1858573816813141, "金易": 0.1845592914987935, "最新消息": 0.1458087494745348, "突查": 0.14501087189190917, "变更": 0.13182806535628105, "被查": 0.32707812514794476, "新华社": 0.14296721667793394, "工资": 0.1296077773106976, "北京": 0.16330256821375114, "张敏": 0.14501087189190917, "钰诚集团": 0.3163873568550746, "集团": 0.22058417824533577, "爆料": 0.17669159156054834, "今日": 0.14111584709219382, "申彤": 0.1944116659660464, "返回": 0.14296721667793394, "警察": 0.15726393834572733, "钰诚": 0.21061263812988357, "缅甸": 0.17156066001352072, "关联": 0.13124027760956694, "全部": 0.21429256475210176, "朝阳区": 0.14296721667793394, "暂停": 0.13426221997341273, "母公司": 
0.2109249045700497, "警方": 0.16934238164432705, "遭查": 0.15726393834572733, "网络科技": 0.13631718288924546, "安徽": 0.13214033179644719, "代销": 0.2517172962169388, "查封": 0.1858870675762438, "之前": 0.162009721638372, "员工": 0.2677694384163143, "官方": 0.15381356460828002, "事件": 0.16455403752981024, "消息": 0.18501833500736867, "报道": 0.16787821200254346, "涉嫌": 0.25407694046566315} 2 | -------------------------------------------------------------------------------- /static/data/hot_topic/2/hot.json: -------------------------------------------------------------------------------- 1 | {"2015.12.29": 17, "2015.12.28": 36, "2015.11.16": 1, "2015.12.10": 1, "2015.12.30": 12, "2015.12.31": 3, "2015.12.14": 2, "2015.12.09": 1, "2015.12.08": 3} 2 | -------------------------------------------------------------------------------- /static/data/hot_topic/2/keywords.json: -------------------------------------------------------------------------------- 1 | {"要求": 0.2168430257033911, "信息": 0.17613858265776722, "经营": 0.23127291875921085, "办法": 0.5034924397242753, "红线": 0.2249833330449719, "网络": 0.27740940580158757, "解读": 0.1931281772409613, "暂行办法": 0.4476186986200558, "法律法规": 0.19774209803442158, "防范": 0.17775340094386952, "清单": 0.4476186986200558, "备案": 0.29997777739329584, "征求意见": 0.6297190964058518, "禁止": 0.3655979161980793, "金融监管": 0.3365070199890776, "管理": 0.2456868918341056, "有关": 0.2496664987015737, "地方": 0.25956039027647254, "报送": 0.1944116659660464, "意见": 0.19022984141392865, "信息化": 0.20015410334910752, "责任编辑": 0.19226695576035002, "义务": 0.2916174989490696, "存管": 0.19979699775334406, "保息": 0.2974193081219901, "会同": 0.2287475466846943, "明确": 0.319084603072442, "细则": 0.45070232635366, "公安部": 0.22306448109149257, "部门": 0.25507003024903296, "规定": 0.2689219817098808, "中介": 0.2177122623518977, "自担": 0.1782106938022092, "中介机构": 0.693978385617977, "活动": 0.30613085718110866, "行为": 0.2666301014158043, "合规": 0.18398674999237677, "保护": 0.17901629329788363, "整改": 0.24784942343499175, "应当": 0.33835668118752904, "起草": 0.17669159156054834, "资管": 0.1874861108708099, "披露": 0.19174454002591265, "不得": 0.3244741635681649, "出借": 0.5530105807142608, "众筹": 0.17400377997463243, "保本": 0.2355887887473978, "承担": 0.18265360797233252, "负面": 0.37609171357450355, "评估": 0.1782997514869983} 2 | -------------------------------------------------------------------------------- /static/data/hot_topic/3/hot.json: -------------------------------------------------------------------------------- 1 | {"2015.12.29": 1, "2015.12.28": 3, "2015.12.23": 1, "2015.12.22": 2, "2015.12.21": 5, "2015.12.20": 3, "2015.11.30": 1, "2015.11.17": 5, "2015.12.11": 1, "2015.12.16": 2, "2015.11.19": 1, "2015.12.07": 1, "2015.11.21": 1, "2015.11.24": 3, "2015.12.19": 6, "2015.12.18": 5, "2015.11.20": 1, "2015.11.18": 3} 2 | -------------------------------------------------------------------------------- /static/data/hot_topic/3/keywords.json: -------------------------------------------------------------------------------- 1 | {"股价": 0.13734352287459164, "净利润": 0.16110212523274461, "美股": 0.1845592914987935, "三季度": 0.10007705167455376, "每股": 0.11864525882065295, "递交": 0.12867049501014052, "万美元": 0.22713191449771541, "权则": 0.1134068051468604, "说明书": 0.12867049501014052, "LendingClub": 0.2238093493100279, "收为": 0.1296077773106976, "路演": 0.14501087189190917, "华兴": 0.162009721638372, "上市": 0.3784106006394457, "募资": 0.12867049501014052, "首日": 0.2372905176413059, "新股": 0.10007705167455376, "申请": 0.13216624658200335, "成功": 0.1968604164143504, "第一股": 0.3258812350637608, "国内": 
0.09764199509110238, "海外": 0.11940571677031786, "承销商": 0.1296077773106976, "唐宁": 0.26428066359289437, "招股书": 0.2144508250169009, "宜信": 0.2699482995658228, "速报": 0.1296077773106976, "认购": 0.11437377334234715, "ADS": 0.1458087494745348, "发行价": 0.24784942343499175, "PPT": 0.17156066001352072, "YRD": 0.15726393834572733, "破发": 0.2109249045700497, "赴美": 0.2827065464968774, "美国": 0.25407694046566315, "提交": 0.14501087189190917, "资本市场": 0.12734221152487574, "净营收": 0.1134068051468604, "最新版": 0.10546245228502485, "百度": 0.17393375190474966, "美元": 0.3072984370060266, "IPO": 0.2835928328717615, "更新": 0.13182806535628105, "申请书": 0.10007705167455376, "净亏损": 0.1296077773106976, "招股": 0.1845592914987935, "区间": 0.10546245228502485, "登陆": 0.2605215638625117, "纽交所": 0.5101880625640854, "宜人贷": 0.7784574245709521} 2 | -------------------------------------------------------------------------------- /static/data/hot_topic/4/hot.json: -------------------------------------------------------------------------------- 1 | {"2015.12.22": 1, "2015.11.23": 4, "2015.12.02": 1, "2015.11.19": 4, "2015.11.26": 2, "2015.11.27": 1, "2015.11.24": 1, "2015.11.30": 2, "2015.11.20": 2, "2015.11.18": 2} 2 | -------------------------------------------------------------------------------- /static/data/hot_topic/4/keywords.json: -------------------------------------------------------------------------------- 1 | {"背书": 0.09439398608135204, "抢占": 0.049569884686998356, "控股": 0.058001259991544145, "果子": 0.0648038886553488, "黑天鹅": 0.0486029164915116, "联想": 0.10546245228502485, "e租宝": 0.09160533419505487, "电视广告": 0.0486029164915116, "新闻联播": 0.13631718288924546, "打广告": 0.11437377334234715, "招标会": 0.10007705167455376, "王思聪": 0.14870965406099504, "黄金": 0.1153704274327636, "电视": 0.0972058329830232, "金信网": 0.049569884686998356, "紫马财行": 0.0648038886553488, "标王": 0.24784942343499175, "广告主": 0.07148360833896697, "刘珺": 0.0486029164915116, "中赢": 0.0486029164915116, "翼龙贷": 0.2906034718497554, "拿下": 0.08578033000676036, "泛亚": 0.049569884686998356, "速报": 0.0648038886553488, "夺得": 0.081004860819186, "越描越黑": 0.0486029164915116, "熬过": 0.0486029164915116, "投放": 0.0648038886553488, "大干": 0.0486029164915116, "烧钱": 0.0541355310582768, "金银猫": 0.052731226142512425, "央视": 0.33477483700681704, "标版": 0.0486029164915116, "招标": 0.10007705167455376, "时段": 0.0972058329830232, "银谷": 0.11437377334234715, "寒冬": 0.05925113364795651, "标的物": 0.052731226142512425, "做广告": 0.081004860819186, "没好": 0.081004860819186, "资源": 0.08131712725935213, "媒体": 0.06562013880478347, "花费": 0.0486029164915116, "广告位": 0.1134068051468604, "亿成": 0.0486029164915116, "重金": 0.11864525882065295, "广告": 0.23378674087063733, "财经频道": 0.0486029164915116, "广告费用": 0.0648038886553488, "郭大刚": 0.07148360833896697} 2 | -------------------------------------------------------------------------------- /static/fonts/glyphicons-halflings-regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindawei/p2p/b0771100b8d4740906a186d9668b7d662214409c/static/fonts/glyphicons-halflings-regular.eot -------------------------------------------------------------------------------- /static/fonts/glyphicons-halflings-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindawei/p2p/b0771100b8d4740906a186d9668b7d662214409c/static/fonts/glyphicons-halflings-regular.ttf -------------------------------------------------------------------------------- 
/static/fonts/glyphicons-halflings-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindawei/p2p/b0771100b8d4740906a186d9668b7d662214409c/static/fonts/glyphicons-halflings-regular.woff -------------------------------------------------------------------------------- /static/fonts/glyphicons-halflings-regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindawei/p2p/b0771100b8d4740906a186d9668b7d662214409c/static/fonts/glyphicons-halflings-regular.woff2 -------------------------------------------------------------------------------- /static/img/bg.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindawei/p2p/b0771100b8d4740906a186d9668b7d662214409c/static/img/bg.jpg -------------------------------------------------------------------------------- /static/img/detail.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindawei/p2p/b0771100b8d4740906a186d9668b7d662214409c/static/img/detail.png -------------------------------------------------------------------------------- /static/img/dl.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindawei/p2p/b0771100b8d4740906a186d9668b7d662214409c/static/img/dl.jpg -------------------------------------------------------------------------------- /static/img/hot_topic_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindawei/p2p/b0771100b8d4740906a186d9668b7d662214409c/static/img/hot_topic_1.jpg -------------------------------------------------------------------------------- /static/img/hot_topic_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindawei/p2p/b0771100b8d4740906a186d9668b7d662214409c/static/img/hot_topic_2.jpg -------------------------------------------------------------------------------- /static/img/hot_topic_3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindawei/p2p/b0771100b8d4740906a186d9668b7d662214409c/static/img/hot_topic_3.jpg -------------------------------------------------------------------------------- /static/img/hot_topic_4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindawei/p2p/b0771100b8d4740906a186d9668b7d662214409c/static/img/hot_topic_4.jpg -------------------------------------------------------------------------------- /static/img/mh3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindawei/p2p/b0771100b8d4740906a186d9668b7d662214409c/static/img/mh3.jpg -------------------------------------------------------------------------------- /static/img/not_found.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindawei/p2p/b0771100b8d4740906a186d9668b7d662214409c/static/img/not_found.jpg -------------------------------------------------------------------------------- /static/js/jquery.cookie.js: -------------------------------------------------------------------------------- 1 | /*! 
2 | * jQuery Cookie Plugin v1.4.1 3 | * https://github.com/carhartl/jquery-cookie 4 | * 5 | * Copyright 2013 Klaus Hartl 6 | * Released under the MIT license 7 | */ 8 | (function (factory) { 9 | if (typeof define === 'function' && define.amd) { 10 | // AMD 11 | define(['jquery'], factory); 12 | } else if (typeof exports === 'object') { 13 | // CommonJS 14 | factory(require('jquery')); 15 | } else { 16 | // Browser globals 17 | factory(jQuery); 18 | } 19 | }(function ($) { 20 | 21 | var pluses = /\+/g; 22 | 23 | function encode(s) { 24 | return config.raw ? s : encodeURIComponent(s); 25 | } 26 | 27 | function decode(s) { 28 | return config.raw ? s : decodeURIComponent(s); 29 | } 30 | 31 | function stringifyCookieValue(value) { 32 | return encode(config.json ? JSON.stringify(value) : String(value)); 33 | } 34 | 35 | function parseCookieValue(s) { 36 | if (s.indexOf('"') === 0) { 37 | // This is a quoted cookie as according to RFC2068, unescape... 38 | s = s.slice(1, -1).replace(/\\"/g, '"').replace(/\\\\/g, '\\'); 39 | } 40 | 41 | try { 42 | // Replace server-side written pluses with spaces. 43 | // If we can't decode the cookie, ignore it, it's unusable. 44 | // If we can't parse the cookie, ignore it, it's unusable. 45 | s = decodeURIComponent(s.replace(pluses, ' ')); 46 | return config.json ? JSON.parse(s) : s; 47 | } catch(e) {} 48 | } 49 | 50 | function read(s, converter) { 51 | var value = config.raw ? s : parseCookieValue(s); 52 | return $.isFunction(converter) ? converter(value) : value; 53 | } 54 | 55 | var config = $.cookie = function (key, value, options) { 56 | 57 | // Write 58 | 59 | if (value !== undefined && !$.isFunction(value)) { 60 | options = $.extend({}, config.defaults, options); 61 | 62 | if (typeof options.expires === 'number') { 63 | var days = options.expires, t = options.expires = new Date(); 64 | t.setTime(+t + days * 864e+5); 65 | } 66 | 67 | return (document.cookie = [ 68 | encode(key), '=', stringifyCookieValue(value), 69 | options.expires ? '; expires=' + options.expires.toUTCString() : '', // use expires attribute, max-age is not supported by IE 70 | options.path ? '; path=' + options.path : '', 71 | options.domain ? '; domain=' + options.domain : '', 72 | options.secure ? '; secure' : '' 73 | ].join('')); 74 | } 75 | 76 | // Read 77 | 78 | var result = key ? undefined : {}; 79 | 80 | // To prevent the for loop in the first place assign an empty array 81 | // in case there are no cookies at all. Also prevents odd result when 82 | // calling $.cookie(). 83 | var cookies = document.cookie ? document.cookie.split('; ') : []; 84 | 85 | for (var i = 0, l = cookies.length; i < l; i++) { 86 | var parts = cookies[i].split('='); 87 | var name = decode(parts.shift()); 88 | var cookie = parts.join('='); 89 | 90 | if (key && key === name) { 91 | // If second argument (value) is a function it's a converter... 92 | result = read(cookie, value); 93 | break; 94 | } 95 | 96 | // Prevent storing a cookie that we couldn't decode. 97 | if (!key && (cookie = read(cookie)) !== undefined) { 98 | result[name] = cookie; 99 | } 100 | } 101 | 102 | return result; 103 | }; 104 | 105 | config.defaults = {}; 106 | 107 | $.removeCookie = function (key, options) { 108 | if ($.cookie(key) === undefined) { 109 | return false; 110 | } 111 | 112 | // Must not alter options, thus extending a fresh object... 
113 | $.cookie(key, '', $.extend({}, options, { expires: -1 })); 114 | return !$.cookie(key); 115 | }; 116 | 117 | })); 118 | -------------------------------------------------------------------------------- /static/js/p2p/layout.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by Administrator on 2016/4/19. 3 | */ 4 | document.onkeydown = function (event) { 5 | var e = event || window.event || arguments.callee.caller.arguments[0]; 6 | if (e && e.keyCode == 13) { // enter 键 7 | $("#nav_search_btn").click(); 8 | } 9 | }; 10 | 11 | /** 显示错误 */ 12 | function my_alert(error_str) { 13 | $('#myModal').modal('show') 14 | $('#modal-alert').html(error_str); 15 | } 16 | 17 | 18 | $(document).ready(function () { 19 | $("#nav_search_btn").click(function () { 20 | var key_word = $("#search_key")[0].value; 21 | if (key_word.length == 0) 22 | my_alert("平台名称不能为空!") 23 | else 24 | window.location.href = "/search/" + key_word; 25 | }); 26 | 27 | if ($.cookie('username') == null) { // 未登录 28 | $('#sign_in_out').text('登录'); 29 | $('#register').removeClass('hidden') 30 | $('#grzx').addClass('hidden') 31 | $('#sign_in_out').click(function () { 32 | window.location.href = "/sign_in"; 33 | }) 34 | } else { // 已登录 35 | $('#sign_in_out').text('退出'); 36 | $('#register').addClass('hidden') 37 | $('#grzx').removeClass('hidden') 38 | $('#sign_in_out').click(function () { 39 | $.removeCookie('username'); 40 | window.location.href = "/home"; 41 | }) 42 | } 43 | }); 44 | 45 | 46 | -------------------------------------------------------------------------------- /static/js/p2p/qwzx_hot_topic.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by Administrator on 2016/4/19. 3 | */ 4 | // 根据返回信息添加元素 5 | function append_info(topic_id, items) { 6 | for (var i = 0; i < items.length; ++i) { 7 | $('#show').append( 8 | "" + 9 | "
" + 10 | "
" + items[i].title + "
" + 11 | "
" + items[i].item_pub_time + "
" + 12 | "
" + 13 | "
" 14 | ); 15 | } 16 | } 17 | 18 | // 显示热点词汇 19 | function createRandomItemStyle() { 20 | var base = 225; 21 | return { 22 | normal: { 23 | color: 'rgb(' + [ 24 | Math.round(Math.random() * base), 25 | Math.round(Math.random() * base), 26 | Math.round(Math.random() * base) 27 | ].join(',') + ')' 28 | } 29 | }; 30 | } 31 | 32 | 33 | function show_hot_word(word_data) { 34 | 35 | $("#hot_content").html(""); 36 | var size = 50; 37 | var show_data = []; 38 | for (var i = 0; i < word_data.length; ++i) { 39 | var word = word_data[i]; 40 | if (size > 40) 41 | size -= 4; 42 | else if (size > 24) 43 | size -= 2; 44 | else if (size > 8) 45 | size -= 1; 46 | 47 | var item = {}; 48 | item['name'] = word['name']; 49 | item['value'] = size; 50 | item['itemStyle'] = createRandomItemStyle(); 51 | show_data.push(item); 52 | } 53 | var cy_chart = echarts.init(document.getElementById('hot_content')); 54 | option = { 55 | series: [{ 56 | type: 'wordCloud', 57 | size: ['100%', '100%'], 58 | textRotation: [0, 45, -45, 90], 59 | textPadding: 1, 60 | autoSize: { 61 | enable: true, 62 | minSize: 40 63 | }, 64 | data: show_data 65 | }] 66 | }; 67 | cy_chart.setOption(option); 68 | } 69 | 70 | // 显示热点趋势 71 | function show_hot_trend(hot_map) { 72 | div_object = $("#hot_trend"); 73 | div_object.height(Math.round(div_object.width() * 0.45)) 74 | var hot_trend_chart = echarts.init(document.getElementById('hot_trend')); 75 | hot_trend_option = { 76 | tooltip: { 77 | trigger: 'axis' 78 | }, 79 | grid: { 80 | left: '3%', 81 | right: '4%', 82 | bottom: '3%', 83 | containLabel: true 84 | }, 85 | xAxis: [ 86 | { 87 | type: 'category', 88 | boundaryGap: false, 89 | data: hot_map.x 90 | } 91 | ], 92 | yAxis: [ 93 | { 94 | name: '热度值', 95 | type: 'value' 96 | } 97 | ], 98 | series: [ 99 | { 100 | name: '热度', 101 | type: 'line', 102 | label: { 103 | normal: { 104 | show: true, 105 | position: 'top' 106 | } 107 | }, 108 | areaStyle: {normal: {}}, 109 | data: hot_map.y 110 | } 111 | ] 112 | }; 113 | hot_trend_chart.setOption(hot_trend_option); 114 | } 115 | 116 | // 加载数据 117 | $(document).ready(function () { 118 | 119 | var topic_id = $("#topic_id").text(); 120 | if (topic_id == 1) { 121 | $("#title").text("e租宝涉嫌违法经营分崩离析"); 122 | } else if (topic_id == 2) { 123 | $("#title").text("P2P监管办法征求意见稿发布"); 124 | 125 | } else if (topic_id == 3) { 126 | $("#title").text("宜人贷上市"); 127 | 128 | } else if (topic_id == 4) { 129 | $("#title").text("翼龙贷3.7亿豪夺央视标王"); 130 | } 131 | 132 | $.getJSON("/info/hot/topic/preview/" + topic_id, function (data) { 133 | show_hot_word(data.keyword_list); 134 | append_info(topic_id, data.item_list); 135 | show_hot_trend(data.hot_map) 136 | 137 | }); 138 | 139 | }); -------------------------------------------------------------------------------- /static/js/p2p/qwzx_type.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by Administrator on 2016/4/19. 
3 | */ 4 | 5 | var rowCount = 40; 6 | var current = 0; 7 | var type; 8 | 9 | // 加载数据 10 | $(document).ready(function () { 11 | 12 | type = $("#type").text(); 13 | // 重命名 14 | if (type == 'news') 15 | $("#type").text("新闻") 16 | else if (type == 'policy') 17 | $("#type").text("政策") 18 | else if (type == 'opinion') 19 | $("#type").text("观点") 20 | else if (type == 'ugc') 21 | $("#type").text("用户评论") 22 | 23 | // 导航栏 24 | $.getJSON("/info/" + type + "/list/size", function (data) { 25 | $('#num_info').text('每页最多 ' + rowCount + '条,共 ' + data.list_size + '条') 26 | // 计算页数 27 | var pages = Math.floor(data.list_size / rowCount); 28 | if ((data.list_size % rowCount) > 0) 29 | pages += 1; 30 | // 添加导航栏 31 | for (var i = 0; i < pages; ++i) { 32 | if (i == 0) 33 | $('#pages').append("
  • " + "" + (i + 1) + "
  • ") 34 | else 35 | $('#pages').append("
  • " + (i + 1) + "
  • ") 36 | } 37 | // 添加点击事件 38 | $(".mynav").click(reget_list); 39 | }); 40 | 41 | // 显示 42 | $.getJSON("/info/" + type + "/list/current=" + current + "&rowCount=" + rowCount, function (data) { 43 | show_list(data); 44 | }); 45 | }); 46 | 47 | // 重新刷新数据 48 | function reget_list() { 49 | var url = "/info/" + type + "/list/current=" + ($(this).text() - 1) + "&rowCount=" + rowCount; 50 | $.getJSON(url, function (data) { 51 | show_list(data); 52 | }); 53 | $(".mynav").attr("class", "mynav"); 54 | $(this).attr("class", "mynav active"); 55 | 56 | } 57 | 58 | // 显示数据 59 | function show_list(data) { 60 | $('#show').html(""); 61 | for (var i = 0; i < data.type_list.length; ++i) { 62 | var item = data.type_list[i]; 63 | if (type == 'ugc') { 64 | $('#show').append( 65 | "" + 66 | "
    " + 67 | "
    " + 68 | "Q:   " + item.title + "
    " + 69 | "
    " + 70 | "
    " + 71 | "
    " + 72 | "A:   " + item.content + "
    " + 73 | "
    " + item.author + "
    " + 74 | "
    " + item.item_pub_time + "
    " + 75 | "
    " + 76 | "
    " 77 | ); 78 | } else { 79 | var title = item.title; 80 | if (title.length == 0) { 81 | var content = item.content.substr(0, 40) 82 | title = content; 83 | } 84 | var head_str = ' '; 85 | if (type == 'news') { 86 | head_str = ' '; 87 | } else if (type == 'policy') { 88 | head_str = ' '; 89 | } else if (type == 'opinion') { 90 | head_str = ' '; 91 | } 92 | var tags_str = ""; 93 | var tags = item.tags; 94 | if (tags.length > 0) { 95 | var tags_list = tags.split(','); 96 | for (var j = 0; j < tags_list.length; ++j) { 97 | tag = tags_list[j] 98 | tags_str += head_str + tag + ''; 99 | } 100 | } 101 | 102 | $('#show').append( 103 | "" + 104 | "
    " + 105 | "
    " + title + tags_str + "
    " + 106 | "
    " + item.author + "
    " + 107 | "
    " + item.item_pub_time + "
    " + 108 | "
    " + 109 | "
    " 110 | ); 111 | } 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /static/js/p2p/qwzx_type_detail.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Created by Administrator on 2016/4/19. 3 | */ 4 | 5 | $(document).ready(function () { 6 | 7 | // 处理文本中的换行和空格 8 | var content = $("#content").text(); 9 | content = content.replace(new RegExp("#n#", "gm"), "
    "); 10 | content = content.replace(new RegExp("#r#", "gm"), ""); 11 | content = content.replace(new RegExp(" ", "gm"), " ") 12 | $("#content").html(content); 13 | 14 | if ($("#title").text().length == 0) 15 | $("#title").html(content.substr(0, 20) + "...") 16 | 17 | type = $("#type").text(); 18 | if (type == 'news') 19 | $("#type").text("新闻") 20 | else if (type == 'policy') 21 | $("#type").text("政策") 22 | else if (type == 'opinion') 23 | $("#type").text("观点") 24 | else if (type == 'ugc') 25 | $("#type").text("用户评论") 26 | }); 27 | -------------------------------------------------------------------------------- /templates/detail_problem.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | 3 | {% block title %} 问题平台档案 {% endblock %} 4 | 5 | {% block body %} 6 |
    7 |
    8 | 9 |
    10 |

    0条记录

    11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 |
    编号平台名称上线时间问题时间地区注册资本问题类型
    26 |
    27 | 28 |
    29 |
    30 | 31 | {% endblock %} -------------------------------------------------------------------------------- /templates/detail_problem_analyze.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | 3 | {% block title %} 问题平台分析 {% endblock %} 4 | 5 | {% block body %} 6 | 7 |
    8 |
    9 | 10 |
    11 |
    12 |
    13 | 14 |
    15 |
    16 |
    17 | 18 |
    19 |
    20 |
    21 | 22 |
    23 |
    24 |
    25 | 26 |
    27 |
    28 |
    29 | 30 |
    31 |
    32 | 33 | 34 | 35 | 36 | {% endblock %} -------------------------------------------------------------------------------- /templates/detail_rank.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | 3 | {% block title %} 平台档案 {% endblock %} 4 | 5 | {% block body %} 6 | 7 |
    8 |
    9 | 10 |
    11 |

    热门平台 Top0    排序:  

    13 |
    14 | 15 |
    16 | 17 | 18 | 19 | 20 | 21 | 22 | 35 | 48 | 61 | 73 | 74 | 75 | 76 | 77 | 78 | 79 |
    编号平台名称 23 | 34 | 36 | 47 | 49 | 60 | 62 | 72 | 所在地区平台详情
    80 |
    81 | 82 |
    83 | 84 | 85 |   用户观点向上轮播 86 | 87 |
    88 | 89 | 96 | 97 |
    99 | 100 |
    101 | 102 |
    103 |
    104 | 105 | 106 | 107 | 108 | {% endblock %} -------------------------------------------------------------------------------- /templates/grzx.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | 3 | {% block title %} 个人中心 {% endblock %} 4 | 5 | {% block body %} 6 |
    7 |
    8 |
    9 | 12 |
    13 | 14 | 15 | 24 | 25 |
    26 |
    27 | 28 | 29 | 30 | 31 |
    32 |
    33 |
    34 |
    35 | 36 |
    37 |
    38 | 39 | 40 | 41 | 42 | {% endblock %} -------------------------------------------------------------------------------- /templates/home.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | 3 | {% block title %}首页 {% endblock %} 4 | 5 | 6 | {% block body %} 7 | 8 |
    9 | 10 |
    11 |

    昆仑镜
    昆仑镜为您提供全面的网贷行业资讯,为您的决策提供支持。
{% endblock %}
-------------------------------------------------------------------------------- /templates/info_hot_topic.html: --------------------------------------------------------------------------------
{% extends "layout.html" %}

{% block title %} 热点话题 {% endblock %}

{% block body %}
{% endblock %}
-------------------------------------------------------------------------------- /templates/info_hot_topic_news_detail.html: --------------------------------------------------------------------------------
{% extends "layout.html" %}

{% block title %} 热点新闻详情 {% endblock %}

{% block body %}
    {{ data_info.title }}
    日期:{{ data_info.item_pub_time }}
    {{ data_info.concent }}
{% endblock %}
-------------------------------------------------------------------------------- /templates/info_type.html: --------------------------------------------------------------------------------
{% extends "layout.html" %}

{% block title %} 资讯类型 {% endblock %}

{% block body %}
{% endblock %}
-------------------------------------------------------------------------------- /templates/info_type_detail.html: --------------------------------------------------------------------------------
{% extends "layout.html" %}

{% block title %} 资讯详情 {% endblock %}

{% block body %}
    {{ data_info.title }}
    {{ data_info.item_pub_time }}    {{ data_info.author }}    {{ data_info.url }}
    {{ data_info.content }}
{% endblock %}
-------------------------------------------------------------------------------- /templates/layout.html: --------------------------------------------------------------------------------
{% block title %} 昆仑镜 {% endblock %}

{% block head %}
{% endblock %}

{% block body %}
{% endblock %}

{% block footer %}
{% endblock %}
-------------------------------------------------------------------------------- /templates/register.html: --------------------------------------------------------------------------------
{% extends "layout.html" %}

{% block title %} 注册 {% endblock %}

{% block body %}
{% endblock %}
-------------------------------------------------------------------------------- /templates/search_detail_info.html: --------------------------------------------------------------------------------
{% extends "detail_info.html" %}

{% block title %} 搜索结果 {% endblock %}
-------------------------------------------------------------------------------- /templates/search_info.html: --------------------------------------------------------------------------------
{% extends "layout.html" %}

{% block title %} 图谱搜索 {% endblock %}

{% block body %}
    {{ data_info.key_word }}

    描述部分(示例):此前,在Facebook上搜索好友是一件痛苦的差事。尽管Facebook提供了搜索栏,但用户很难根据某些条件迅速找到好友。不过,扎克伯格正在测试Facebook将于2013年1月15日发布的一项新功能。这一功能将改变Facebook的用户体验,对竞争对手形成威胁,并可能引起隐私保护组织的抗议。对扎克伯格而言,他需要的搜索条件是“居住在Palo Alto附近的普里西拉和我的朋友”。扎克伯格表示:“我们邀请到5个人,他们都喜欢宠物犬。”

    Graph Search诞生之前业内观察家一直关注Facebook能否释放网站搜索栏的潜力。谷歌对此尤为关注,因为Facebook的搜索服务能获得谷歌搜索引擎无法获取的大量数据,成为谷歌的重要竞争对手。他们也关注Facebook的搜索产品如何运作。一切都水落石出。Facebook新推出的社交图谱搜索Graph Search与传统互联网搜索有着根本的不同。谷歌搜索引擎能检索全球的大量信息,帮助用户寻找问题答案。与此不同,Facebook的搜索服务利用该网站庞大的数据库,帮助用户更好地利用“社交图谱”。根据扎克伯格的描述,社交图谱包括用户与好友和熟人的关系,以及他们喜欢的明星和品牌。

    在Graph Search发布数周前,Facebook高管仍在研究如何命名这一搜索服务。他们希望避免使用“搜索”一词,从而使该服务区别于传统的互联网搜索。例如,在发布的几天前,Facebook一名高管还以“浏览”来称呼该服务。然而在经过几小时讨论后,他们做出了妥协:没有比Graph Search更好的名字。扎克伯格表示:“这清楚地阐明这是一款搜索服务。而社交图谱是一件大事。”

    Graph Search的理念在于,以谷歌搜索引擎发掘互联网信息的方式,发掘Facebook网站社交图谱的含义。扎克伯格表示:“人们使用搜索引擎去回答问题,而我们可以回答其他人无法回答的许多问题。其他搜索服务主要编目了公开信息,而Facebook的信息并不在此列。这是人们分享的内容。此前没有一种很好的方式去了解人们分享了哪些信息,以满足人类发现信息以及寻找他人的需求。我们可以在这一方面有所作为。我们也是全球唯一有能力做到这一点的公司。”结果令人兴奋。这一具有变革意义的产品能帮助用户做许多事,而这些事是用户无法自己去做的。凭借Graph Search,人们可以以全新方式去使用Facebook,例如搜索日期、查找招聘信息、寻找一同出游的伙伴,或检索餐厅等商户。更重要的是,Graph Search扩展了Facebook的核心使命:不仅帮助用户与已知的他人建立联系,还能成为一款发现工具。

    扎克伯格表示,Graph Search帮助Facebook找回了自己的“根”。他表示:“在建立Facebook之初,我们提供了类似的功能,但仅仅覆盖了你的同学。随后,Facebook一方面关注如何帮你认识周围的新朋友,探索你的社区,一方面也帮助你与已认识的人保持联系。但对于几千人的团队来说,同时关注这两方面存在困难。因此,我们的关注重点从帮助你找到想要的人转向了与已认识的人保持联系。Graph Search是一个升级版的发现工具。探索你的社区是人类的核心需求,而这是我们向这一方向迈出的重要一步。”这只是许多步中的第一步。Graph Search将根据用户使用该服务的方式持续改进,因此Facebook并不急于全面推出该服务。在发布之初,Graph Search仅面向一小部分用户开放。扎克伯格认为,到面向全球上亿用户全面开放时,Graph Search将得到极大的改进。例如扎克伯格认为,Graph Search将帮助用户更方便地确定,在宠物犬的生日派对上应当邀请哪些好友。他表示:“我们目前还没有提供‘谁养狗’的选项。”Graph Search得到了Facebook的全面支持,项目团队共70人左右。
{% endblock %}
-------------------------------------------------------------------------------- /templates/search_not_found.html: --------------------------------------------------------------------------------
{% extends "layout.html" %}

{% block title %}未找到 {% endblock %}

{% block body %}
{% endblock %}
-------------------------------------------------------------------------------- /templates/sign_in.html: --------------------------------------------------------------------------------
{% extends "layout.html" %}

{% block title %} 登录 {% endblock %}

{% block body %}
{% endblock %}
-------------------------------------------------------------------------------- /templates/yqdp.html: --------------------------------------------------------------------------------
{% extends "layout.html" %}

{% block title %} 舆情大盘 {% endblock %}

{% block body %}
{% endblock %}
-------------------------------------------------------------------------------- /test_db.py: --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
from pymongo import MongoClient

## Connect to the local MongoDB instance and select the p2p database
conn = MongoClient('localhost', 27017)
db = conn.p2p

# Sample queries against a collection (kept for reference)
#type = 'opinion'
#print(db[type].find_one({'_id': '25754'}))
#print(int(db[type].count()))

def sign_in_valid(userName, password):
    # Look up a user document matching both user name and password
    result = db.user.find_one({'username': userName, 'password': password})
    print(result['platform_name'])


# Dump all user documents, then try a sample sign-in
result = db.user.find()
for r in result:
    print(r)
sign_in_valid('mdw', '123')

# Scratch list test
ls = []
ls.append("mi")
print(ls)
ls.remove("mi")
print(ls)
--------------------------------------------------------------------------------