├── .gitattributes ├── 12306火车票 ├── .idea │ ├── inspectionProfiles │ │ └── profiles_settings.xml │ ├── misc.xml │ ├── modules.xml │ ├── vip-12306.iml │ └── workspace.xml ├── 12306.py ├── __pycache__ │ ├── cons.cpython-36.pyc │ ├── cons.cpython-37.pyc │ ├── settings.cpython-36.pyc │ └── settings.cpython-37.pyc ├── captcha.jpg ├── cons.py ├── login.py └── settings.py ├── 51_job ├── README.md ├── clean_data │ ├── csv_clean_data.py │ ├── job_company │ │ └── 大数据公司类型图饼图.jpg │ ├── job_company_workyears_pic.py │ ├── job_pic.py │ ├── job_pic │ │ └── examples.jpg │ ├── test.py │ └── wordscloud.py └── get_data │ └── 51job_toCsv.py ├── Analysis_Wechat_Friends └── Analysis_Wechat.py ├── Baidu_Address ├── README.md ├── baidu_address.py ├── company.csv └── image │ └── smaple.PNG ├── Baidu_Music ├── baidu_music.py ├── baidu_music2.py └── wangyi_music.py ├── ChuanZhi_Class ├── result │ └── ts.txt ├── scrapy.cfg └── ts │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-37.pyc │ ├── items.cpython-37.pyc │ ├── pipelines.cpython-37.pyc │ └── settings.cpython-37.pyc │ ├── items.py │ ├── middlewares.py │ ├── pipelines.py │ ├── settings.py │ └── spiders │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-37.pyc │ └── lesson.cpython-37.pyc │ └── lesson.py ├── DangDang_Books ├── README.md ├── analysis.py ├── dangdang │ ├── dangdang │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── items.cpython-37.pyc │ │ │ ├── pipelines.cpython-37.pyc │ │ │ └── settings.cpython-37.pyc │ │ ├── items.py │ │ ├── middlewares.py │ │ ├── pipelines.py │ │ ├── settings.py │ │ └── spiders │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ └── dd.cpython-37.pyc │ │ │ └── dd.py │ └── scrapy.cfg ├── ddSpider.py ├── pictureWall.py └── stopwords.txt ├── DouBan_Movie ├── 1.png ├── get_douban_comment3.py ├── ip.txt ├── ippools.py └── pic │ ├── 动物世界.png │ └── 巴斯特·斯克鲁格斯的歌谣.png ├── DouYou ├── README.md ├── douyu.csv └── test.py ├── LaGou ├── README.md ├── lagou1.csv ├── machine_learning_hz_job2.csv ├── 动态爬取.py └── 静态爬取.py ├── LianJia ├── README.md ├── cleaned.csv ├── group_by.py ├── housedata1.csv ├── housedata2.csv └── test_threading.py ├── Meituan ├── __init__.py ├── first.py ├── get_cookie.py ├── meituan.py └── mtwm.py ├── Movie_maoyan ├── WPS网盘.lnk ├── maoyan.csv ├── maoyan.py ├── readme.md ├── result.txt └── txt.py ├── Movie_tiantang ├── dytt.csv ├── readme.md └── spider_dytt.py ├── Photo_Position_GoldenAPI ├── .DS_Store ├── .idea │ ├── inspectionProfiles │ │ └── Project_Default.xml │ ├── misc.xml │ ├── modules.xml │ ├── vcs.xml │ ├── workspace.xml │ └── 地理位置.iml ├── __pycache__ │ └── position_utils.cpython-37.pyc ├── main.py ├── picture │ ├── .DS_Store │ └── 20190828185021.jpg └── position_utils.py ├── Photo_qiantu ├── ip.txt ├── qiantu.photo │ └── simple_show.PNG └── qiantu.py ├── Photo_taobao ├── ip.txt ├── taobao_photo.py └── taobao_photo │ └── simple_show.PNG ├── QiDian_Story ├── add_txt.py ├── binaries.txt ├── get_xiaoshuo.py └── 凡人修仙之仙界篇 │ ├── A目录.txt │ ├── 仙界篇外传一.txt │ ├── 仙界篇外传二.txt │ ├── 第一章 狐女.txt │ ├── 第三章 远去.txt │ ├── 第二章 石头哥哥.txt │ ├── 第五章 马兽.txt │ ├── 第六章 白袍少年.txt │ └── 第四章 相依.txt ├── Qsbk ├── duanzi.josn ├── qsbk │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── pipelines.cpython-37.pyc │ │ └── settings.cpython-37.pyc │ ├── items.py │ ├── middlewares.py │ ├── pipelines.py │ ├── settings.py │ └── spiders │ │ ├── __init__.py │ │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ └── qsbk_spider.cpython-37.pyc │ │ └── 
qsbk_spider.py ├── qsbk_start.py └── scrapy.cfg ├── README.md ├── Sina_topic_spider ├── README.md ├── age-pie.html ├── age_bar.html ├── area.html ├── gender.html ├── sina_topic.csv ├── sina_topic_data_analysis.py ├── sina_topic_spider.py ├── stop_words.txt └── word_cloud.html ├── WangYi_Music ├── geci.py ├── music.csv ├── wangyiyun.py └── 歌词 │ ├── 你若成风.txt │ ├── 全球变冷.txt │ ├── 内线.txt │ ├── 千古.txt │ ├── 千百度.txt │ ├── 城府.txt │ ├── 多余的解释.txt │ ├── 大千世界.txt │ ├── 天龙八部之宿敌.txt │ ├── 如果当时.txt │ ├── 幻听.txt │ ├── 庐州月.txt │ ├── 惊鸿一面.txt │ ├── 想象之中.txt │ ├── 我想牵着你的手.txt │ ├── 拆东墙.txt │ ├── 断桥残雪.txt │ ├── 明智之举.txt │ ├── 星座书上.txt │ ├── 有何不可.txt │ ├── 江湖 .txt │ ├── 河山大好.txt │ ├── 清明雨上.txt │ ├── 灰色头像.txt │ ├── 玫瑰花的葬礼.txt │ ├── 素颜.txt │ ├── 认错.txt │ ├── 违章动物.txt │ └── 雅俗共赏.txt ├── coffee.png ├── dangdang_book ├── README.md ├── dangdang_book │ ├── items.py │ ├── middlewares.py │ ├── pipelines.py │ ├── settings.py │ └── spiders │ │ ├── __init__.py │ │ └── dd_book.py └── scrapy.cfg ├── ele_me ├── 1.png ├── README.md ├── __init__.py ├── eleme_bar.png ├── eleme_wordcloud.png ├── elemedata.csv ├── fooddic.txt └── run.py ├── finance.eastmoney.com ├── README.md ├── __init__.py ├── 可还债 │ ├── __init__.py │ ├── id20200424.csv │ └── zhaunzhai.py └── 股票 │ ├── gupiao.py │ ├── id.csv │ └── result_20200423.csv ├── live.bible.is.com ├── README.md └── live.bible.is.py ├── minority_language ├── jike.py ├── jike2.py └── saier.py ├── reward.jpg ├── taobao ├── README.md ├── taobao.josn ├── taobao_food.sql ├── taobao_food_Mongodb.py ├── taobao_food_analysis.py ├── taobao_food_mysql.py └── test.py ├── utils └── crawlerHelper.py ├── yingjieshneg.com ├── 2020-04-20_company.csv ├── README.md └── yingjieshneg.py ├── yixuela.com ├── README.md └── poetry.py ├── 微博热搜 ├── 人物.xlsx ├── 名词.xlsx ├── 婚恋.xlsx └── 热搜.py ├── 爬取中彩网彩票 ├── 3D.xls └── test_CaiPiao.py ├── 高考志愿网 ├── README.md ├── gkzy.py └── gkzy2.py └── 高考网 ├── 211高校排行.html ├── 985高校排行.html ├── analyse.py ├── college_data.csv ├── main.py ├── readme.md ├── 北京上海江苏高质量高校占比.html ├── 北京高质量高校占比.html ├── 占比前十城市高质量高校占比.html ├── 各地区高校数量段位图.html ├── 各城市高校数量.html ├── 各城市高质量高校数量.html ├── 高校分布热力图.html ├── 高校属性分析pie.html ├── 高校数量前十名.html ├── 高校数量后十名.html ├── 高校类型分析pie.html ├── 高质量高校分布热力图.html └── 高质量高校分布热力图map.html /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | *.js linguist-language=python 3 | *.css linguist-language=python 4 | *.html linguist-language=python 5 | *.sql linguist-language=python 6 | *.csv linguist-language=python 7 | *.txt linguist-language=python 8 | *.json linguist-language=python -------------------------------------------------------------------------------- /12306火车票/.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 7 | -------------------------------------------------------------------------------- /12306火车票/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /12306火车票/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /12306火车票/.idea/vip-12306.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 11 | 
-------------------------------------------------------------------------------- /12306火车票/12306.py: -------------------------------------------------------------------------------- 1 | # !/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | __author__ = '强子' 4 | import requests 5 | from settings import * 6 | import cons 7 | 8 | dict_station = {} 9 | for i in cons.station.split('@'): 10 | tmp_list = i.split('|') 11 | #print(tmp_list) 12 | if len(tmp_list) > 2: 13 | dict_station[tmp_list[1]] = tmp_list[2] 14 | print(dict_station) 15 | 16 | from_station = dict_station[FROM_STATION] 17 | to_station = dict_station[TO_STATION] 18 | print(from_station,to_station) 19 | headers={'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'} 20 | 21 | def queryTicket():#query_ticket 22 | url='https://kyfw.12306.cn/otn/leftTicket/queryX?leftTicketDTO.train_date='+TRAIN_DATE+'&leftTicketDTO.from_station='+from_station+'&leftTicketDTO.to_station='+to_station+'&purpose_codes=ADULT' 23 | print(url) 24 | response = requests.get(url=url,headers=headers,verify=False) 25 | result = response.json() 26 | print(result['data']['result']) 27 | print(TRAIN_DATE,FROM_STATION,TO_STATION) 28 | print('车次 '+' 座位 '+' 有无票'+' 票数') 29 | return result['data']['result'] 30 | 31 | n = 0 32 | ''' 33 | 23 = 软卧 34 | 28 = 硬卧 35 | 3 = 车次 36 | 29=硬座 37 | ''' 38 | 39 | for i in queryTicket(): 40 | tmp_list = i.split('|') 41 | #for ii in tmp_list: 42 | # print(n) 43 | # print(ii) 44 | # n += 1 45 | set = tmp_list[29] 46 | set1 = tmp_list[23] 47 | if set == '' or set == '无': 48 | print(tmp_list[3],'硬座 '+'无票',tmp_list[29]) 49 | 50 | else: 51 | print(tmp_list[3],'硬座 '+'有票',tmp_list[29]) 52 | #下单 -------------------------------------------------------------------------------- /12306火车票/__pycache__/cons.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/12306火车票/__pycache__/cons.cpython-36.pyc -------------------------------------------------------------------------------- /12306火车票/__pycache__/cons.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/12306火车票/__pycache__/cons.cpython-37.pyc -------------------------------------------------------------------------------- /12306火车票/__pycache__/settings.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/12306火车票/__pycache__/settings.cpython-36.pyc -------------------------------------------------------------------------------- /12306火车票/__pycache__/settings.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/12306火车票/__pycache__/settings.cpython-37.pyc -------------------------------------------------------------------------------- /12306火车票/captcha.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/12306火车票/captcha.jpg -------------------------------------------------------------------------------- 
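Note on 12306.py above: the seat-availability check relies on magic indices into the '|'-separated rows returned by the leftTicket query (3 = 车次, 23 = 软卧, 28 = 硬卧, 29 = 硬座) and only inspects hard seats. Below is a minimal, self-contained sketch that pulls those fields out by name; the index layout is taken from the comment block in 12306.py and is an undocumented 12306 detail that may change at any time.

# Sketch only: field positions copied from the comment in 12306.py; not an official API contract.
SEAT_INDEX = {'硬座': 29, '硬卧': 28, '软卧': 23}

def parse_ticket_row(row):
    """Turn one '|'-separated leftTicket row into a small dict of named fields."""
    fields = row.split('|')
    info = {'车次': fields[3]}
    for seat, idx in SEAT_INDEX.items():
        value = fields[idx] if idx < len(fields) else ''
        info[seat] = '无票' if value in ('', '无') else value
    return info

if __name__ == '__main__':
    # Synthetic row for illustration only; real rows come from queryTicket() in 12306.py.
    demo = [''] * 40
    demo[3], demo[23], demo[28], demo[29] = 'K180', '有', '无', '5'
    print(parse_ticket_row('|'.join(demo)))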
/12306火车票/login.py: -------------------------------------------------------------------------------- 1 | # !/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | # author:心蓝 4 | import requests 5 | 6 | """ 7 | 实现12306网站的登录 8 | """ 9 | map = { 10 | '1': '37,42', 11 | '2': '111,42', 12 | '3': '180,42', 13 | '4': '254,42', 14 | '5': '37,118', 15 | '6': '111,118', 16 | '7': '180,118', 17 | '8': '254,118', 18 | } 19 | 20 | 21 | def get_point(indexs): 22 | """ 23 | 根据输入的序号获取相应的坐标 24 | :param indexs: 1,2 25 | :return: 26 | """ 27 | indexs = indexs.split(',') 28 | temp = [] 29 | for index in indexs: 30 | temp.append(map[index]) 31 | return ','.join(temp) 32 | 33 | 34 | # cookie 保持 浏览器 35 | session = requests.Session() 36 | 37 | # 伪装 38 | headers = { 39 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36' 40 | } 41 | session.headers.update(headers) 42 | # 1.访问登录页面 43 | login_url = 'https://kyfw.12306.cn/otn/login/init' 44 | session.get(login_url) 45 | 46 | # 2.下载验证码图片 47 | captcha_url = 'https://kyfw.12306.cn/passport/captcha/captcha-image?login_site=E&module=login&rand=sjrand&0.5846169880733507' 48 | captcha_response = session.get(captcha_url) 49 | 50 | with open('captcha.jpg', 'wb') as f: 51 | f.write(captcha_response.content) 52 | 53 | # 3.校验验证码 54 | check_captcha_url = 'https://kyfw.12306.cn/passport/captcha/captcha-check' 55 | form_data = { 56 | 'answer': get_point(input('请输入正确的序号>>>:')), 57 | 'login_site': 'E', 58 | 'rand': 'sjrand' 59 | } 60 | check_response = session.post(check_captcha_url, data=form_data) 61 | #print(check_response.json()) 62 | if check_response.json()['result_code'] == '4': #'result_message': '验证码校验成功', 'result_code': '4' 63 | # 校验成功 64 | # 4.校验用户名和密码 65 | login_url = 'https://kyfw.12306.cn/passport/web/login' 66 | form_data = { 67 | 'username': '你的账号', 68 | 'password': '你的密码', 69 | 'appid': 'otn' 70 | } 71 | login_response = session.post(login_url, data=form_data) 72 | print(login_response.json()) 73 | #'result_message': '登录成功', 'result_code': 0, 'uamtk': '0YeWhGwOquOICVxAQZz0NxXSX6a_0AJcOBG6zfDMNsolm1210' 74 | if login_response.json()['result_code'] == 0: 75 | 76 | # 5.获取 权限 token 77 | uamtk_url = 'https://kyfw.12306.cn/passport/web/auth/uamtk' 78 | uamtk_response = session.post(uamtk_url, data={'appid': 'otn'}) 79 | #print(uamtk_response.json()) 80 | #'result_message': '验证通过', 'result_code': 0, 'apptk': None, 'newapptk': '-oTvBp0Sfb_LwV6irTcmGcf9jtyO5W_xykRJNL2t4Gk511210' 81 | if uamtk_response.json()['result_code'] == 0: 82 | # 6.校验token 83 | check_token_url = 'https://kyfw.12306.cn/otn/uamauthclient' 84 | check_token_response = session.post(check_token_url, data={'tk': uamtk_response.json()['newapptk']}) 85 | print(check_token_response.json()) 86 | 87 | -------------------------------------------------------------------------------- /12306火车票/settings.py: -------------------------------------------------------------------------------- 1 | # !/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | __author__ = '强子' 4 | 5 | TRAIN_DATE = '2019-06-11' 6 | FROM_STATION = '新乡' 7 | TO_STATION = '南阳' 8 | SET = 23 -------------------------------------------------------------------------------- /51_job/README.md: -------------------------------------------------------------------------------- 1 |  2 | - 内容: 爬取51job前程无忧简关于数据分析的职位信息,并对获取的数据进行数据清洗与分析,如各城市招聘岗位数、薪资与各城市工作地点数量,关系,学历,经验要求等关系、公司类型与对应岗位数、职位要求等可视化。 3 | 4 | - 对应CSDN文章:《爬取51job前程无忧简历](https://blog.csdn.net/weixin_43746433/article/details/90490227)》 5 | 6 | 
- 数据下载:链接:https://pan.baidu.com/s/1j-4HQduESyl2hm7-c3mTlg 7 | 提取码:rf0e 8 | 9 | 10 | - 微信:why19970628 11 | 12 | - 欢迎与我交流 13 | -------------------------------------------------------------------------------- /51_job/clean_data/job_company/大数据公司类型图饼图.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/51_job/clean_data/job_company/大数据公司类型图饼图.jpg -------------------------------------------------------------------------------- /51_job/clean_data/job_pic.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | data=pd.read_csv('test_datasets_finally.csv',delimiter='#',header=0) 3 | df=pd.DataFrame(data) 4 | print(df.shape) 5 | print(df.index) 6 | print(df.loc[:,'area'].nunique())#地区数 7 | area=df.loc[:,'area'].value_counts() 8 | print(area.shape) 9 | print(area.head()) 10 | print(area.index) 11 | area2=area.values.tolist() 12 | area=area.reset_index() 13 | print(area.head()) 14 | area1=area.loc[:,'index'].tolist() 15 | print('地区',area1) 16 | print('数量',area2) 17 | 18 | from pyecharts import Bar 19 | from pyecharts import Geo 20 | from pyecharts import Map 21 | map = Map("大数据工作分布图", "data from 51job",title_color="#404a59", title_pos="center") 22 | map.add("", area1,area2 , maptype='china',is_visualmap=True,visual_text_color='#000',is_label_show=True) 23 | map.render("./job_pic/大数据工作城市分布.html") 24 | map.render(path='snapshot.png') 25 | #map.render(path='snapshot.pdf') 26 | 27 | #effectScatter heatmap 28 | geo = Geo("大数据工作分布热力图", "data from 51job", title_color="#fff", title_pos="center", width=1200, height=600, background_color='#404a59') 29 | geo.add("大数据工作分布热力图", area1, area2, visual_range=[0, 35], type='heatmap',visual_text_color="#fff", symbol_size=15, is_visualmap=True, is_roam=False) 30 | geo.render('./job_pic/大数据工作分布热力图.html') 31 | 32 | geo = Geo("大数据工作分布城市评分", "data from 51job", title_color="#fff", title_pos="center", width=1200, height=600, background_color='#404a59') 33 | # type="effectScatter", is_random=True, effect_scale=5 使点具有发散性 34 | geo.add("空气质量评分", area1, area2,maptype='china', type="effectScatter", is_random=True, effect_scale=5, visual_range=[0, 5],visual_text_color="#fff", symbol_size=10, is_visualmap=True, is_roam=False) 35 | geo.render("./job_pic/大数据工作分布城市评分.html") 36 | 37 | 38 | #from pyecharts.charts import Geo 39 | #map = Map("全国地图示例" ) 40 | #map.add("", area, maptype='china' ,visual_text_color="#fff",symbol_size=10, is_visualmap=True) 41 | #map.render("全国大数据工作城市.html") 42 | #map -------------------------------------------------------------------------------- /51_job/clean_data/job_pic/examples.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/51_job/clean_data/job_pic/examples.jpg -------------------------------------------------------------------------------- /51_job/clean_data/wordscloud.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import jieba, re 3 | from scipy.misc import imread 4 | from wordcloud import WordCloud, ImageColorGenerator, STOPWORDS 5 | import matplotlib.pyplot as plt 6 | data = pd.read_csv('test_datasets_finally.csv',delimiter='#') # 读取Excel转为dabaframe 7 | df = pd.DataFrame(data) 8 | print('去掉空值前有{}行'.format(df.shape[0])) # 获得一共有多少行 9 | file1 = 
df.loc[:,'describe'].dropna(how='any') # 去掉空值 10 | print('去掉空值后有{}行'.format(file1.shape[0])) # 获得一共有多少行 11 | print(file1.head()) 12 | text1 = ''.join(i for i in file1) # 把所有字符串连接成一个长文本 13 | responsibility = re.sub(re.compile(',|;|\.|、|。'), '', text1) # 去掉逗号等符号 14 | wordlist1 = " ".join(jieba.cut(responsibility, cut_all=True)) # 分析岗位职责 15 | # wordlist1=" ".join(jieba.cut(requirement,cut_all=True))#分析岗位要求 16 | font_path = r'C:\Windows\Fonts\simkai.ttf' 17 | stopwords = list(STOPWORDS) + ['数据', '分析', '负责', '相关', '公司', '进行', '工作','岗位', 18 | '岗位职责','上学','互联网','以上','以上学历','任职','要求'] +\ 19 | ['数据分析','以上学历','优先','计算','经验','学历','上学','熟练','使用']#分析岗位要求 20 | #bgimg=imread(r'1.png')#设置背景图片 21 | wc = WordCloud(font_path=font_path, # 设置字体 22 | background_color="black", # 背景颜色 23 | max_words=1000, # 词云显示的最大词数 24 | stopwords=stopwords, # 设置停用词 25 | max_font_size=300, # 字体最大值 26 | #mask=bgimg, # 设置背景图片 27 | random_state=42, # 设置有多少种随机生成状态,即有多少种配色 28 | width=1200, height=860, 29 | margin=4, # 设置图片默认的大小,margin为词语边缘距离 30 | ).generate(str(wordlist1)) 31 | #image_colors = ImageColorGenerator(bgimg) # 根据图片生成词云颜色 32 | plt.imshow(wc) 33 | plt.axis("off") 34 | plt.savefig("./job_pic/examples1.jpg") # 必须在plt.show之前,不是图片空白 35 | plt.show() 36 | -------------------------------------------------------------------------------- /Analysis_Wechat_Friends/Analysis_Wechat.py: -------------------------------------------------------------------------------- 1 | import itchat 2 | import pandas as pd 3 | from pyecharts import Pie, Map, Page, Bar 4 | 5 | 6 | # 根据key值得到对应的信息 7 | def get_key_info(friends_info, key): 8 | return list(map(lambda friend_info: friend_info.get(key), friends_info)) 9 | 10 | 11 | # 获得所需的微信好友信息 12 | def get_friends_info(): 13 | itchat.auto_login(hotReload=True) 14 | friends = itchat.get_friends() 15 | print('~~~~~~~~~~~~~~~~~~~~~~~~~') 16 | friends_info = dict( 17 | # 省份 18 | province = get_key_info(friends, "Province"), 19 | # 城市 20 | city = get_key_info(friends, "City"), 21 | # 昵称 22 | nickname = get_key_info(friends, "Nickname"), 23 | # 性别 24 | sex = get_key_info(friends, "Sex"), 25 | # 签名 26 | signature = get_key_info(friends, "Signature"), 27 | # 备注 28 | remarkname = get_key_info(friends, "RemarkName"), 29 | # 用户名拼音全拼 30 | pyquanpin = get_key_info(friends, "PYQuanPin") 31 | ) 32 | return friends_info 33 | 34 | 35 | # 性别分析 36 | def analysisSex(): 37 | friends_info = get_friends_info() 38 | df = pd.DataFrame(friends_info) 39 | print(df) 40 | sex_count = df.groupby(['sex'], as_index=True)['sex'].count() 41 | print(sex_count) 42 | temp = dict(zip(list(sex_count.index), list(sex_count))) 43 | print(temp) 44 | data = {} 45 | data['保密'] = temp.pop(0) 46 | data['男'] = temp.pop(1) 47 | data['女'] = temp.pop(2) 48 | # 画图 49 | page = Page() 50 | attr, value = data.keys(), data.values() 51 | chart = Pie('微信好友性别比') 52 | chart.add('', attr, value, center=[50, 50], 53 | redius=[30, 70], is_label_show=True, legend_orient='horizontal', legend_pos='center', 54 | legend_top='bottom', is_area_show=True) 55 | page.add(chart) 56 | page.render('analysisSex.html') 57 | 58 | 59 | # 省份分析 60 | def analysisProvince(): 61 | friends_info = get_friends_info() 62 | df = pd.DataFrame(friends_info) 63 | province_count = df.groupby('province', as_index=True)['province'].count().sort_values() 64 | temp = list(map(lambda x: x if x != '' else '未知', list(province_count.index))) 65 | # 画图 66 | page = Page() 67 | # style = Style(width=1100, height=600) 68 | # style_middle = Style(width=900, height=500) 69 | attr, value = temp, 
list(province_count) 70 | chart1 = Map('好友分布(中国地图)')#, **style.init_style 71 | chart1.add('', attr, value, is_label_show=True, is_visualmap=True, visual_text_color='#000') 72 | page.add(chart1) 73 | chart2 = Bar('好友分布柱状图')#, **style_middle.init_style 74 | chart2.add('', attr, value, is_stack=True, is_convert=True, 75 | label_pos='inside', is_legend_show=True, is_label_show=True) 76 | page.add(chart2) 77 | page.render('analysisProvince.html') 78 | 79 | 80 | # 具体省份分析 81 | def analysisCity(province): 82 | friends_info = get_friends_info() 83 | df = pd.DataFrame(friends_info) 84 | temp1 = df.query('province == "%s"' % province) 85 | city_count = temp1.groupby('city', as_index=True)['city'].count().sort_values() 86 | attr = list(map(lambda x: '%s市' % x if x != '' else '未知', list(city_count.index))) 87 | value = list(city_count) 88 | # 画图 89 | page = Page() 90 | # style = Style(width=1100, height=600) 91 | # style_middle = Style(width=900, height=500) 92 | chart1 = Map('%s好友分布' % province)#, **style.init_style 93 | chart1.add('', attr, value, maptype='%s' % province, is_label_show=True, 94 | is_visualmap=True, visual_text_color='#000') 95 | page.add(chart1) 96 | chart2 = Bar('%s好友分布柱状图' % province)#, **style_middle.init_style 97 | chart2.add('', attr, value, is_stack=True, is_convert=True, label_pos='inside', is_label_show=True) 98 | page.add(chart2) 99 | page.render('analysisCity.html') 100 | 101 | if __name__ == '__main__': 102 | analysisSex() 103 | analysisProvince() 104 | analysisCity("河南") 105 | 106 | -------------------------------------------------------------------------------- /Baidu_Address/README.md: -------------------------------------------------------------------------------- 1 | 爬虫小程序 2 | 3 | ![Image text](https://github.com/why19970628/Python_Crawler/tree/master/Baidu_Address/image/smaple.PNG) 4 | 5 | 6 | 爬取百度地图的某位置的公司信息,包括公司名称,公司地址等 7 | 8 | csv文件大约几十条数据 9 | 10 | 博客地址:https://blog.csdn.net/weixin_43746433 11 | 12 | 微信:why19970628 13 | 14 | 欢迎与我交流 15 | -------------------------------------------------------------------------------- /Baidu_Address/baidu_address.py: -------------------------------------------------------------------------------- 1 | from selenium import webdriver 2 | from selenium.webdriver.support.ui import WebDriverWait 3 | import pandas as pd 4 | import time 5 | from time import sleep 6 | import csv 7 | chrome_driver = r"D:\ProgramData\Anaconda3\Lib\site-packages\selenium\webdriver\chrome\chromedriver.exe" 8 | browser = webdriver.Chrome(executable_path=chrome_driver) 9 | wait=WebDriverWait(browser,3) 10 | browser.get('https://map.baidu.com/search/%E6%96%B0%E4%B9%A1%E4%BA%92%E8%81%94%E7%BD%91%E5%A4%A7%E5%8E%A6%E9%83%BD%E6%9C%89%E5%93%AA%E4%BA%9B%E5%85%AC%E5%8F%B8/@12683385.160376176,4180157.68,19z?querytype=s&da_src=shareurl&wd=%E6%96%B0%E4%B9%A1%E4%BA%92%E8%81%94%E7%BD%91%E5%A4%A7%E5%8E%A6%E9%83%BD%E6%9C%89%E5%93%AA%E4%BA%9B%E5%85%AC%E5%8F%B8&c=152&src=0&pn=0&sug=0&l=19&b=(12682905.160376176,4179893.43;12683865.160376176,4180421.93)&from=webmap&biz_forward=%7B%22scaler%22:1,%22styles%22:%22pl%22%7D&device_ratio=1') 11 | sleep(3) 12 | 13 | def search(writer): 14 | for i in range(10): 15 | 16 | company_names = browser.find_elements_by_xpath('//div[@class="ml_30 mr_90"]/div[@class="row"]/span/a') 17 | print(len(company_names)) 18 | 19 | company_addresses = browser.find_elements_by_xpath('//div[@class="ml_30 mr_90"]/div[@class="row addr"]/span') 20 | print(len(company_addresses)) 21 | 22 | 23 | # ipone_lists=[] 24 | # try: 25 | # 
ipones=browser.find_elements_by_xpath('//div[@class="ml_30 mr_90"]/div[@class="row tel"]')#电话 26 | # for i in ipones: 27 | # ipone_lists.append(ipones[i]) 28 | # except: 29 | # ipone_lists.append('无') 30 | # if browser.find_elements_by_xpath('//div[@class="row tel"]'): 31 | # company_iphones r= browser.find_elements_by_xpath('//div[@class="ml_30 mr_90"]/div[@class="row tel"]') 32 | # for i in range(len(company_iphones)): 33 | # ipone_lists.append(company_iphones[i].text) 34 | # ipone_lists.append('无') 35 | # print(ipone_lists) 36 | # print(len(ipone_lists)) 37 | 38 | for i in range(len(company_names)): 39 | company_name = company_names[i].text 40 | 41 | company_address = company_addresses[i].text 42 | 43 | print(company_name, company_address) 44 | # ipone_list=ipone_lists[i] 45 | 46 | writer.writerow([company_name, company_address]) 47 | 48 | browser.find_element_by_xpath('//div[@id="poi_page"]/p/span/a[@tid="toNextPage"]').click() 49 | sleep(5) 50 | 51 | 52 | def main(): 53 | fp = open('company.csv', 'w', newline='', encoding="utf_8_sig") 54 | writer = csv.writer(fp) 55 | writer.writerow(['公司名称', '地址', '电话']) 56 | search(writer) 57 | print('Over !!!!') 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | if __name__ == '__main__': 66 | main() 67 | -------------------------------------------------------------------------------- /Baidu_Address/company.csv: -------------------------------------------------------------------------------- 1 | 公司名称,地址,电话 2 | 互联网大厦,河南省新乡市红旗区新中大道(中) 3 | 新乡市嘟嘟网络技术有限公司,洪门镇金穗大道新中大道交叉口互联网大厦8楼 4 | 嘉亿国际新闻大厦,新乡市红旗区洪门镇红旗区互联网大厦西 5 | 叁河鼎盛公司,洪门镇金穗大道与新中大道交叉口互联网大厦六楼607 6 | 饿了么公司,洪门镇金穗大道(中)互联网大厦30层 7 | 新乡搜狗运营中心,河南省新乡市红旗区金穗大道互联网大厦6楼 8 | 新乡天驰网络科技有限公司,河南省新乡市红旗区金穗大道互联网大厦702室 9 | 万达物流公司,新中大道互联网大厦22楼 10 | 大胜商贸有限公司,互联网大厦商业步行街131号 11 | 万达仓储公司,洪门镇金穗大道互联网大厦2202 12 | 银谷普惠公司,新乡市红旗区开发区街道互联网大厦1801 13 | 平安公司,推测位置 14 | 互联网大厦-南1门,河南省新乡市红旗区洪门镇互联网大厦西嘉亿国际新闻大厦平安公司 15 | 新乡市动之力广告有限公司,推测位置 16 | 新利净化技术有限公司,新中大道与金穗大道交叉口西100米 17 | 新乡市百特智能转运设备有限公司,金穗大道东嘉亿互联网大厦1804室 18 | 新乡市中誉鼎力软件科技有限公司,新乡市红旗区互联网大厦3601室 19 | 庭雅商贸公司,新乡市互联网大厦701 20 | 新乡市彩纶纸业有限公司,洪门镇金穗大道与新中大道互联网大厦31楼3110室 21 | 指南者网络科技有限公司,推测位置 22 | 新乡搜狗运营中心,河南省新乡市红旗区金穗大道互联网大厦6楼 23 | 嘉德建筑工程有限公司,新乡市红旗区新中大道和金穗大道交叉口新乡市互联网大厦1103室 24 | 博洋翻译,推测位置 25 | 河南省乐境通电子商务有限公司,洪门镇金穗大道东嘉亿互联网大厦509 26 | 丰时商贸有限公司,新乡市红旗区金穗大道互联网大厦1910室 27 | 酷雷曼河南省运营中心,河南省新乡市红旗区金穗大道互联网大厦 28 | 河南一棵树电子商务有限公司,新乡市金穗大道与新中大道交叉口西北角嘉亿互联网大厦1601室 29 | 郑州一如既往软件科技有限公司,新中大道与金穗大道西北角互联网大厦 30 | 互联网大厦-地下停车场,河南省新乡市红旗区金穗大道东互联网大厦1505 31 | 新乡十里红妆婚庆公司,新乡市红旗区金穗大道与新中大道交叉口西北角互联网大厦1层 32 | 仁真装饰,新中大道与金穗大道交叉口互联网大厦2002 33 | 捷润科技,推测位置 34 | 互联网大厦-B座,洪门镇宝龙广场对面互联网大厦10层1011室 35 | 互联网大厦-北门,河南省新乡市红旗区金穗大道与新中大道交汇处 36 | 平安普惠投资咨询有限公司新乡新飞大道分公司,新乡市红旗区新中大道(中)附近 37 | 新乡市深鹏装饰工程有限公司,金穗大道嘉亿东方明珠32楼 38 | 新乡市惠民天下畜牧设备有限公司,新乡市红旗区嘉亿国际新闻大厦25层 39 | 互联网大厦停车场-出入口,河南省新乡市红旗区嘉亿街 40 | 互联网大厦停车场-出口,河南省新乡市红旗区金穗大道东 41 | 互联网大厦-西门,金穗大道(东)北100米 42 | 互联网大厦停车场-入口,河南省新乡市红旗区新中大道(中)辅路 43 | 新乡市福运到家互联网科技股份有限公司,平原路蓝钻国际 44 | 拉扎斯网络科技(上海)有限公司新乡分公司,洪门镇金穗大道与新中大道路口嘉亿东方明珠30层 45 | 汇益互联网服务有限公司,河南省新乡市红旗区金穗大道东辅路 46 | 广州金不换财务咨询有限公司新乡分公司,河南省新乡市红旗区嘉亿新闻大厦1905室 47 | 恒大人寿(新乡中心支公司),河南省新乡市红旗区金穗大道新闻大厦22楼 48 | 新乡市园林绿化工程有限公司,新乡市金穗大道与新中大道交叉口嘉亿新闻大厦27层 49 | 互联网大厦-南2门,金穗大道(东)北50米 50 | 河南恒东商贸有限公司,河南省新乡市红旗区金穗大道新闻大厦2611、2612室 51 | 中国建设银行24小时自助银行(金穂大道支行),新乡市红旗区金穗大道与新中大道交叉口西北角互联网大厦1层附近 52 | 北京高幂数据科技有限公司河南分公司,新乡市红旗区嘉亿新闻大厦2601 53 | 河南金友互联网技术有限公司,淘宝城7楼 54 | 康宝莱公司,洪门镇新中大道与金穗大道交叉口嘉亿东方明珠16楼 55 | 河南纳澜电器有限公司,河南省新乡市红旗区嘉亿国际新闻大厦河南纳澜电器有限公司 56 | 车邦(深圳)互联网金融服务有限公司,河南省新乡市红旗区牧野大道南段电子信息科技园 57 | 积木家互联网装修(新乡体验中心),新二街与金穗大道(东)交叉口东北100米靖业摩尔151附近 58 | 河南御乐坊酒店有限公司,河南省新乡市红旗区新中大道海利川蜀香火锅东北100米 59 | EOTO视觉摄影工作室,新乡市红旗区金穗大道东互联网大厦2810 60 | 
中煤润邦机械装备股份有限公司,新乡市新中大道与金穗大道西北角嘉亿东方明珠13层1303房 61 | 河南省帕菲特搬运设备有限公司,河南省新乡市金穗大道与新中大道交叉口西北角嘉亿东方明珠701室 62 | 幸福之家互联网家装资源平台,河南省新乡市红旗区向阳路296号 63 | 爱美得美发工作室,金穗大道与新中大道西北角互联网大厦商业裙房104室 64 | 嘉亿东方明珠中国民生银行,河南省新乡市红旗区互联网大厦嘉亿东方明珠中国民生银行 65 | 恒富电子,河南省新乡市红旗区新中大道与新飞大道交叉口嘉亿新闻大厦恒富电子 66 | 黄金时代游泳健身俱乐部,推测位置 67 | 八马茶业(互联网大厦北),金穗大道东互联网大厦负一楼 68 | 塞纳春天互联网家装(黄岗分店),河南省新乡市红旗区嘉亿·明珠商业街130 69 | 四季康道养生会馆,河南省新乡市牧野区西华大道佳煌不锈钢西南侧10米 70 | 雅藏文化,新乡市红旗区洪门镇互联网大厦17楼1705室 71 | 丰泽堂老银铺,推测位置 72 | 独角兽花店,河南省新乡市红旗区互联网大厦裙房红咖啡一楼 73 | 3Q便利店(嘉亿店),新中大道嘉亿互联网大厦北红咖啡一楼 74 | 街电(青禾馅饼嘉亿店),新中大道与金穗大道交叉口嘉亿互联网大厦1308室 75 | 森怡康新乡运营中心,推测位置 76 | 东方明珠-停车场,新乡市红旗区金穗大道和新中大道交叉口西北角互联网大厦商业街134号 77 | 丁丁贷,河南省新乡市新中大道与金穗大道交叉口互联网大厦15楼 78 | 善林金融,新乡市红旗区金穗大道与新中大道交叉口西北角互联网大厦1层附近 79 | 东方明珠停车场-出口,金穗大道与新中大道交叉口西北角互联网大厦1层附近 80 | 嘉亿·东方明珠-地下停车场,金穗大道与新中大道交叉口西北角互联网大厦1层 81 | -------------------------------------------------------------------------------- /Baidu_Address/image/smaple.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/Baidu_Address/image/smaple.PNG -------------------------------------------------------------------------------- /Baidu_Music/baidu_music.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import re,json,pprint,os 3 | #url='http://zhangmenshiting.qianqian.com/data2/music/a612909cdafecf20933bd2942c43421c/596603939/596603939.mp3?xcode=10263e95dfecc6e6f4316fffb8ff8771' 4 | def download_music(songid): 5 | url='http://musicapi.taihe.com/v1/restserver/ting?method=baidu.ting.song.playAAC&format=jsonp&callback=jQuery17208091693203165158_1545207385401&songid='+songid+'&from=web&_=1545207388641' 6 | url2='href="http://music.163.com/song/media/outer/url?id=317151.mp3' 7 | response=requests.get(url) 8 | data=json.loads(re.findall("{.*}",response.text)[0]) 9 | music_name=data['songinfo']['title'] 10 | artist=data['songinfo']['artist'] 11 | music_url=data['bitrate']['file_link'] 12 | #pprint.pprint(data) 13 | return music_name,music_url,artist 14 | #music_name,music_url=download_music('265715650') 15 | #print(music_name,music_url,) 16 | def get_songid(artist_id): 17 | for i in range(0, 41, 20): 18 | reponse=requests.get(url="http://music.taihe.com/artist/"+artist_id) 19 | #print(reponse.text)#ctrl+u 查看网页源代码 20 | songids=re.findall('{"id":"(.*)","kr_top"',reponse.text) 21 | return songids 22 | def save_music(music_name,music_url,artist): 23 | music_res = requests.get(music_url) 24 | try: 25 | folder = os.path.exists(artist) 26 | 27 | if not folder: # 判断是否存在文件夹如果不存在则创建为文件夹 28 | os.makedirs(artist) # makedirs 创建文件时如果路径不存在会创建这个路径 29 | print 30 | "--- new folder... ---" 31 | print 32 | "--- OK ---" 33 | 34 | else: 35 | print 36 | "--- There is this folder! 
---" 37 | 38 | file = "D:/软件(学习)/Python/TanZhou/百度音乐/" + artist + '/' + music_name + ".mp3" 39 | with open(file,'wb') as f: 40 | f.write(music_res.content) 41 | except: 42 | print('下载失败') 43 | def run(): 44 | artist_id=input('请输入网易歌手ID:') 45 | singids=get_songid(artist_id) 46 | for songid in singids: 47 | music_name, music_url,artist=download_music(songid) 48 | save_music(music_name, music_url,artist) 49 | print(music_name + " 下载完成") 50 | run() 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /Baidu_Music/baidu_music2.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import re,json,pprint,os 3 | from urllib import request 4 | import urllib 5 | from lxml import etree 6 | header = { 7 | 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36'} 8 | #url='http://zhangmenshiting.qianqian.com/data2/music/a612909cdafecf20933bd2942c43421c/596603939/596603939.mp3?xcode=10263e95dfecc6e6f4316fffb8ff8771' 9 | def download_music(songid): 10 | url='http://musicapi.taihe.com/v1/restserver/ting?method=baidu.ting.song.playAAC&format=jsonp&callback=jQuery17208091693203165158_1545207385401&songid='+songid+'&from=web&_=1545207388641' 11 | url2='href="http://music.163.com/song/media/outer/url?id=317151.mp3' 12 | response=requests.get(url) 13 | data=json.loads(re.findall("{.*}",response.text)[0]) 14 | music_name=data['songinfo']['title'] 15 | artist=data['songinfo']['artist'] 16 | music_url=data['bitrate']['file_link'] 17 | #pprint.pprint(data) 18 | return music_name,music_url,artist 19 | #music_name,music_url=download_music('265715650') 20 | #print(music_name,music_url,) 21 | 22 | def get_songid(artist_id): 23 | song_id = urllib.request.quote(artist_id) 24 | songid=[] 25 | for i in range(0, 21, 20): 26 | url = "http://music.taihe.com/search/song?s=1&key="+song_id+"&jump=0&start="+str(i)+"&size=20&third_type=0" 27 | print(url) 28 | req = request.Request(url,headers=header) 29 | html = request.urlopen(req).read().decode('utf-8') 30 | #songids=re.findall('data-playdata="(.*)"moduleName"',html) 31 | songids=re.findall('"sid":(.*),"author":',html) 32 | #print(songids) 33 | html = etree.HTML(html) 34 | songid=songid+songids 35 | song_num = html.xpath('//ul[@class="tab-list"]/li/a[@class="list"]/text()')[0] 36 | #print(song_num) 37 | #print(songid) 38 | return songid,song_num 39 | #get_songid('薛之谦') 40 | def save_music(music_name,music_url,artist): 41 | music_res = requests.get(music_url) 42 | try: 43 | folder = os.path.exists(artist) 44 | 45 | if not folder: # 判断是否存在文件夹如果不存在则创建为文件夹 46 | os.makedirs(artist) # makedirs 创建文件时如果路径不存在会创建这个路径 47 | print 48 | "--- new folder... ---" 49 | print 50 | "--- OK ---" 51 | 52 | else: 53 | print 54 | "--- There is this folder! 
---" 55 | 56 | file = "D:/软件(学习)/Python/TanZhou/百度音乐/" + artist + '/' + music_name + ".mp3" 57 | with open(file,'wb') as f: 58 | f.write(music_res.content) 59 | except: 60 | print('下载失败') 61 | def run(): 62 | artist_id=input('请输入网易歌手名字:') 63 | singids=get_songid(artist_id)[0] 64 | #print(singids) 65 | songmun=get_songid(artist_id)[1] 66 | print(songmun) 67 | for songid in singids: 68 | music_name, music_url,artist=download_music(songid) 69 | save_music(music_name, music_url,artist) 70 | print(music_name + " 下载完成") 71 | run() 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /Baidu_Music/wangyi_music.py: -------------------------------------------------------------------------------- 1 | # import urllib 2 | from urllib import request 3 | # import requests 4 | # url2='http://music.163.com/song/media/outer/url?id=423776423.mp3' 5 | # print(url2) 6 | # urllib.request.urlretrieve(url2,'3.mp3') 7 | # music_res = requests.get(url2) 8 | # with open('4.mp3','wb') as f: 9 | # f.write(music_res.content) 10 | # print('成功') 11 | 12 | import requests 13 | from bs4 import BeautifulSoup 14 | import urllib.request 15 | import os 16 | import re 17 | 18 | headers = { 19 | 'Referer': 'http://music.163.com/', 20 | 'Host': 'music.163.com', 21 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.75 Safari/537.36', 22 | 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8', 23 | } 24 | 25 | # 歌单的url地址 26 | play_url = 'http://music.163.com/playlist?id=2182968685' 27 | 28 | # 获取页面内容 29 | s = requests.session() 30 | response = s.get(play_url, headers=headers).content 31 | 32 | # 使用bs4匹配出对应的歌曲名称和地址 33 | s = BeautifulSoup(response, 'lxml') 34 | main = s.find('ul', {'class': 'f-hide'}) 35 | pat='data-rid="(.*?)"' 36 | singerid=re.compile(pat).findall(str(s)) 37 | id=singerid[0] 38 | print(singerid[0]) 39 | 40 | lists = [] 41 | for music in main.find_all('a'): 42 | list = [] 43 | # print('{} : {}'.format(music.text, music['href'])) 44 | musicUrl = 'http://music.163.com/song/media/outer/url' + music['href'][5:] + '.mp3' 45 | musicName = music.text 46 | # 单首歌曲的名字和地址放在list列表中 47 | list.append(musicName) 48 | list.append(musicUrl) 49 | # 全部歌曲信息放在lists列表中 50 | lists.append(list) 51 | 52 | print(lists) 53 | 54 | # 下载列表中的全部歌曲,并以歌曲名命名下载后的文件,文件位置为当前文件夹 55 | for i in lists: 56 | url = i[1] 57 | name = i[0] 58 | try: 59 | folder = os.path.exists(id) 60 | 61 | if not folder: # 判断是否存在文件夹如果不存在则创建为文件夹 62 | os.makedirs(id) # makedirs 创建文件时如果路径不存在会创建这个路径 63 | print 64 | "--- new folder... ---" 65 | print 66 | "--- OK ---" 67 | 68 | else: 69 | print 70 | "--- There is this folder! 
---" 71 | print('正在下载', name) 72 | file="D:/软件(学习)/Python/TanZhou/百度音乐/"+id+'/'+name+".mp3" 73 | urllib.request.urlretrieve(url, file) 74 | print('下载成功') 75 | except: 76 | print('下载失败') -------------------------------------------------------------------------------- /ChuanZhi_Class/result/ts.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/ChuanZhi_Class/result/ts.txt -------------------------------------------------------------------------------- /ChuanZhi_Class/scrapy.cfg: -------------------------------------------------------------------------------- 1 | # Automatically created by: scrapy startproject 2 | # 3 | # For more information about the [deploy] section see: 4 | # https://scrapyd.readthedocs.io/en/latest/deploy.html 5 | 6 | [settings] 7 | default = ts.settings 8 | 9 | [deploy] 10 | #url = http://localhost:6800/ 11 | project = ts 12 | -------------------------------------------------------------------------------- /ChuanZhi_Class/ts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/ChuanZhi_Class/ts/__init__.py -------------------------------------------------------------------------------- /ChuanZhi_Class/ts/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/ChuanZhi_Class/ts/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /ChuanZhi_Class/ts/__pycache__/items.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/ChuanZhi_Class/ts/__pycache__/items.cpython-37.pyc -------------------------------------------------------------------------------- /ChuanZhi_Class/ts/__pycache__/pipelines.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/ChuanZhi_Class/ts/__pycache__/pipelines.cpython-37.pyc -------------------------------------------------------------------------------- /ChuanZhi_Class/ts/__pycache__/settings.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/ChuanZhi_Class/ts/__pycache__/settings.cpython-37.pyc -------------------------------------------------------------------------------- /ChuanZhi_Class/ts/items.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define here the models for your scraped items 4 | # 5 | # See documentation in: 6 | # https://doc.scrapy.org/en/latest/topics/items.html 7 | 8 | import scrapy 9 | 10 | 11 | class TsItem(scrapy.Item): 12 | # define the fields for your item here like: 13 | # name = scrapy.Field() 14 | title=scrapy.Field() 15 | link=scrapy.Field() 16 | stu=scrapy.Field() 17 | -------------------------------------------------------------------------------- /ChuanZhi_Class/ts/middlewares.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define here the models for your spider middleware 4 | # 5 | # See documentation in: 6 | # https://doc.scrapy.org/en/latest/topics/spider-middleware.html 7 | 8 | from scrapy import signals 9 | 10 | 11 | class TsSpiderMiddleware(object): 12 | # Not all methods need to be defined. If a method is not defined, 13 | # scrapy acts as if the spider middleware does not modify the 14 | # passed objects. 15 | 16 | @classmethod 17 | def from_crawler(cls, crawler): 18 | # This method is used by Scrapy to create your spiders. 19 | s = cls() 20 | crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) 21 | return s 22 | 23 | def process_spider_input(self, response, spider): 24 | # Called for each response that goes through the spider 25 | # middleware and into the spider. 26 | 27 | # Should return None or raise an exception. 28 | return None 29 | 30 | def process_spider_output(self, response, result, spider): 31 | # Called with the results returned from the Spider, after 32 | # it has processed the response. 33 | 34 | # Must return an iterable of Request, dict or Item objects. 35 | for i in result: 36 | yield i 37 | 38 | def process_spider_exception(self, response, exception, spider): 39 | # Called when a spider or process_spider_input() method 40 | # (from other spider middleware) raises an exception. 41 | 42 | # Should return either None or an iterable of Response, dict 43 | # or Item objects. 44 | pass 45 | 46 | def process_start_requests(self, start_requests, spider): 47 | # Called with the start requests of the spider, and works 48 | # similarly to the process_spider_output() method, except 49 | # that it doesn’t have a response associated. 50 | 51 | # Must return only requests (not items). 52 | for r in start_requests: 53 | yield r 54 | 55 | def spider_opened(self, spider): 56 | spider.logger.info('Spider opened: %s' % spider.name) 57 | 58 | 59 | class TsDownloaderMiddleware(object): 60 | # Not all methods need to be defined. If a method is not defined, 61 | # scrapy acts as if the downloader middleware does not modify the 62 | # passed objects. 63 | 64 | @classmethod 65 | def from_crawler(cls, crawler): 66 | # This method is used by Scrapy to create your spiders. 67 | s = cls() 68 | crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) 69 | return s 70 | 71 | def process_request(self, request, spider): 72 | # Called for each request that goes through the downloader 73 | # middleware. 74 | 75 | # Must either: 76 | # - return None: continue processing this request 77 | # - or return a Response object 78 | # - or return a Request object 79 | # - or raise IgnoreRequest: process_exception() methods of 80 | # installed downloader middleware will be called 81 | return None 82 | 83 | def process_response(self, request, response, spider): 84 | # Called with the response returned from the downloader. 85 | 86 | # Must either; 87 | # - return a Response object 88 | # - return a Request object 89 | # - or raise IgnoreRequest 90 | return response 91 | 92 | def process_exception(self, request, exception, spider): 93 | # Called when a download handler or a process_request() 94 | # (from other downloader middleware) raises an exception. 
95 | 96 | # Must either: 97 | # - return None: continue processing this exception 98 | # - return a Response object: stops process_exception() chain 99 | # - return a Request object: stops process_exception() chain 100 | pass 101 | 102 | def spider_opened(self, spider): 103 | spider.logger.info('Spider opened: %s' % spider.name) 104 | -------------------------------------------------------------------------------- /ChuanZhi_Class/ts/pipelines.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define your item pipelines here 4 | # 5 | # Don't forget to add your pipeline to the ITEM_PIPELINES setting 6 | # See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html 7 | 8 | 9 | class TsPipeline(object): 10 | def __init__(self): 11 | self.fh=open("../result/ts.txt","a") 12 | 13 | def process_item(self, item, spider): 14 | print(item['title']) 15 | print(item['link']) 16 | print(item['stu']) 17 | print('~~~~~~') 18 | self.fh.write(item['title'][0]+"\n"+item['link'][0]+"\n"+item['stu'][0]+"\n"+"~~~~~~~"+"\n") 19 | return item 20 | 21 | def close_spider(self): 22 | self.fh.close() -------------------------------------------------------------------------------- /ChuanZhi_Class/ts/settings.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Scrapy settings for ts project 4 | # 5 | # For simplicity, this file contains only settings considered important or 6 | # commonly used. You can find more settings consulting the documentation: 7 | # 8 | # https://doc.scrapy.org/en/latest/topics/settings.html 9 | # https://doc.scrapy.org/en/latest/topics/downloader-middleware.html 10 | # https://doc.scrapy.org/en/latest/topics/spider-middleware.html 11 | 12 | BOT_NAME = 'ts' 13 | 14 | SPIDER_MODULES = ['ts.spiders'] 15 | NEWSPIDER_MODULE = 'ts.spiders' 16 | 17 | 18 | # Crawl responsibly by identifying yourself (and your website) on the user-agent 19 | #USER_AGENT = 'ts (+http://www.yourdomain.com)' 20 | 21 | # Obey robots.txt rules 22 | ROBOTSTXT_OBEY = True 23 | 24 | # Configure maximum concurrent requests performed by Scrapy (default: 16) 25 | #CONCURRENT_REQUESTS = 32 26 | 27 | # Configure a delay for requests for the same website (default: 0) 28 | # See https://doc.scrapy.org/en/latest/topics/settings.html#download-delay 29 | # See also autothrottle settings and docs 30 | #DOWNLOAD_DELAY = 3 31 | # The download delay setting will honor only one of: 32 | #CONCURRENT_REQUESTS_PER_DOMAIN = 16 33 | #CONCURRENT_REQUESTS_PER_IP = 16 34 | 35 | # Disable cookies (enabled by default) 36 | #COOKIES_ENABLED = False 37 | 38 | # Disable Telnet Console (enabled by default) 39 | #TELNETCONSOLE_ENABLED = False 40 | 41 | # Override the default request headers: 42 | #DEFAULT_REQUEST_HEADERS = { 43 | # 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 44 | # 'Accept-Language': 'en', 45 | #} 46 | 47 | # Enable or disable spider middlewares 48 | # See https://doc.scrapy.org/en/latest/topics/spider-middleware.html 49 | #SPIDER_MIDDLEWARES = { 50 | # 'ts.middlewares.TsSpiderMiddleware': 543, 51 | #} 52 | 53 | # Enable or disable downloader middlewares 54 | # See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html 55 | #DOWNLOADER_MIDDLEWARES = { 56 | # 'ts.middlewares.TsDownloaderMiddleware': 543, 57 | #} 58 | 59 | # Enable or disable extensions 60 | # See https://doc.scrapy.org/en/latest/topics/extensions.html 61 | #EXTENSIONS = { 62 | # 
'scrapy.extensions.telnet.TelnetConsole': None, 63 | #} 64 | 65 | # Configure item pipelines 66 | # See https://doc.scrapy.org/en/latest/topics/item-pipeline.html 67 | ITEM_PIPELINES = { 68 | 'ts.pipelines.TsPipeline': 300, 69 | } 70 | 71 | # Enable and configure the AutoThrottle extension (disabled by default) 72 | # See https://doc.scrapy.org/en/latest/topics/autothrottle.html 73 | #AUTOTHROTTLE_ENABLED = True 74 | # The initial download delay 75 | #AUTOTHROTTLE_START_DELAY = 5 76 | # The maximum download delay to be set in case of high latencies 77 | #AUTOTHROTTLE_MAX_DELAY = 60 78 | # The average number of requests Scrapy should be sending in parallel to 79 | # each remote server 80 | #AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0 81 | # Enable showing throttling stats for every response received: 82 | #AUTOTHROTTLE_DEBUG = False 83 | 84 | # Enable and configure HTTP caching (disabled by default) 85 | # See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings 86 | #HTTPCACHE_ENABLED = True 87 | #HTTPCACHE_EXPIRATION_SECS = 0 88 | #HTTPCACHE_DIR = 'httpcache' 89 | #HTTPCACHE_IGNORE_HTTP_CODES = [] 90 | #HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage' 91 | -------------------------------------------------------------------------------- /ChuanZhi_Class/ts/spiders/__init__.py: -------------------------------------------------------------------------------- 1 | # This package will contain the spiders of your Scrapy project 2 | # 3 | # Please refer to the documentation for information on how to create and manage 4 | # your spiders. 5 | -------------------------------------------------------------------------------- /ChuanZhi_Class/ts/spiders/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/ChuanZhi_Class/ts/spiders/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /ChuanZhi_Class/ts/spiders/__pycache__/lesson.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/ChuanZhi_Class/ts/spiders/__pycache__/lesson.cpython-37.pyc -------------------------------------------------------------------------------- /ChuanZhi_Class/ts/spiders/lesson.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import scrapy 3 | from ts.items import TsItem 4 | from scrapy.http import Request 5 | 6 | 7 | class LessonSpider(scrapy.Spider): 8 | name = 'lesson' 9 | #allowed_domains = ['hellobi.com'] 10 | #start_urls = ['https://edu.hellobi.com/course/1'] 11 | #def parse(self, response): 12 | # item=TsItem() 13 | # #item['title']=response.xpath("//ol[@class='breadcrumb']/li[@class='active']/text()").extract() 14 | # #item['link'] = response.xpath("//ul[@class='nav nav-tabs']/li[@class='active']/a/@href").extract() 15 | # #item['stu'] = response.xpath("//span[@class='course-view']/text()").extract() 16 | # yield item 17 | # for i in range(2,121): 18 | # url='https://edu.hellobi.com/course/'+str(i) 19 | # yield Request(url,callback=self.parse) 20 | allowed_domains = ['douban.com'] 21 | start_urls = ['https://movie.douban.com/subject/27615441/'] 22 | def parse(self, response): 23 | item=TsItem() 24 | item['stu'] = 
response.xpath("//h3/span[@class='comment-info']/a/@href=").extract() 25 | print(item['stu']) 26 | yield item 27 | -------------------------------------------------------------------------------- /DangDang_Books/README.md: -------------------------------------------------------------------------------- 1 | 爬虫:爬取当当网图书信息, 书名,书图,价格,简介,评分,评论数量。 2 | 3 | dangdang文件夹为利用scrapy框架爬取图书信息 4 | 5 | 数据分析:对其进行简单的数据分析,如图书评论数量分布的漏斗图,价格分布的柱状图等等 6 | 7 | python_61.pkl文件大约1000条数据 8 | 9 | 博客地址:https://blog.csdn.net/weixin_43746433 10 | 11 | CSDN文章地址::https://blog.csdn.net/weixin_43746433/article/details/100059191 12 | 13 | 微信:why19970628 14 | 15 | 欢迎与我交流 16 | -------------------------------------------------------------------------------- /DangDang_Books/dangdang/dangdang/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/DangDang_Books/dangdang/dangdang/__init__.py -------------------------------------------------------------------------------- /DangDang_Books/dangdang/dangdang/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/DangDang_Books/dangdang/dangdang/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /DangDang_Books/dangdang/dangdang/__pycache__/items.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/DangDang_Books/dangdang/dangdang/__pycache__/items.cpython-37.pyc -------------------------------------------------------------------------------- /DangDang_Books/dangdang/dangdang/__pycache__/pipelines.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/DangDang_Books/dangdang/dangdang/__pycache__/pipelines.cpython-37.pyc -------------------------------------------------------------------------------- /DangDang_Books/dangdang/dangdang/__pycache__/settings.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/DangDang_Books/dangdang/dangdang/__pycache__/settings.cpython-37.pyc -------------------------------------------------------------------------------- /DangDang_Books/dangdang/dangdang/items.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define here the models for your scraped items 4 | # 5 | # See documentation in: 6 | # https://doc.scrapy.org/en/latest/topics/items.html 7 | 8 | import scrapy 9 | 10 | 11 | class DangdangItem(scrapy.Item): 12 | # define the fields for your item here like: 13 | # name = scrapy.Field() 14 | title=scrapy.Field() 15 | link=scrapy.Field() 16 | comment=scrapy.Field() 17 | 18 | -------------------------------------------------------------------------------- /DangDang_Books/dangdang/dangdang/middlewares.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define here the models for your spider middleware 4 | # 5 | # See 
documentation in: 6 | # https://doc.scrapy.org/en/latest/topics/spider-middleware.html 7 | 8 | from scrapy import signals 9 | 10 | 11 | class DangdangSpiderMiddleware(object): 12 | # Not all methods need to be defined. If a method is not defined, 13 | # scrapy acts as if the spider middleware does not modify the 14 | # passed objects. 15 | 16 | @classmethod 17 | def from_crawler(cls, crawler): 18 | # This method is used by Scrapy to create your spiders. 19 | s = cls() 20 | crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) 21 | return s 22 | 23 | def process_spider_input(self, response, spider): 24 | # Called for each response that goes through the spider 25 | # middleware and into the spider. 26 | 27 | # Should return None or raise an exception. 28 | return None 29 | 30 | def process_spider_output(self, response, result, spider): 31 | # Called with the results returned from the Spider, after 32 | # it has processed the response. 33 | 34 | # Must return an iterable of Request, dict or Item objects. 35 | for i in result: 36 | yield i 37 | 38 | def process_spider_exception(self, response, exception, spider): 39 | # Called when a spider or process_spider_input() method 40 | # (from other spider middleware) raises an exception. 41 | 42 | # Should return either None or an iterable of Response, dict 43 | # or Item objects. 44 | pass 45 | 46 | def process_start_requests(self, start_requests, spider): 47 | # Called with the start requests of the spider, and works 48 | # similarly to the process_spider_output() method, except 49 | # that it doesn’t have a response associated. 50 | 51 | # Must return only requests (not items). 52 | for r in start_requests: 53 | yield r 54 | 55 | def spider_opened(self, spider): 56 | spider.logger.info('Spider opened: %s' % spider.name) 57 | 58 | 59 | class DangdangDownloaderMiddleware(object): 60 | # Not all methods need to be defined. If a method is not defined, 61 | # scrapy acts as if the downloader middleware does not modify the 62 | # passed objects. 63 | 64 | @classmethod 65 | def from_crawler(cls, crawler): 66 | # This method is used by Scrapy to create your spiders. 67 | s = cls() 68 | crawler.signals.connect(s.spider_opened, signal=signals.spider_opened) 69 | return s 70 | 71 | def process_request(self, request, spider): 72 | # Called for each request that goes through the downloader 73 | # middleware. 74 | 75 | # Must either: 76 | # - return None: continue processing this request 77 | # - or return a Response object 78 | # - or return a Request object 79 | # - or raise IgnoreRequest: process_exception() methods of 80 | # installed downloader middleware will be called 81 | return None 82 | 83 | def process_response(self, request, response, spider): 84 | # Called with the response returned from the downloader. 85 | 86 | # Must either; 87 | # - return a Response object 88 | # - return a Request object 89 | # - or raise IgnoreRequest 90 | return response 91 | 92 | def process_exception(self, request, exception, spider): 93 | # Called when a download handler or a process_request() 94 | # (from other downloader middleware) raises an exception. 
95 | 96 | # Must either: 97 | # - return None: continue processing this exception 98 | # - return a Response object: stops process_exception() chain 99 | # - return a Request object: stops process_exception() chain 100 | pass 101 | 102 | def spider_opened(self, spider): 103 | spider.logger.info('Spider opened: %s' % spider.name) 104 | -------------------------------------------------------------------------------- /DangDang_Books/dangdang/dangdang/pipelines.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define your item pipelines here 4 | # 5 | # Don't forget to add your pipeline to the ITEM_PIPELINES setting 6 | # See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html 7 | 8 | 9 | class DangdangPipeline(object): 10 | def process_item(self, item, spider): 11 | #for i in range(0,len(item['link'])): 12 | # title=item['title'][i] 13 | # link=item['link'][i] 14 | # comment=item['comment'][i] 15 | # print(title) 16 | # print(link) 17 | # print(comment) 18 | # print('') 19 | 20 | return item 21 | -------------------------------------------------------------------------------- /DangDang_Books/dangdang/dangdang/settings.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Scrapy settings for dangdang project 4 | # 5 | # For simplicity, this file contains only settings considered important or 6 | # commonly used. You can find more settings consulting the documentation: 7 | # 8 | # https://doc.scrapy.org/en/latest/topics/settings.html 9 | # https://doc.scrapy.org/en/latest/topics/downloader-middleware.html 10 | # https://doc.scrapy.org/en/latest/topics/spider-middleware.html 11 | 12 | BOT_NAME = 'dangdang' 13 | 14 | SPIDER_MODULES = ['dangdang.spiders'] 15 | NEWSPIDER_MODULE = 'dangdang.spiders' 16 | 17 | 18 | # Crawl responsibly by identifying yourself (and your website) on the user-agent 19 | #USER_AGENT = 'dangdang (+http://www.yourdomain.com)' 20 | 21 | # Obey robots.txt rules 22 | ROBOTSTXT_OBEY = False 23 | 24 | # Configure maximum concurrent requests performed by Scrapy (default: 16) 25 | #CONCURRENT_REQUESTS = 32 26 | 27 | # Configure a delay for requests for the same website (default: 0) 28 | # See https://doc.scrapy.org/en/latest/topics/settings.html#download-delay 29 | # See also autothrottle settings and docs 30 | #DOWNLOAD_DELAY = 3 31 | # The download delay setting will honor only one of: 32 | #CONCURRENT_REQUESTS_PER_DOMAIN = 16 33 | #CONCURRENT_REQUESTS_PER_IP = 16 34 | 35 | # Disable cookies (enabled by default) 36 | #COOKIES_ENABLED = False 37 | 38 | # Disable Telnet Console (enabled by default) 39 | #TELNETCONSOLE_ENABLED = False 40 | 41 | # Override the default request headers: 42 | #DEFAULT_REQUEST_HEADERS = { 43 | # 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 44 | # 'Accept-Language': 'en', 45 | #} 46 | 47 | # Enable or disable spider middlewares 48 | # See https://doc.scrapy.org/en/latest/topics/spider-middleware.html 49 | #SPIDER_MIDDLEWARES = { 50 | # 'dangdang.middlewares.DangdangSpiderMiddleware': 543, 51 | #} 52 | 53 | # Enable or disable downloader middlewares 54 | # See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html 55 | #DOWNLOADER_MIDDLEWARES = { 56 | # 'dangdang.middlewares.DangdangDownloaderMiddleware': 543, 57 | #} 58 | 59 | # Enable or disable extensions 60 | # See https://doc.scrapy.org/en/latest/topics/extensions.html 61 | #EXTENSIONS = { 62 | # 
'scrapy.extensions.telnet.TelnetConsole': None, 63 | #} 64 | 65 | # Configure item pipelines 66 | # See https://doc.scrapy.org/en/latest/topics/item-pipeline.html 67 | ITEM_PIPELINES = { 68 | 'dangdang.pipelines.DangdangPipeline': 300, 69 | } 70 | 71 | # Enable and configure the AutoThrottle extension (disabled by default) 72 | # See https://doc.scrapy.org/en/latest/topics/autothrottle.html 73 | #AUTOTHROTTLE_ENABLED = True 74 | # The initial download delay 75 | #AUTOTHROTTLE_START_DELAY = 5 76 | # The maximum download delay to be set in case of high latencies 77 | #AUTOTHROTTLE_MAX_DELAY = 60 78 | # The average number of requests Scrapy should be sending in parallel to 79 | # each remote server 80 | #AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0 81 | # Enable showing throttling stats for every response received: 82 | #AUTOTHROTTLE_DEBUG = False 83 | 84 | # Enable and configure HTTP caching (disabled by default) 85 | # See https://doc.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings 86 | #HTTPCACHE_ENABLED = True 87 | #HTTPCACHE_EXPIRATION_SECS = 0 88 | #HTTPCACHE_DIR = 'httpcache' 89 | #HTTPCACHE_IGNORE_HTTP_CODES = [] 90 | #HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage' 91 | -------------------------------------------------------------------------------- /DangDang_Books/dangdang/dangdang/spiders/__init__.py: -------------------------------------------------------------------------------- 1 | # This package will contain the spiders of your Scrapy project 2 | # 3 | # Please refer to the documentation for information on how to create and manage 4 | # your spiders. 5 | -------------------------------------------------------------------------------- /DangDang_Books/dangdang/dangdang/spiders/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/DangDang_Books/dangdang/dangdang/spiders/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /DangDang_Books/dangdang/dangdang/spiders/__pycache__/dd.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/DangDang_Books/dangdang/dangdang/spiders/__pycache__/dd.cpython-37.pyc -------------------------------------------------------------------------------- /DangDang_Books/dangdang/dangdang/spiders/dd.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import scrapy 3 | from dangdang.items import DangdangItem 4 | from scrapy.http import Request 5 | 6 | 7 | class DdSpider(scrapy.Spider): 8 | name = 'dd' 9 | allowed_domains = ['dangdang.com'] 10 | start_urls = ['http://dangdang.com/'] 11 | 12 | def parse(self, response): 13 | item=DangdangItem() 14 | item['title']=response.xpath("//a[@class='pic']/@title").extract 15 | item['link'] = response.xpath("//a[@class='pic']/@href").extract 16 | item['comment'] = response.xpath("//a[@class='search_comment_num']/text()").extract 17 | print(item['title']) 18 | print(item['link']) 19 | print(item['comment']) 20 | yield item 21 | 22 | #for i in range(1,5): 23 | # url='http://search.dangdang.com/?key=%B3%CC%D0%F2%C9%E8%BC%C6&act=input&page_index'+str(i) 24 | # yield Request(url,callback=self.parse) 25 | 26 | 
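Note on dd.py above: each `response.xpath(...).extract` is missing the call parentheses, so `item['title']`, `item['link']` and `item['comment']` hold bound methods rather than lists of strings; `start_urls` also points at the bare homepage, while the pagination loop (which additionally drops the `=` after `page_index`) is commented out. A minimal corrected sketch, reusing the same XPath expressions and the search URL from that commented-out loop; the spider name and page range below are illustrative, not part of the project:

import scrapy
from scrapy.http import Request
from dangdang.items import DangdangItem


class DdFixedSpider(scrapy.Spider):
    # Illustrative name; the original spider is called 'dd'.
    name = 'dd_fixed'
    allowed_domains = ['dangdang.com']
    # Search-results URL taken from the commented-out loop in dd.py, with '=' added after page_index.
    start_urls = ['http://search.dangdang.com/?key=%B3%CC%D0%F2%C9%E8%BC%C6&act=input&page_index=1']

    def parse(self, response):
        item = DangdangItem()
        # .extract() is actually called here, so each field becomes a list of strings.
        item['title'] = response.xpath("//a[@class='pic']/@title").extract()
        item['link'] = response.xpath("//a[@class='pic']/@href").extract()
        item['comment'] = response.xpath("//a[@class='search_comment_num']/text()").extract()
        yield item
        # Follow a few more result pages (range chosen only for illustration).
        for i in range(2, 5):
            url = 'http://search.dangdang.com/?key=%B3%CC%D0%F2%C9%E8%BC%C6&act=input&page_index=' + str(i)
            yield Request(url, callback=self.parse)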
-------------------------------------------------------------------------------- /DangDang_Books/dangdang/scrapy.cfg: -------------------------------------------------------------------------------- 1 | # Automatically created by: scrapy startproject 2 | # 3 | # For more information about the [deploy] section see: 4 | # https://scrapyd.readthedocs.io/en/latest/deploy.html 5 | 6 | [settings] 7 | default = dangdang.settings 8 | 9 | [deploy] 10 | #url = http://localhost:6800/ 11 | project = dangdang 12 | -------------------------------------------------------------------------------- /DangDang_Books/ddSpider.py: -------------------------------------------------------------------------------- 1 | import time 2 | import pickle 3 | import random 4 | import requests 5 | from bs4 import BeautifulSoup 6 | 7 | 8 | headers = { 9 | 'Upgrade-Insecure-Requests': '1', 10 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36', 11 | 'Accept-Encoding': 'gzip, deflate', 12 | 'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8', 13 | 'Cache-Control': 'no-cache', 14 | 'Connection': 'keep-alive', 15 | 'Host': 'search.dangdang.com' 16 | } 17 | 18 | 19 | 20 | '''解析, 提取需要的数据''' 21 | def parseHtml(html): 22 | data = {} 23 | soup = BeautifulSoup(html, 'lxml') 24 | conshoplist = soup.find_all('div', {'class': 'con shoplist'})[0] 25 | for each in conshoplist.find_all('li'): 26 | # 书名 27 | bookname = each.find_all('a')[0].get('title').strip(' ') 28 | # 书图 29 | img_src = each.find_all('a')[0].img.get('data-original') 30 | if img_src is None: 31 | img_src = each.find_all('a')[0].img.get('src') 32 | img_src = img_src.strip(' ') 33 | # 价格 34 | price = float(each.find_all('p', {'class': 'price'})[0].span.text[1:]) 35 | # 简介 36 | detail = each.find_all('p', {'class': 'detail'})[0].text 37 | # 评分 38 | stars = float(each.find_all('p', {'class': 'search_star_line'})[0].span.span.get('style').split(': ')[-1].strip('%;')) / 20 39 | # 评论数量 40 | num_comments = float(each.find_all('p', {'class': 'search_star_line'})[0].a.text[:-3]) 41 | data[bookname] = [img_src, price, detail, stars, num_comments] 42 | return data 43 | 44 | 45 | '''主函数''' 46 | def main(keyword): 47 | url = 'http://search.dangdang.com/?key={}&act=input&page_index={}' 48 | results = {} 49 | num_page = 0 50 | while True: 51 | num_page += 1 52 | print('[INFO]: Start to get the data of page%d...' 
% num_page) 53 | page_url = url.format(keyword, num_page) 54 | res = requests.get(page_url, headers=headers) 55 | if '抱歉,没有找到与“%s”相关的商品,建议适当减少筛选条件' % keyword in res.text: 56 | break 57 | page_data = parseHtml(res.text) 58 | results.update(page_data) 59 | time.sleep(random.random() + 0.5) 60 | with open('%s_%d.pkl' % (keyword, num_page-1), 'wb') as f: 61 | pickle.dump(results, f) 62 | return results 63 | 64 | 65 | if __name__ == '__main__': 66 | main('python') -------------------------------------------------------------------------------- /DangDang_Books/pictureWall.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import time 4 | import math 5 | import pickle 6 | import requests 7 | from PIL import Image 8 | 9 | 10 | PICDIR = 'pictures' 11 | headers = { 12 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36', 13 | } 14 | 15 | 16 | '''图片下载''' 17 | def downloadPics(urls, savedir): 18 | if not os.path.exists(savedir): 19 | os.mkdir(savedir) 20 | for idx, url in enumerate(urls): 21 | res = requests.get(url, headers=headers) 22 | with open(os.path.join(savedir, '%d.jpg' % idx), 'wb') as f: 23 | f.write(res.content) 24 | time.sleep(0.5) 25 | 26 | 27 | '''制作照片墙''' 28 | def makePicturesWall(picdir): 29 | picslist = os.listdir(picdir) 30 | num_pics = len(picslist) 31 | print('照片数量',num_pics) 32 | size = 64 33 | line_numpics = int(math.sqrt(num_pics))#正方形 34 | picwall = Image.new('RGBA', (line_numpics*size, line_numpics*size)) 35 | x = 0 36 | y = 0 37 | for pic in picslist: 38 | img = Image.open(os.path.join(picdir, pic)) 39 | img = img.resize((size, size), Image.ANTIALIAS) #改变图片尺寸 40 | picwall.paste(img, (x*size, y*size)) #合并图片 41 | x += 1 42 | if x == line_numpics: 43 | x = 0 44 | y += 1 45 | print('[INFO]: Generate pictures wall successfully...') 46 | picwall.save("picwall.png") #保存图片 47 | 48 | 49 | if __name__ == '__main__': 50 | with open('python_61.pkl', 'rb') as f: 51 | data = pickle.load(f) 52 | urls = [j[0] for i, j in data.items()] #加载图片下载 url 53 | # downloadPics(urls, PICDIR) 54 | makePicturesWall(PICDIR) -------------------------------------------------------------------------------- /DouBan_Movie/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/DouBan_Movie/1.png -------------------------------------------------------------------------------- /DouBan_Movie/ip.txt: -------------------------------------------------------------------------------- 1 | 218.17.139.5:808 2 | 203.86.26.9:3128 3 | 60.191.134.164:9999 4 | 116.62.4.184:8118 5 | 122.227.139.170:3128 6 | 203.93.209.163:53281 7 | 123.7.61.8:53281 8 | 219.234.5.128:3128 9 | 183.47.40.35:8088 10 | 59.78.2.140:1080 11 | 61.128.208.94:3128 12 | 113.108.242.36:47713 13 | 115.28.209.249:3128 14 | 113.108.242.36:47713 15 | 14.20.235.114:9797 16 | 221.210.120.153:54402 17 | 36.110.14.186:3128 18 | 59.53.134.202:808 19 | 221.6.201.18:9999 20 | 124.237.83.14:53281 21 | 124.243.226.18:8888 22 | 58.215.140.6:8080 23 | 211.99.26.183:808 24 | 112.250.109.173:53281 25 | 218.60.8.83:3129 26 | 218.60.8.99:3129 27 | 119.51.89.18:1080 28 | 61.128.208.94:3128 29 | 59.53.134.202:808 30 | 114.116.10.21:3128 31 | 202.112.237.102:3128 32 | 58.215.140.6:8080 33 | 106.15.42.179:33543 34 | 61.145.182.27:53281 35 | 115.148.173.121:808 36 | 171.37.156.39:8123 37 | 123.13.245.51:9999 38 | 
218.60.8.83:3129 39 | 218.60.8.99:3129 40 | 61.183.233.6:54896 41 | 114.116.10.21:3128 42 | 202.112.237.102:3128 43 | 101.37.79.125:3128 44 | 59.53.137.116:808 45 | 221.7.255.167:8080 46 | 124.152.32.140:53281 47 | 221.7.255.168:8080 48 | 112.115.57.20:3128 49 | 61.183.233.6:54896 50 | -------------------------------------------------------------------------------- /DouBan_Movie/pic/动物世界.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/DouBan_Movie/pic/动物世界.png -------------------------------------------------------------------------------- /DouBan_Movie/pic/巴斯特·斯克鲁格斯的歌谣.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/DouBan_Movie/pic/巴斯特·斯克鲁格斯的歌谣.png -------------------------------------------------------------------------------- /DouYou/README.md: -------------------------------------------------------------------------------- 1 | 爬取斗鱼网所有主播的类别,房间标题,房间ID,主播名称,热度。 2 | 3 | csv文件大约15000条数据 4 | 5 | 博客地址:https://blog.csdn.net/weixin_43746433 6 | 7 | 微信:why19970628 8 | 9 | 欢迎与我交流 10 | -------------------------------------------------------------------------------- /LaGou/README.md: -------------------------------------------------------------------------------- 1 | 爬取拉勾网的职位的信息,分为静态和动态网页,生成csv文件。 2 | 3 | 博客地址:https://blog.csdn.net/weixin_43746433 4 | 5 | 爬虫详情:https://blog.csdn.net/weixin_43746433/article/details/94398440 6 | 7 | 微信:why19970628 8 | 9 | 欢迎与我交流 10 | -------------------------------------------------------------------------------- /LaGou/动态爬取.py: -------------------------------------------------------------------------------- 1 | # import pandas as pd 2 | # data=pd.read_csv('cleaned.csv') 3 | # data=pd.DataFrame(data) 4 | # area=data.groupby(by='area',axis=0).mean()['price'] 5 | # area=area 6 | # 7 | # #print(data.loc[:,'price'].mean()) 8 | # #area=data.groupby(by='area')['price'] 9 | # print(area) 10 | import requests 11 | from lxml import etree 12 | import pandas as pd 13 | from time import sleep 14 | import random 15 | 16 | # cookie 17 | cookie = '你的cookie' 18 | # headers 19 | headers = { 20 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36', 21 | # 'cookie':cookie 22 | } 23 | 24 | # sleep(random.randint(3, 10)) 25 | #url = 'https://www.lagou.com/zhaopin/' 26 | #res = requests.get(url, headers=headers) 27 | #print(res.text) 28 | 29 | #print('正在抓取第{}页...'.format(i), url) 30 | # 查看网页结构循环页数进行采集 31 | for i in range(1, 2): 32 | sleep(random.randint(3, 10)) 33 | url = 'https://www.lagou.com/zhaopin/jiqixuexi/{}/?filterOption=3'.format(i) 34 | res =requests.get(url,headers = headers) 35 | #print(res.text) 36 | print('正在抓取第{}页...'.format(i), url) 37 | # 请求网页并解析 38 | con = etree.HTML(requests.get(url=url, headers=headers).text) 39 | # 使用xpath表达式抽取各目标字段 40 | job_name = [i for i in con.xpath("//a[@class='position_link']/h3/text()")] 41 | job_address = [i for i in con.xpath("//a[@class='position_link']/span/em/text()")] 42 | job_company = [i for i in con.xpath("//div[@class='company_name']/a/text()")] 43 | job_salary = [i for i in con.xpath("//span[@class='money']/text()")] 44 | job_exp_edu = [i for i in con.xpath("//div[@class='li_b_l']/text()")] 45 | job_exp_edu2 = [i for i in [i.strip() for i in job_exp_edu] if i != ''] 46 | 
job_industry = [i.strip() for i in con.xpath("//div[@class='industry']/text()")] 47 | job_tempation = [i for i in con.xpath("//div[@class='list_item_bot']/div[@class='li_b_r']/text()")] 48 | job_links = [i for i in con.xpath("//div[@class='p_top']/a/@href")] 49 | print(job_links) 50 | 51 | # 获取详情页链接后采集详情页岗位描述信息 52 | job_des =[] 53 | for link in job_links: 54 | sleep(random.randint(3, 10)) 55 | print('link:',link) 56 | con2 = etree.HTML(requests.get(url=link, headers=headers).text) 57 | #print(con) 58 | des = [[i for i in con2.xpath("//dd[@class='job_bt']/div/p/text()")]] 59 | job_des += des 60 | #print(job_des) 61 | break #遍历一次 62 | 63 | # 对数据进行字典封装 64 | dataset = { 65 | '岗位名称': job_name, 66 | '工作地址': job_address, 67 | '公司': job_company, 68 | '薪资': job_salary, 69 | '经验学历': job_exp_edu2, 70 | '所属行业': job_industry, 71 | '岗位福利': job_tempation, 72 | '任职要求': job_des 73 | } 74 | 75 | # 转化为数据框并存为csv 76 | data = pd.DataFrame(dataset) 77 | data.to_csv('machine_learning_hz_job2.csv') 78 | -------------------------------------------------------------------------------- /LianJia/README.md: -------------------------------------------------------------------------------- 1 | 爬取链家的北京所有小区的信息,生成csv文件。 2 | 3 | 博客地址:https://blog.csdn.net/weixin_43746433 4 | 5 | 爬虫详情:https://blog.csdn.net/weixin_43746433/article/details/95951341 6 | 7 | 微信:why19970628 8 | 9 | 欢迎与我交流 10 | -------------------------------------------------------------------------------- /LianJia/group_by.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | data1=pd.read_csv('housedata1.csv') 4 | print(data1.shape) 5 | data2=pd.read_csv('housedata2.csv') 6 | print(data2.shape) 7 | data=pd.concat([data1,data2],axis=0,ignore_index=False) 8 | print(data.head()) 9 | print(data.shape) 10 | data=pd.DataFrame(data) 11 | 12 | data=data.sort_values('area') 13 | data=data.reset_index() 14 | data=data.drop(labels='index',axis=1) 15 | print(data.head()) 16 | print(data.loc[:,'area'].value_counts()) 17 | for i,data['price'][i] in enumerate(data['price']): 18 | data['price'][i]=int(data['price'][i].replace('元/平','')) 19 | #print(i,data['price'][i]) 20 | print('changed_price\n',data['price'].head()) 21 | print(data.head()) 22 | 23 | print(type(data['price'][0])) 24 | data.to_csv('cleaned.csv') 25 | 26 | print(data.loc[:,'area'].value_counts()) 27 | print(data.describe()) 28 | 29 | area=data.groupby(by='area')['price'].mean() 30 | 31 | #print(data.loc[:,'price'].mean()) 32 | #area=data.groupby(by='area')['price'] 33 | print(area) -------------------------------------------------------------------------------- /Meituan/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/Meituan/__init__.py -------------------------------------------------------------------------------- /Meituan/meituan.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import time 3 | import threading 4 | from get_cookie import get_cookie 5 | from get_cookie import parse 6 | 7 | 8 | def crow(n, l): # 参数n 区分第几个线程,l存储url的列表 9 | lock = threading.Lock() 10 | sym = 0 # 是否连续三次抓取失败的标志位 11 | pc = get_cookie() # 获取IP 和 Cookie 12 | m = 0 # 记录抓取的数量 13 | now = time.time() 14 | while True: 15 | if len(l) > 0: 16 | u = l.pop(0) 17 | ll = len(l) 18 | m += 1 19 | ttt = time.time() - now 20 | result = parse(u, pc, m, n, ll, ttt) 21 | mark 
= result[0] 22 | info = result[1] 23 | if mark == 2: 24 | time.sleep(1.5) 25 | result = parse(u, pc, m, n, ll, ttt) 26 | mark = result[0] 27 | info = result[1] 28 | if mark != 0: 29 | sym += 1 30 | if mark == 1: 31 | pc = get_cookie() 32 | result = parse(u, pc, m, n, ll, ttt) 33 | mark = result[0] 34 | info = result[1] 35 | if mark != 0: 36 | sym += 1 37 | if mark == 0: # 抓取成功 38 | sym = 0 39 | lock.acquire() 40 | with open('meituan.csv', 'a', newline='', encoding='gb18030')as f: 41 | write = csv.writer(f) 42 | write.writerow(info) 43 | f.close() 44 | lock.release() 45 | if sym > 2: # 连续三次抓取失败,换ip、cookie 46 | sym = 0 47 | pc = get_cookie() 48 | else: 49 | print('&&&&线程:%d结束' % n) 50 | break 51 | 52 | 53 | if __name__ == '__main__': 54 | url_list = [] 55 | with open('mt_id.csv', 'r', encoding='gb18030')as f: 56 | read = csv.reader(f) 57 | for line in read: 58 | d_list = ['', ''] 59 | url = 'https://meishi.meituan.com/i/poi/' + str(line[2]) + '?ct_poi=' + str(line[3]) 60 | d_list[0] = url 61 | d_list[1] = line[1] 62 | url_list.append(d_list) 63 | f.close() 64 | th_list = [] 65 | for i in range(1, 6): 66 | t = threading.Thread(target=crow, args=(i, url_list,)) 67 | print('*****线程%d开始启动...' % i) 68 | t.start() 69 | th_list.append(t) 70 | time.sleep(30) 71 | for t in th_list: 72 | t.join() 73 | -------------------------------------------------------------------------------- /Movie_maoyan/WPS网盘.lnk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/Movie_maoyan/WPS网盘.lnk -------------------------------------------------------------------------------- /Movie_maoyan/maoyan.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import requests 3 | import re 4 | import json 5 | from multiprocessing import Pool 6 | 7 | 8 | 9 | def get_one_page(url): 10 | headers = { 11 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36' 12 | } 13 | response = requests.get(url, headers=headers) 14 | if response.status_code == 200: 15 | return response.text 16 | return None 17 | 18 | 19 | import pandas as pd 20 | 21 | def parse_one_page(html): 22 | pattern = re.compile( 23 | '
<dd>.*?board-index.*?>(.*?)</i>.*?data-src="(.*?)".*?name.*?a.*?>(.*?)</a>.*?star.*?>(.*?)</p>.*?releasetime.*?>(.*?)</p>.*?integer.*?>(.*?)</i>.*?fraction.*?>(.*?)</i>.*?</dd>
', 24 | re.S) 25 | items = re.findall(pattern, html) 26 | #print(items) 27 | content = [] 28 | for item in items: 29 | dataset = {} 30 | dataset['index']=item[0] 31 | print(dataset['index']) 32 | dataset['image']=item[1] 33 | dataset['title']=item[2].strip() 34 | dataset['actor']=item[3].strip()[3:] if len(item[3]) > 3 else '' 35 | dataset['time'] =item[4].strip()[5:] if len(item[4]) > 5 else '' 36 | dataset['score']=item[5].strip() + item[6].strip() 37 | content.append(dataset) 38 | return content 39 | 40 | 41 | 42 | def write_to_file(content): 43 | df = pd.DataFrame(content) 44 | #print(df.index) 45 | df.to_csv('maoyan.csv',index=False,mode='a+') 46 | 47 | def main(offset): 48 | url = 'http://maoyan.com/board/4?offset=' + str(offset) 49 | html = get_one_page(url) 50 | data=parse_one_page(html) 51 | write_to_file(data) 52 | 53 | import time 54 | if __name__ == '__main__': 55 | 56 | start=time.time() 57 | pool=Pool() 58 | 59 | pool.map(main,[i*10 for i in range(10)]) 60 | # for i in range(10): 61 | # main(offset=i * 10) 62 | 63 | print('花费时间:',time.time()-start) -------------------------------------------------------------------------------- /Movie_maoyan/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/Movie_maoyan/readme.md -------------------------------------------------------------------------------- /Movie_maoyan/result.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/Movie_maoyan/result.txt -------------------------------------------------------------------------------- /Movie_maoyan/txt.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import requests 3 | import re 4 | import json 5 | from multiprocessing import Pool 6 | 7 | 8 | def get_one_page(url): 9 | headers = { 10 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36' 11 | } 12 | response = requests.get(url, headers=headers) 13 | if response.status_code == 200: 14 | return response.text 15 | return None 16 | 17 | 18 | def parse_one_page(html): 19 | pattern = re.compile( 20 | '
<dd>.*?board-index.*?>(.*?)</i>.*?data-src="(.*?)".*?name.*?a.*?>(.*?)</a>.*?star.*?>(.*?)</p>.*?releasetime.*?>(.*?)</p>.*?integer.*?>(.*?)</i>.*?fraction.*?>(.*?)</i>.*?</dd>
', 21 | re.S) 22 | items = re.findall(pattern, html) 23 | for item in items: 24 | yield { 25 | 'index': item[0], 26 | 'image': item[1], 27 | 'title': item[2].strip(), 28 | 'actor': item[3].strip()[3:] if len(item[3]) > 3 else '', 29 | 'time': item[4].strip()[5:] if len(item[4]) > 5 else '', 30 | 'score': item[5].strip() + item[6].strip() 31 | } 32 | 33 | 34 | def write_to_file(content): 35 | with open('result.txt', 'a', encoding='utf-8') as f: 36 | f.write(json.dumps(content, ensure_ascii=False) + '\n') 37 | 38 | 39 | def main(offset): 40 | url = 'http://maoyan.com/board/4?offset=' + str(offset) 41 | html = get_one_page(url) 42 | for item in parse_one_page(html): 43 | write_to_file(item) 44 | 45 | 46 | if __name__ == '__main__': 47 | pool = Pool() 48 | pool.map(main, [i * 10 for i in range(10)]) -------------------------------------------------------------------------------- /Movie_tiantang/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/Movie_tiantang/readme.md -------------------------------------------------------------------------------- /Photo_Position_GoldenAPI/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/Photo_Position_GoldenAPI/.DS_Store -------------------------------------------------------------------------------- /Photo_Position_GoldenAPI/.idea/inspectionProfiles/Project_Default.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 12 | -------------------------------------------------------------------------------- /Photo_Position_GoldenAPI/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | -------------------------------------------------------------------------------- /Photo_Position_GoldenAPI/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /Photo_Position_GoldenAPI/.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /Photo_Position_GoldenAPI/.idea/地理位置.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 12 | -------------------------------------------------------------------------------- /Photo_Position_GoldenAPI/__pycache__/position_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/Photo_Position_GoldenAPI/__pycache__/position_utils.cpython-37.pyc -------------------------------------------------------------------------------- /Photo_Position_GoldenAPI/picture/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/Photo_Position_GoldenAPI/picture/.DS_Store -------------------------------------------------------------------------------- /Photo_Position_GoldenAPI/picture/20190828185021.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/Photo_Position_GoldenAPI/picture/20190828185021.jpg -------------------------------------------------------------------------------- /Photo_Position_GoldenAPI/position_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | 4 | """ 5 | @version: v1.0 6 | @author: xag 7 | @license: Apache Licence 8 | @contact: xinganguo@gmail.com 9 | @site: http://www.xingag.top 10 | @software: PyCharm 11 | @file: position_utils.py 12 | @time: 2019-08-23 17:44 13 | @description:坐标转换 14 | """ 15 | 16 | # -*- coding: utf-8 -*- 17 | import json 18 | import math 19 | 20 | x_pi = 3.14159265358979324 * 3000.0 / 180.0 21 | pi = 3.1415926535897932384626 # π 22 | a = 6378245.0 # 长半轴 23 | ee = 0.00669342162296594323 # 扁率 24 | 25 | 26 | def wgs84togcj02(lng, lat): 27 | """ 28 | WGS84转GCJ02(火星坐标系) 29 | :param lng:WGS84坐标系的经度 30 | :param lat:WGS84坐标系的纬度 31 | :return: 32 | """ 33 | if out_of_china(lng, lat): # 判断是否在国内 34 | return lng, lat 35 | dlat = transformlat(lng - 105.0, lat - 35.0) 36 | dlng = transformlng(lng - 105.0, lat - 35.0) 37 | radlat = lat / 180.0 * pi 38 | magic = math.sin(radlat) 39 | magic = 1 - ee * magic * magic 40 | sqrtmagic = math.sqrt(magic) 41 | dlat = (dlat * 180.0) / ((a * (1 - ee)) / (magic * sqrtmagic) * pi) 42 | dlng = (dlng * 180.0) / (a / sqrtmagic * math.cos(radlat) * pi) 43 | mglat = lat + dlat 44 | mglng = lng + dlng 45 | return [mglng, mglat] 46 | 47 | 48 | def gcj02towgs84(lng, lat): 49 | """ 50 | GCJ02(火星坐标系)转GPS84 51 | :param lng:火星坐标系的经度 52 | :param lat:火星坐标系纬度 53 | :return: 54 | """ 55 | if out_of_china(lng, lat): 56 | return lng, lat 57 | dlat = transformlat(lng - 105.0, lat - 35.0) 58 | dlng = transformlng(lng - 105.0, lat - 35.0) 59 | radlat = lat / 180.0 * pi 60 | magic = math.sin(radlat) 61 | magic = 1 - ee * magic * magic 62 | sqrtmagic = math.sqrt(magic) 63 | dlat = (dlat * 180.0) / ((a * (1 - ee)) / (magic * sqrtmagic) * pi) 64 | dlng = (dlng * 180.0) / (a / sqrtmagic * math.cos(radlat) * pi) 65 | mglat = lat + dlat 66 | mglng = lng + dlng 67 | return [lng * 2 - mglng, lat * 2 - mglat] 68 | 69 | 70 | def transformlat(lng, lat): 71 | ret = -100.0 + 2.0 * lng + 3.0 * lat + 0.2 * lat * lat + \ 72 | 0.1 * lng * lat + 0.2 * math.sqrt(math.fabs(lng)) 73 | ret += (20.0 * math.sin(6.0 * lng * pi) + 20.0 * 74 | math.sin(2.0 * lng * pi)) * 2.0 / 3.0 75 | ret += (20.0 * math.sin(lat * pi) + 40.0 * 76 | math.sin(lat / 3.0 * pi)) * 2.0 / 3.0 77 | ret += (160.0 * math.sin(lat / 12.0 * pi) + 320 * 78 | math.sin(lat * pi / 30.0)) * 2.0 / 3.0 79 | return ret 80 | 81 | 82 | def transformlng(lng, lat): 83 | ret = 300.0 + lng + 2.0 * lat + 0.1 * lng * lng + \ 84 | 0.1 * lng * lat + 0.1 * math.sqrt(math.fabs(lng)) 85 | ret += (20.0 * math.sin(6.0 * lng * pi) + 20.0 * 86 | math.sin(2.0 * lng * pi)) * 2.0 / 3.0 87 | ret += (20.0 * math.sin(lng * pi) + 40.0 * 88 | math.sin(lng / 3.0 * pi)) * 2.0 / 3.0 89 | ret += (150.0 * math.sin(lng / 12.0 * pi) + 300.0 * 90 | math.sin(lng / 30.0 * pi)) * 2.0 / 3.0 91 | return ret 92 | 93 | 94 | def out_of_china(lng, lat): 95 | """ 96 | 判断是否在国内,不在国内不做偏移 97 | :param lng: 98 | :param lat: 99 | :return: 100 | """ 101 | if lng < 72.004 or lng > 137.8347: 102 | return True 103 | if lat < 0.8293 or lat > 55.8271: 104 | return True 105 | return False 106 | 
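A minimal usage sketch for the conversion helpers above (the coordinate is an arbitrary Beijing-area point, not data from this project). Since gcj02towgs84 is only an approximate inverse, the round-tripped value is close to, but not exactly, the input:

from position_utils import wgs84togcj02, gcj02towgs84

# Example WGS-84 point (illustrative); wgs84togcj02 applies the GCJ-02 ("Mars") offset.
lng, lat = 116.397428, 39.90923
gcj_lng, gcj_lat = wgs84togcj02(lng, lat)
# gcj02towgs84 approximately undoes the offset, so this lands near the original point.
back_lng, back_lat = gcj02towgs84(gcj_lng, gcj_lat)

print('GCJ-02:', gcj_lng, gcj_lat)
print('WGS-84 (round trip):', back_lng, back_lat)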
-------------------------------------------------------------------------------- /Photo_qiantu/ip.txt: -------------------------------------------------------------------------------- 1 | 113.116.245.211:9797 2 | 124.232.133.199:3128 3 | 113.116.245.211:9797 4 | 163.204.240.140:9999 5 | 1.197.203.240:9999 6 | 49.51.155.45:8081 7 | 163.204.241.198:9999 8 | 125.62.27.53:3128 9 | 61.128.208.94:3128 10 | 120.83.101.8:9999 11 | 121.233.251.11:9999 12 | 1.198.72.173:9999 13 | 110.172.221.241:8080 14 | 27.191.234.69:9999 15 | 116.196.90.181:3128 16 | 101.231.234.38:8080 17 | 110.172.221.241:8080 18 | 163.204.244.138:9999 19 | 121.69.46.177:9000 20 | 121.233.207.221:9999 21 | -------------------------------------------------------------------------------- /Photo_qiantu/qiantu.photo/simple_show.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/Photo_qiantu/qiantu.photo/simple_show.PNG -------------------------------------------------------------------------------- /Photo_qiantu/qiantu.py: -------------------------------------------------------------------------------- 1 | from urllib import request 2 | import urllib 3 | import random 4 | from urllib.error import URLError 5 | from urllib.request import ProxyHandler, build_opener 6 | import re 7 | def get_ip(): 8 | fr=open('ip.txt','r') 9 | ips=fr.readlines() 10 | new=[] 11 | for line in ips: 12 | temp=line.strip() 13 | new.append(temp) 14 | ip=random.choice(new) 15 | return ip 16 | print(ip) 17 | proxy =get_ip() 18 | proxy_handler = ProxyHandler({ 19 | 'http': 'http://' + proxy, 20 | 'https': 'https://' + proxy 21 | }) 22 | opener = build_opener(proxy_handler) 23 | import threading 24 | class One(threading.Thread): 25 | def __init__(self): 26 | threading.Thread.__init__(self) 27 | def run(self): 28 | try: 29 | for i in range(1,5,2): 30 | pageurl='http://www.58pic.com/piccate/3-0-0-p'+str(i)+'.html' 31 | data =urllib.request.urlopen(pageurl).read().decode('utf-8','ignore') 32 | pat='class="card-trait".*?src="(.*?).jpg!' 33 | image_url=re.compile(pat).findall(data) 34 | print('url个数',len(image_url)) 35 | for j in range(0,len(image_url)): 36 | try: 37 | this_list=image_url[j] 38 | this_url='https:'+this_list+'.jpg!w1024_0' 39 | file='D:/软件(学习)/Python/Test/chapter6/qiantu.photo/'+str(i)+str(j)+'.jpg' 40 | urllib.request.urlretrieve(this_url,file) 41 | print('第'+str(i)+'页第'+str(j)+'个图片成功') 42 | except urllib.error.URLError as e: 43 | print(e.reason) 44 | 45 | except URLError as e: 46 | print(e.reason) 47 | 48 | 49 | class Two(threading.Thread): 50 | def __init__(self): 51 | threading.Thread.__init__(self) 52 | 53 | def run(self): 54 | try: 55 | for i in range(2, 5, 2): 56 | pageurl = 'http://www.58pic.com/piccate/3-0-0-p'+str(i)+'.html' 57 | data = urllib.request.urlopen(pageurl).read().decode('utf-8', 'ignore') 58 | pat = 'class="card-trait".*?src="(.*?).jpg!' 
59 | image_url = re.compile(pat).findall(data) 60 | for j in range(0, len(image_url)): 61 | try: 62 | this_list = image_url[j] 63 | this_url = 'https:'+this_list + '.jpg!w1024_0' 64 | file = 'D:/软件(学习)/Python/Test/chapter6/qiantu.photo/' + str(i) + str(j) + '.jpg' 65 | urllib.request.urlretrieve(this_url, file) 66 | print('第' + str(i) + '页第' + str(j) + '个图片成功') 67 | except urllib.error.URLError as e: 68 | print(e.reason) 69 | 70 | except URLError as e: 71 | print(e.reason) 72 | one=One() 73 | one.start() 74 | two=Two() 75 | two.start() -------------------------------------------------------------------------------- /Photo_taobao/ip.txt: -------------------------------------------------------------------------------- 1 | 113.116.245.211:9797 2 | 124.232.133.199:3128 3 | 113.116.245.211:9797 4 | 163.204.240.140:9999 5 | 1.197.203.240:9999 6 | 49.51.155.45:8081 7 | 163.204.241.198:9999 8 | 125.62.27.53:3128 9 | 61.128.208.94:3128 10 | 120.83.101.8:9999 11 | 121.233.251.11:9999 12 | 1.198.72.173:9999 13 | 110.172.221.241:8080 14 | 27.191.234.69:9999 15 | 116.196.90.181:3128 16 | 101.231.234.38:8080 17 | 110.172.221.241:8080 18 | 163.204.244.138:9999 19 | 121.69.46.177:9000 20 | 121.233.207.221:9999 21 | -------------------------------------------------------------------------------- /Photo_taobao/taobao_photo.py: -------------------------------------------------------------------------------- 1 | from urllib import request 2 | import urllib 3 | import urllib.parse 4 | import re 5 | from urllib.error import URLError 6 | key='连衣裙' 7 | key=urllib.request.quote(key) 8 | headers=('user-agent',"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36") 9 | opener=urllib.request.build_opener() 10 | opener.addheaders=[headers] 11 | urllib.request.install_opener(opener) 12 | for i in range(1,20): 13 | url='https://re.taobao.com/search?spm=a231k.8165028.0782702702.204.60792e63WFZKub&prepvid=300_11.10.228.22_44360_1543657608665&extra=&keyword='+key+'&frontcatid=&isinner=1&refpid=420435_1006&page='+str(i)+'&rewriteKeyword&_input_charset=utf-8' 14 | print(url) 15 | data=urllib.request.urlopen(url).read().decode('utf-8','ignore') 16 | #pat=' 2 | 3 | 4 | 5 | Awesome-pyecharts 6 | 7 | 8 | 9 | 10 |
11 | 93 | 94 | 95 | -------------------------------------------------------------------------------- /Sina_topic_spider/stop_words.txt: -------------------------------------------------------------------------------- 1 | 大家 2 | 打榜 -------------------------------------------------------------------------------- /WangYi_Music/geci.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import re 3 | import json 4 | import pandas as pd 5 | url='' 6 | headers={'user-agent': 7 | 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36' 8 | } 9 | def get_info(id): 10 | res=requests.get('http://music.163.com/api/song/lyric?id={}&lv=1&kv=1&tv=-1'.format(id),headers=headers) 11 | json_data=json.loads(res.text) 12 | lyric=json_data['lrc']['lyric'] 13 | lyric=re.sub('\[.*\]','',lyric) 14 | return str(lyric) 15 | def txt(): 16 | data=pd.read_csv('music.csv') 17 | for i in range(len(data['song_id'])): 18 | 19 | fp=open(r'歌词/{}.txt'.format(data['song'][i]),'w',encoding='utf-8') 20 | fp.write(get_info(data['song_id'][i])) 21 | fp.close() 22 | 23 | txt() 24 | -------------------------------------------------------------------------------- /WangYi_Music/music.csv: -------------------------------------------------------------------------------- 1 | song_id,song,singer,album 2 | 167876,有何不可,许嵩,《自定义》 3 | 167655,幻听,许嵩,《梦游计》 4 | 167827,素颜,许嵩,《素颜》 5 | 167850,庐州月,许嵩,《寻雾启示》 6 | 167844,灰色头像,许嵩,《寻雾启示》 7 | 27646687,玫瑰花的葬礼,许嵩,《许嵩单曲集》 8 | 167937,断桥残雪,许嵩,《断桥残雪》 9 | 28854182,惊鸿一面,许嵩,《不如吃茶去》 10 | 411214279,雅俗共赏,许嵩,《青年晚报》 11 | 167882,清明雨上,许嵩,《自定义》 12 | 428095913,江湖 ,许嵩,《江湖》 13 | 167712,拆东墙,许嵩,《苏格拉没有底》 14 | 167870,如果当时,许嵩,《自定义》 15 | 569213279,大千世界,许嵩,《寻宝游戏》 16 | 167903,我想牵着你的手,许嵩,《我想牵着你的手》 17 | 167732,千百度,许嵩,《苏格拉没有底》 18 | 167873,多余的解释,许嵩,《自定义》 19 | 167691,天龙八部之宿敌,许嵩,《天龙八部之宿敌》 20 | 5255987,你若成风,许嵩,《乐酷》 21 | 167709,河山大好,许嵩,《苏格拉没有底》 22 | 167891,内线,许嵩,《自定义》 23 | 167705,想象之中,许嵩,《苏格拉没有底》 24 | 167679,全球变冷,许嵩,《梦游计》 25 | 34040693,千古,许嵩,《千古》 26 | 167888,认错,许嵩,《自定义》 27 | 862099032,明智之举,许嵩,《寻宝游戏》 28 | 167894,星座书上,许嵩,《自定义》 29 | 27612225,违章动物,许嵩,《违章动物》 30 | 167885,城府,许嵩,《自定义》 31 | 167929,你若成风,许嵩,《许嵩单曲集》 32 | -------------------------------------------------------------------------------- /WangYi_Music/wangyiyun.py: -------------------------------------------------------------------------------- 1 | from selenium import webdriver 2 | from lxml import etree 3 | import time 4 | import csv 5 | def get_info(url): 6 | chrome_driver=r"D:\Python\Anaconda\Lib\site-packages\selenium\webdriver\chrome\chromedriver.exe" 7 | driver=webdriver.Chrome(executable_path=chrome_driver) 8 | driver.maximize_window() 9 | driver.get(url) 10 | driver.implicitly_wait(10) 11 | iframe=driver.find_elements_by_tag_name('iframe')[0] 12 | driver.switch_to.frame(iframe) 13 | html=etree.HTML(driver.page_source) 14 | infos=html.xpath('//div[@class="srchsongst"]/div') 15 | for info in infos: 16 | song_id=info.xpath('div[2]/div/div/a/@href')[0].split('=')[-1] 17 | song=info.xpath('div[2]/div/div/a/b/text()')[0] 18 | singer1=info.xpath('div[4]/div/a')[0] 19 | singer=singer1.xpath('string(.)') 20 | album=info.xpath('div[5]/div/a/@title')[0] 21 | print(song_id,song,singer,album) 22 | writer.writerow([song_id,song,singer,album]) 23 | if __name__=='__main__': 24 | fp=open('music.csv','w',newline='',encoding='utf-8') 25 | writer=csv.writer(fp) 26 | writer.writerow(['song_id','song','singer','album']) 27 | 
url='https://music.163.com/#/search/m/?s=%E8%AE%B8%E5%B5%A9&type=1' 28 | get_info(url) -------------------------------------------------------------------------------- /WangYi_Music/歌词/你若成风.txt: -------------------------------------------------------------------------------- 1 | 2 | 作曲 : 许嵩 3 | 作词 : 许嵩 4 | 你若化成风 5 | 我幻化成雨 6 | 守护你身边 7 | 一笑为红颜 8 | 你若化成风 9 | 我幻化成雨 10 | 爱锁在眉间 11 | 似水往昔浮流年 12 | 乖乖 我的小乖乖 13 | 你的样子太可爱 14 | 追你的男生每个都超级厉害 15 | 我却在考虑怎么Say hi 16 | 害羞的我这样下去要怎么办 17 | 怎么办 爱情甜又酸 18 | 我不是Boss 19 | 没有超大的House 20 | 如果送你Rose 21 | 可不可以给我Chance 22 | 不想看时间这么一点一滴飞逝 23 | 老夫子带着假发 24 | 我不要三寸金莲胡话 25 | 想和你跳超短裙的恰恰 26 | 想带你回家见妈妈 27 | 你若化成风 28 | 我幻化成雨 29 | 守护你身边 30 | 一笑为红颜 31 | 你若化成风 32 | 我幻化成雨 33 | 爱锁在眉间 34 | 似水往昔浮流年 35 | 周末找个借口和你泛舟 36 | 一壶清酒 江水悠悠 我心悠悠 37 | 这感情Just for you 38 | 表面平静其实内心早已风起云涌 39 | 缘字诀 几番轮回 你锁眉 40 | 哎哟你的心情左右我的情绪 41 | 虽然有些问题真的很难搞定 42 | 我还是充满信心 43 | 老夫子带着假发 44 | 我不要三寸金莲胡话 45 | 想和你跳超短裙的恰恰 46 | 想带你回家见妈妈 47 | 你若化成风 48 | 我幻化成雨 49 | 守护你身边 50 | 一笑为红颜 51 | 你若化成风 52 | 我幻化成雨 53 | 爱锁在眉间 54 | 似水往昔浮流年 55 | 你千万不要装酷 56 | 呆的像大脑短路 57 | 我不收你的礼物 58 | 只想收一点点幸福 59 | 请领悟 60 | 请拿出速度奉我为公主 61 | 别磨蹭的像胖叔叔 62 | 有压力也要顶住 63 | 坚持自己的道路 64 | 真心去付出随时准备自我颠覆 65 | 这一首有点复古 66 | 不预示下首的套路 67 | 踩着Hip-Hop的鼓点陪你跳恰恰舞 68 | 嘟嘟嘟 69 | 嘟嘟嘟嘟嘟 70 | 嘟嘟嘟 71 | 嘟嘟嘟嘟嘟 72 | 嘟嘟嘟 73 | 嘟嘟嘟嘟嘟 74 | 嘟嘟嘟嘟嘟嘟嘟 75 | 嘟嘟嘟 76 | 嘟嘟嘟嘟嘟 77 | 嘟嘟嘟 78 | 嘟嘟嘟嘟嘟 79 | 嘟嘟嘟 80 | 嘟嘟嘟嘟嘟 81 | 嘟嘟嘟嘟嘟嘟嘟 82 | 你若化成风 83 | 我幻化成雨 84 | 守护你身边 85 | 一笑为红颜 86 | 你若化成风 87 | 我幻化成雨 88 | 爱锁在眉间 89 | 似水往昔浮流年 90 | 你若化成风 91 | 我幻化成雨 92 | 守护你身边 93 | 一笑为红颜 94 | 你若化成风 95 | 我幻化成雨 96 | 爱锁在眉间 97 | 似水往昔浮流年 98 | -------------------------------------------------------------------------------- /WangYi_Music/歌词/全球变冷.txt: -------------------------------------------------------------------------------- 1 | 作曲 : 许嵩 2 | 作词 : 许嵩 3 | 风在淅淅沥沥的雨中 4 | 撑伞走过那路口 5 | 有人跌跌撞撞踩到我 6 | 没说对不起借过 7 | 表情无喜无悲的冷漠 8 | 是这座城市的符咒 9 | 每个人都害怕被看出内心的脆弱 10 | 看你懵懵懂懂的眼中 布满太多的困惑 11 | 自从经历过那些以后 你都没怎么笑过 12 | 不必反反复复想太多 每天都要过的更洒脱 13 | 看得透 放得下 拈花一朵 14 | 如果能够多一点点微笑 15 | 生命也会多一点点美好 16 | 何必活的那么冷酷寂寥 17 | 入夜总为小事而睡不着 18 | 如果能够多一点点微笑 19 | 快乐也会多一点点围绕 20 | 不要等到全球变冷才觉不妙 21 | 22 | 风在淅淅沥沥的雨中 23 | 撑伞走过那路口 24 | 有人跌跌撞撞踩到我 25 | 没说对不起借过 26 | 表情无喜无悲的冷漠 27 | 是这座城市的符咒 28 | 每个人都害怕被看出内心的脆弱 29 | 看你懵懵懂懂的眼中 布满太多的困惑 30 | 自从经历过那些以后 你都没怎么笑过 31 | 不必反反复复想太多 每天都要过的更洒脱 32 | 看得透 放得下 拈花一朵 33 | 如果能够多一点点微笑 34 | 生命也会多一点点美好 35 | 何必活的那么冷酷寂寥 36 | 入夜总为小事而睡不着 37 | 如果能够多一点点微笑 38 | 快乐也会多一点点围绕 39 | 不要等到全球变冷才觉不妙 40 | 41 | 如果能够多一点点微笑 42 | 生命也会多一点点美好 43 | 何必活的那么冷酷寂寥 44 | 入夜总为小事而睡不着 45 | 如果能够多一点点微笑 46 | 快乐也会多一点点围绕 47 | 不要等到全球变冷才觉不妙 48 | 49 | 50 | -------------------------------------------------------------------------------- /WangYi_Music/歌词/内线.txt: -------------------------------------------------------------------------------- 1 | 作曲 : 许嵩 2 | 作词 : 许嵩 3 | 血腥的风放肆嘲笑漫天的黄叶 4 | 这大街已横尸遍野 5 | 6 | 而你却消失在蒙面执行任务杀气腾腾的夜 7 | 我听见有人猜 8 | 你是敌人潜伏的内线 9 | 10 | 和你相知多年 11 | 我确信对你的了解 12 | 你舍命救我画面 13 | 一一在眼前浮现 14 | 司空见惯了鲜血 15 | 你忘记你本是娇娆的红颜 16 | 感觉你我彼此都那么依恋 17 | 18 | 你落入封闭的地牢 19 | 发不出求救的讯号 20 | 我折返这古堡 提着刀 21 | 杀红了眼 不依不饶 22 | 23 | 你落入封闭的地牢 24 | 发不出求救的讯号 25 | 我却能感应到 26 | 打开锁链 你浅浅笑和我拥抱 27 | 28 | 血腥的风放肆嘲笑漫天的黄叶 29 | 这大街已横尸遍野 30 | 31 | 而你却消失在蒙面执行任务杀气腾腾的夜 32 | 我听见有人猜 33 | 你是潜伏的内线 34 | 35 | 和你相知多年 36 | 我确信对你的了解 37 | 你舍命救我画面 38 | 一一在眼前浮现 39 | 司空见惯了鲜血 40 | 你忘记你本是娇娆的红颜 41 | 感觉你我彼此都那么依恋 42 | 43 | 你落入封闭的地牢 44 | 发不出求救的讯号 45 | 我折返这古堡 提着刀 46 | 杀红了眼 不依不饶 47 | 48 | 你落入封闭的地牢 49 | 发不出求救的讯号 50 | 我却能感应到 51 | 打开锁链 你浅浅笑和我拥抱 52 | 53 | 我从来没有想到的是 54 | 55 | 这是你我第一次拥抱 带着浅浅笑 56 | 你说会陪我一直到老 远离这尘嚣 57 | 我闭着眼感受幸福的微妙 58 | 把刀剑扔掉 59 | 你突然转身 匕首刺进我的心脏 60 | 带着浅浅笑 61 | 62 | -------------------------------------------------------------------------------- 
/WangYi_Music/歌词/千古.txt: -------------------------------------------------------------------------------- 1 | 2 | 作曲 : 许嵩 3 | 作词 : 许嵩 4 | 夏蝉冬雪 5 | 不过轮回一瞥 6 | 悟道修炼 7 | 不问一生缘劫 8 | 白纸画卷 9 | 寥寥几笔绘江湖深浅 10 | 难绘你 11 | 不染纤尘的容颜 12 | 夜不成眠 13 | 心还为谁萦牵 14 | 灯火竹帘 15 | 梦里随风摇曳 16 | 月华似练 17 | 遥看万载沧海成桑田 18 | 它不言 19 | 不言命途的明灭 20 | 若流芳千古 21 | 爱的人却反目 22 | 错过了幸福 23 | 谁又为我在乎 24 | 若贻笑千古 25 | 因为爱得执迷又糊涂 26 | 也不悔做你的信徒 27 | 夜不成眠 28 | 心还为谁萦牵 29 | 灯火竹帘 30 | 梦里随风摇曳 31 | 月华似练 32 | 遥看万载沧海成桑田 33 | 它不言 34 | 不言命途的明灭 35 | 若流芳千古 36 | 爱的人却反目 37 | 错过了幸福 38 | 谁又为我在乎 39 | 若贻笑千古 40 | 因为爱得执迷又糊涂 41 | 也不悔做你的信徒 42 | 若流芳千古 43 | 爱的人却反目 44 | 错过了幸福 45 | 谁又为我在乎 46 | 若贻笑千古 47 | 因为爱得执迷又糊涂 48 | 也不悔做你的信徒 49 | 也不悔做你的信徒 50 | -------------------------------------------------------------------------------- /WangYi_Music/歌词/千百度.txt: -------------------------------------------------------------------------------- 1 | 作曲 : 许嵩 2 | 作词 : 许嵩 3 | 编曲:杨阳 4 | 5 | 关外野店 烟火绝 客怎眠 6 | 7 | 寒来袖间 谁为我 添两件 8 | 9 | 三四更雪 风不减 吹袭一夜 10 | 只是可怜 瘦马未得好歇 11 | 12 | 怅然入梦 梦几月 醒几年 13 | 14 | 往事凄艳 用情浅 两手缘 15 | 16 | 鹧鸪清怨 听得见 飞不回堂前 17 | 18 | 旧楹联红褪墨残谁来揭 19 | 20 | 我寻你千百度 日出到迟暮 21 | 一瓢江湖我沉浮 22 | 23 | 我寻你千百度 又一岁荣枯 24 | 可你从不在 灯火阑珊处 25 | 26 | 怅然入梦 梦几月 醒几年 27 | 28 | 往事凄艳 用情浅 两手缘 29 | 30 | 鹧鸪清怨 听得见 飞不回堂前 31 | 旧楹联红褪墨残谁来揭 32 | 33 | 我寻你千百度 日出到迟暮 34 | 一瓢江湖我沉浮 35 | 36 | 我寻你千百度 又一岁荣枯 37 | 38 | 可你从不在 灯火阑珊处 39 | 40 | 我寻你千百度 日出到迟暮 41 | 一瓢江湖我沉浮 42 | 43 | 我寻你千百度 又一岁荣枯 44 | 45 | 你不在 灯火阑珊处 46 | -------------------------------------------------------------------------------- /WangYi_Music/歌词/城府.txt: -------------------------------------------------------------------------------- 1 | 作曲 : 许嵩 2 | 作词 : 许嵩 3 | 你走之后 一个夏季熬成一个秋 4 | 我的书上你的正楷眉清目秀 5 | 一字一字宣告我们和平分手 6 | 好委婉的交流 还带一点征求 7 | 你已成风 幻化的雨下错了季候 8 | 明媚的眼眸里温柔化为了乌有 9 | 一层一层院墙把你的心困守 10 | 如果没法回头 这样也没不妥 11 | 你的城府有多深 12 | 我爱的有多蠢 是我太笨 13 | 还是太认真 幻想和你过一生 14 | 你的城府有多深 15 | 我爱的有多蠢 不想再问 16 | 也无法去恨 毕竟你是我最爱的人 17 | 曾经你的眼神 看起来那么单纯 18 | 嗯 指向你干净的灵魂 19 | 什么时候开始变得满是伤痕 20 | 戴上假面也好 如果不会疼 21 | 爱情这个世界 有那么多的悖论 22 | 小心翼翼不见得就会获得满分 23 | 我们之间缺少了那么多信任 24 | 最后还是没有 打开那扇心门 25 | 你的城府有多深 26 | 我爱的有多蠢 是我太笨 27 | 还是太认真 幻想和你过一生 28 | 你的城府有多深 29 | 我爱的有多蠢 不想再问 30 | 也无法去恨 毕竟你是我最爱的人 31 | 我曾经苦笑着问过我自己 32 | 在某个夜里 卸下伪装的你 33 | 是不是也会哭泣 34 | 你的城府有多深 35 | 我爱的有多蠢 是我太笨 36 | 还是太认真 幻想和你过一生 37 | 你的城府有多深 38 | 我爱的有多蠢 不想再问 39 | 也无法去恨 毕竟你是爱过我的人 40 | -------------------------------------------------------------------------------- /WangYi_Music/歌词/多余的解释.txt: -------------------------------------------------------------------------------- 1 | 那阵子我们的感情出了一些问题 2 | 可是我也不太清楚问题出在哪里 3 | 你面无表情的话语不剩多少意义 4 | 就当我求求你 给我一些说明 5 | ok 我猜你只是暂时的压抑心情 6 | 不再去追问你 多给你一些关心 7 | 打电话请你去看最新的电影 8 | 你说工作很忙要加班到夜里 9 | ooook 入冬了想给你买一条围巾 10 | 怕眼光不行所以叫着紧跟潮流的妹妹和我一起 11 | 和妹妹说说笑笑 缓释最近糟糕心绪 12 | 在下一个转角却和你相遇 13 | 她只是我的妹妹 妹妹说紫色很有韵味 14 | 她只是我的妹妹 我在担心你是否误会 15 | 她只是我的妹妹 对这个解释你无所谓 16 | 我没有思想准备 看到你身旁还有一位 17 | 不知道他是谁 18 | 那阵子我们的感情出了一些问题 19 | 可是我也不太清楚问题出在哪里 20 | 你面无表情的话语不剩多少意义 21 | 就当我求求你 给我一些说明 22 | ooook 入冬了想给你买一条围巾 23 | 怕眼光不行所以叫着紧跟潮流的妹妹和我一起 24 | 和妹妹说说笑笑 缓释最近糟糕心绪 25 | 在下一个转角却和你相遇 26 | 她只是我的妹妹 妹妹说紫色很有韵味 27 | 她只是我的妹妹 我在担心你是否误会 28 | 她只是我的妹妹 对这个解释你无所谓 29 | 我没有思想准备 看到你身旁还有一位 30 | 不知道他是谁 31 | 紫色的围巾 交到你手里 32 | 你放进包里 说句谢谢你 33 | 要加班的你 却出现在这里 34 | 故事的结局不需要任何说明 35 | 她只是我的妹妹 妹妹说紫色很有韵味 36 | 她只是我的妹妹 我在担心你是否误会 37 | 她只是我的妹妹 对这个解释你无所谓 38 | 我没有思想准备 看到你身旁还有一位 39 | 不知道他是谁 40 | 她只是我的妹妹 妹妹说紫色很有韵味 41 | 她只是我的妹妹 我在担心你是否误会 42 | 她只是我的妹妹 对这个解释你无所谓 43 | 我没有思想准备 看到你身旁还有一位 44 | 不知道他是谁 45 | 46 | -------------------------------------------------------------------------------- /WangYi_Music/歌词/大千世界.txt: -------------------------------------------------------------------------------- 1 | 作曲 : 许嵩 2 | 作词 : 
许嵩 3 | 你穿着我的T恤 4 | 大到有些滑稽 5 | 像沙漠连夜大雨 6 | 规则缺席 7 | 亦真亦假的玩具 8 | 还握在你手里 9 | 吃透温柔的暴力就不称奇 10 | 我的黑框眼镜在Assad湖边走火 11 | 风马牛齐聚 12 | 你被带走时我亲吻了你下颌的伤疤 13 | 表情很平静 14 | 你是大千世界一汪清泉 15 | 还是泉边那只神秘孔雀 16 | 在和你灵魂谋面之前 17 | 让贪念趁火打劫 18 | 你是大千世界尘埃等闲 19 | 也是我仅有的风花雪月 20 | 爱死或是恨终我都感谢 21 | 万花筒里消受幻影碎片 22 | 23 | 万花筒里消受 24 | 你是大千世界一汪清泉 25 | 还是泉边那只神秘孔雀 26 | 在和你灵魂谋面之前 27 | 让贪念趁火打劫 28 | 你是大千世界过眼云烟 29 | 也是我仅有的夺目闪电 30 | 躁动或是寡言我都奉献 31 | 万花筒里留下真切纪念 32 | -------------------------------------------------------------------------------- /WangYi_Music/歌词/天龙八部之宿敌.txt: -------------------------------------------------------------------------------- 1 | 2 | 作曲 : 许嵩 3 | 作词 : 许嵩 4 | 会在何处见到你 5 | 莫非前尘已注定 6 | 飞过时空的距离 7 | 却囿于刀剑光影 8 | 三月春花渐次醒 9 | 迢迢年华谁老去 10 | 是劫是缘随我心 11 | 除了你万敌不侵 12 | 当恩怨各一半 13 | 我怎么圈揽 14 | 看灯笼血红染 15 | 寻仇已太晚 16 | 月下门童喟叹 17 | 昨夜太平长安 18 | 当天上星河转 19 | 我命已定盘 20 | 待绝笔墨痕干 21 | 宿敌已来犯 22 | 我借你的孤单 23 | 今生恐怕难还 24 | 缠扰孤岛的雪雨 25 | 飘飘洒洒谁来停 26 | 摘取一颗海上星 27 | 陪我终夜不孤寂 28 | 灵柩长埋深谷底 29 | 没有永远的秘密 30 | 染指江湖结悲局 31 | 无人逃得过宿命 32 | 当恩怨各一半 33 | 我怎么圈揽 34 | 看灯笼血红染 35 | 寻仇已太晚 36 | 月下门童喟叹 37 | 昨夜太平长安 38 | 当天上星河转 39 | 我命已定盘 40 | 待绝笔墨痕干 41 | 宿敌已来犯 42 | 我借你的孤单 43 | 今生恐怕难还 44 | 当恩怨各一半 45 | 我怎么圈揽 46 | 看灯笼血红染 47 | 寻仇已太晚 48 | 月下门童喟叹 49 | 昨夜太平长安 50 | 当天上星河转 51 | 我命已定盘 52 | 待绝笔墨痕干 53 | 宿敌已来犯 54 | 我借你的孤单 55 | 今生恐怕难还 56 | -------------------------------------------------------------------------------- /WangYi_Music/歌词/如果当时.txt: -------------------------------------------------------------------------------- 1 | 作曲 : 许嵩 2 | 作词 : 许嵩 3 | 为什么 你当时对我好 4 | 又为什么 现在变得冷淡了 5 | 我知道 爱要走难阻挠 6 | 反正不是我的 我也不该要 7 | 你和我 曾经有共同爱好 8 | 谁的耳边 总有绝句在萦绕 9 | 我们俩 用文言文对话真的很搞笑 10 | 还笑那曹操贪慕着小乔 11 | 天灰了 雨坠了 12 | 视线要模糊了 13 | 此时感觉到你的重要 14 | 爱走了 心走了 15 | 你说你要走了 16 | 我为你唱最后的古谣 17 | 红雨瓢泼泛起了回忆怎么潜 18 | 你美目如当年 19 | 流转我心间 20 | 渡口边最后一面洒下了句点 21 | 与你若只如初见 22 | 何须感伤离别 23 | 你和我 曾经有共同爱好 24 | 谁的耳边 总有绝句在萦绕 25 | 我们俩 用文言文对话真的很搞笑 26 | 还笑那曹操贪慕着小乔 27 | 天灰了 雨坠了 28 | 视线要模糊了 29 | 此时感觉到你的重要 30 | 爱走了 心走了 31 | 你说你要走了 32 | 我为你唱最后的古谣 33 | 红雨瓢泼泛起了回忆怎么潜 34 | 你美目如当年 35 | 流转我心间 36 | 渡口边最后一面洒下了句点 37 | 与你若只如初见 38 | 何须感伤离别 39 | 红雨瓢泼泛起了回忆怎么潜 40 | 你美目如当年 41 | 流转我心间 42 | 渡口边最后一面洒下了句点 43 | 与你若只如初见 44 | 何须感伤离别 45 | 红雨瓢泼泛起了回忆怎么潜 46 | 你美目如当年 47 | 流转我心间 48 | 渡口边最后一面洒下了句点 49 | 与你若只如初见 50 | 何须感伤离别 51 | -------------------------------------------------------------------------------- /WangYi_Music/歌词/幻听.txt: -------------------------------------------------------------------------------- 1 | 作曲 : 许嵩 2 | 作词 : 许嵩 3 | 在远方的时候 4 | 又想你到泪流 5 | 这矫情的措辞结构 6 | 经历过的人会懂 7 | 那些不堪言的疼痛 8 | 也就是我自作自受 9 | 你没有装聋 10 | 你真没感动 11 | 12 | 一个人的时候 13 | 偷偷看你的微博 14 | 你转播的歌好耳熟 15 | 我们坐一起听过 16 | 当日嫌它的唱法做作 17 | 现在听起来竟然很生动 18 | 可能是时光让耳朵变得宽容 19 | 20 | 如今一个人听歌总是会觉得失落 21 | 幻听你在我的耳边轻轻诉说 22 | 夜色多温柔 23 | 你有多爱我 24 | 如今一个人听歌总是会觉得难过 25 | 爱已不在这里我却还没走脱 26 | 列表里的歌 27 | 随过往流动 28 | 29 | 一个人的时候 30 | 偷偷看你的微博 31 | 你每天做了些什么 32 | 我都了然于胸 33 | 当时嫌你的蠢话太多 34 | 现在回想起画面已泛旧 35 | 可能是孤独让情绪变得脆弱 36 | 37 | 如今一个人听歌总是会觉得失落 38 | 幻听你在我的耳边轻轻诉说 39 | 夜色多温柔 40 | 你有多爱我 41 | 如今一个人听歌总是会觉得难过 42 | 爱已不在这里我却还没走脱 43 | 列表里的歌 44 | 随过往流动 45 | 46 | 如今一个人听歌总是会觉得失落 47 | 幻听你在我的耳边轻轻诉说 48 | 夜色多温柔 49 | 你有多爱我 50 | 如今一个人听歌总是会觉得难过 51 | 爱已不在这里我却还没走脱 52 | 如果你回头 53 | 不要放下我 54 | 55 | 56 | -------------------------------------------------------------------------------- /WangYi_Music/歌词/庐州月.txt: -------------------------------------------------------------------------------- 1 | 作曲 : 许嵩 2 | 作词 : 许嵩 3 | 儿时凿壁偷了谁家的光 4 | 宿昔不梳 一苦十年寒窗 5 | 如今灯下闲读 红袖添香 6 | 半生浮名只是虚妄 7 | 三月 一路烟霞 莺飞草长 8 | 柳絮纷飞里看见了故乡 9 | 不知心上的你是否还在庐阳 10 | 一缕青丝一生珍藏 11 | 桥上的恋人入对出双 12 | 桥边红药叹夜太漫长 13 | 月也摇晃 人也彷徨 14 | 乌蓬里传来了一曲离殇 15 | 庐州月光 洒在心上 16 | 月下的你不复当年模样 17 | 太多的伤 难诉衷肠 18 | 叹一句当时只道是寻常 19 
| 庐州月光 梨花雨凉 20 | 如今的你又在谁的身旁 21 | 家乡月光 深深烙在我心上 22 | 却流不出当年泪光 23 | 三月 一路烟霞 莺飞草长 24 | 柳絮纷飞里看见了故乡 25 | 不知心上的你是否还在庐阳 26 | 一缕青丝一生珍藏 27 | 桥上的恋人入对出双 28 | 桥边红药叹夜太漫长 29 | 月也摇晃 人也彷徨 30 | 乌蓬里传来了一曲离殇 31 | 庐州月光 洒在心上 32 | 月下的你不复当年模样 33 | 太多的伤 难诉衷肠 34 | 叹一句当时只道是寻常 35 | 庐州月光 梨花雨凉 36 | 如今的你又在谁的身旁 37 | 家乡月光 深深烙在我心上 38 | 却流不出当年泪光 39 | 庐州的月光 在我心上 40 | 太多的伤 难诉衷肠 41 | 如今的你在谁的身旁 42 | 我流不出当年泪光 43 | 庐州月光 洒在心上 44 | 月下的你不复当年模样 45 | 太多的伤 难诉衷肠 46 | 叹一句当时只道是寻常 47 | 庐州月光 梨花雨凉 48 | 如今的你又在谁的身旁 49 | 家乡月光 深深烙在我心上 50 | 却流不出当年泪光 51 | -------------------------------------------------------------------------------- /WangYi_Music/歌词/惊鸿一面.txt: -------------------------------------------------------------------------------- 1 | 2 | 作曲 : 许嵩 3 | 作词 : 许嵩 4 | 翻手为云覆手为雨 5 | 金盆洗手止风雨 6 | 不恋红尘却难舍回忆 7 | 每一段都有你 8 | 年少初遇常在我心 9 | 多年不减你深情 10 | 江山如画又怎能比拟 11 | 你送我的风景 12 | 柳下闻瑶琴起舞和一曲 13 | 仿佛映当年翩若惊鸿影 14 | 谁三言两语撩拨了情意 15 | 谁一颦一笑摇曳了星云 16 | 纸扇藏伏笔玄机诗文里 17 | 紫烟燃心语留香候人寻 18 | 史书列豪杰功过有几许 19 | 我今生何求惟你 20 | 年少初遇常在我心 21 | 多年不减你深情 22 | 江山如画又怎能比拟 23 | 你送我的风景 24 | 柳下闻瑶琴起舞和一曲 25 | 仿佛映当年翩若惊鸿影 26 | 谁三言两语撩拨了情意 27 | 谁一颦一笑摇曳了星云 28 | 纸扇藏伏笔玄机诗文里 29 | 紫烟燃心语留香候人寻 30 | 史书列豪杰功过有几许 31 | 我今生何求惟你 32 | 远山传来清晨悠然的曲笛 33 | 晓风掠走光阴 34 | 残月沉霜鬓里 35 | 有了你 36 | 恩怨都似飞鸿踏雪泥 37 | 柳下闻瑶琴起舞和一曲 38 | 仿佛映当年翩若惊鸿影 39 | 谁三言两语撩拨了情意 40 | 谁一颦一笑摇曳了星云 41 | 纸扇藏伏笔玄机诗文里 42 | 紫烟燃心语留香候人寻 43 | 史书列豪杰功过有几许 44 | 我今生何求惟你 45 | 我今生何求惟你 46 | -------------------------------------------------------------------------------- /WangYi_Music/歌词/想象之中.txt: -------------------------------------------------------------------------------- 1 | 作曲 : 许嵩 2 | 作词 : 许嵩 3 | 4 | 想象之中雨过一道彩虹 5 | 抬起了头 瑟瑟灰色天空 6 | 想象之中 付出会有结果 7 | 毫无保留 信奉你的承诺 8 | 想象之中 这次要爱很久 9 | 我领略过 你眼里的温柔热烈以后 10 | 你忽然的冰冻 判若两人 丢给我去承受 11 | 想象中 很不同 12 | 想象中一切都和后来不同 13 | 我承认 曾经那么心动 14 | 你没想象中那么恋旧 15 | 回忆唤不回你的温柔 16 | 最后也不是故作冷漠 17 | 转过头 我怎么有一滴泪落 18 | 我没想象中那么脆弱 19 | 分开后形容也没消瘦 20 | 一起踏过了几座春秋 21 | 领悟了爱不是追逐占有 22 | 23 | 想象之中 这次要爱很久 24 | 我领略过 你眼里的温柔 oh 热烈以后 25 | 你忽然的冰冻 判若两人 丢给我去承受 26 | 想象中 很不同 27 | 想象中一切都和后来不同 28 | 我承认 曾经那么心动 29 | 你没想象中那么恋旧 30 | 回忆唤不回你的温柔 31 | 最后也不是故作冷漠 32 | 转过头 我怎么有一滴泪落 33 | 我没想象中那么脆弱 34 | 分开后形容也没消瘦 35 | 一起踏过了几座春秋 36 | 领悟了爱不是追逐占有 37 | 38 | 你没想象中那么恋旧 39 | 回忆唤不回你的温柔 40 | 最后也不是故作冷漠 41 | 转过头 我怎么有一滴泪落 42 | 我没想象中那么脆弱 43 | 分开后形容也没消瘦 44 | 一起踏过了几座春秋 45 | 领悟了爱不是追逐占有 46 | -------------------------------------------------------------------------------- /WangYi_Music/歌词/我想牵着你的手.txt: -------------------------------------------------------------------------------- 1 | 作曲 : 许嵩 2 | 作词 : 许嵩 3 | Vae他有一些烦恼 4 | 反正现在的年轻人 5 | 都有许多烦恼 6 | 那么多要思考 7 | 那么多要寻找 8 | 诱惑太多 不坚定就犯错了 9 | 10 | 朋友说 尘世被你夸那么美 11 | 可是现实挺倒霉 12 | 领导天天要开会 13 | 上班迟到几分钟就被扣薪水 14 | 同事就像敌人要小心翼翼防备 15 | 16 | 老师不喜欢男生长头发 17 | 妈妈不喜欢女儿长指甲 18 | 什么都被管 什么都看不惯 19 | 什么都没力量推翻 20 | 学习生存之道又不安 21 | 22 | ho 地球太寒冷 23 | ho 距离产生美 24 | ho 远走八十八万公里 25 | ho hoo 26 | 27 | 我想牵着你的手 28 | 两个人去宇宙 29 | 没引力左右 30 | 夜光映出你的温柔 31 | 我想牵着你的手 32 | 逃离这颗星球 33 | 剥落了忧愁 34 | 快乐就在十指相扣 35 | 36 | 大家好我是Vae 我打一下岔 37 | 请你和我一起跟着节奏拍拍手 38 | 拍拍手晚上睡觉就能梦游 39 | 梦见跟爱的人去公园走走 40 | 走来走去不知道有什么走头 41 | 这世界让你和她觉得不爽 42 | 我想你其实也想离开这星球 43 | 44 | ho 地球太寒冷 45 | ho 距离产生美 46 | ho 远走八十八万公里 47 | ho hoo 48 | 49 | 我想牵着你的手 50 | 两个人去宇宙 51 | 没引力左右 52 | 夜光映出你的温柔 53 | 我想牵着你的手 54 | 逃离这颗星球 55 | 剥落了忧愁 56 | 快乐就在十指相扣 57 | 58 | 我想牵着你的手 59 | 两个人去宇宙 60 | 没引力左右 61 | 夜光映出你的温柔 62 | 我想牵着你的手 63 | 逃离这颗星球 64 | 剥落了忧愁 65 | 快乐就在十指相扣 66 | 67 | 我想牵着你的手 68 | 两个人去宇宙 69 | 没引力左右 70 | 夜光映出你的温柔 71 | 我想牵着你的手 72 | 逃离这颗星球 73 | 剥落了忧愁 74 | 快乐就在十指相扣 75 | 76 | 我想牵着你的手 77 | 78 | Kistory for my Gui 79 | -------------------------------------------------------------------------------- /WangYi_Music/歌词/拆东墙.txt: 
-------------------------------------------------------------------------------- 1 | 作曲 : 许嵩 2 | 作词 : 许嵩 3 | 公元六五九年 十九岁 他接他爹的班 4 | 考不取功名的后果是接手自家的酒馆 5 | 又听说同乡谁已经赴京做上小官 6 | 他的梦 往来客谁能买单 7 | 8 | 代代叹世道难 人心乱 可又能怎么办 9 | 他女人的美丽对比映衬出他的难看 10 | 朋友说 他不爱 没有爱 只是贪他小财 11 | 可他爱 连菜都自己去买 12 | 13 | 掌柜的小破酒馆被人拆了东墙 14 | 后来衙门说按一平米八吊钱来跟他折算 15 | 他不干 他不干 百年招牌祖祖辈辈流传下来 16 | 挣的并不快 但人熟地熟 还算落得个自在 17 | 18 | 掌柜的小破酒馆被人拆了东墙 19 | 后来有人看见他冒雪背着行囊暗夜离开 20 | 丢下老 丢下少 他是否也曾无奈 21 | 一去若回来 老家的酒香还在不在 22 | 23 | 代代叹世道难 人心乱 可又能怎么办 24 | 他女人的美丽对比映衬出他的难看 25 | 朋友说 他不爱 没有爱 只是贪他小财 26 | 可他爱 连菜都自己去买 27 | 28 | 掌柜的小破酒馆被人拆了东墙 29 | 后来衙门说按一平米八吊钱来跟他折算 30 | 他不干 他不干 百年招牌祖祖辈辈流传下来 31 | 挣的并不快 但人熟地熟 还算落得个自在 32 | 33 | 掌柜的小破酒馆被人拆了东墙 34 | 后来有人看见他冒雪背着行囊暗夜离开 35 | 丢下老 丢下少 他是否也曾无奈 36 | 一去若回来 老家的酒香还在不在 37 | 38 | 掌柜的小破酒馆被人拆了东墙 39 | 后来衙门说按一平米八吊钱来跟他折算 40 | 他不干 他不干 百年招牌祖祖辈辈流传下来 41 | 挣的并不快 但人熟地熟 还算落得个自在 42 | 43 | 掌柜的小破酒馆被人拆干净了 44 | 后来有人说那夜他被揍到走路一瘸一拐 45 | 兴也苦 亡也苦 青史总让人无奈 46 | 更迭了朝代 当时的明月换拨人看 47 | 48 | 西墙补不来 49 | 可东墙面子上还得拆 50 | 51 | -------------------------------------------------------------------------------- /WangYi_Music/歌词/断桥残雪.txt: -------------------------------------------------------------------------------- 1 | 作曲 : 许嵩 2 | 作词 : 许嵩 3 | 寻不到花的折翼枯叶蝶 4 | 永远也看不见凋谢 5 | 江南夜色下的小桥屋檐 6 | 读不懂塞北的荒野 7 | 8 | 梅开时节因寂寞而缠绵 9 | 春归后又很快湮灭 10 | 独留我赏烟花飞满天 11 | 摇曳后就随风飘远 12 | 13 | 断桥是否下过雪 14 | 我望着湖面 15 | 水中寒月如雪 16 | 指尖轻点融解 17 | 18 | 断桥是否下过雪 19 | 又想起你的脸 20 | 若是无缘再见 21 | 白堤柳帘垂泪好几遍 22 | 23 | 寻不到花的折翼枯叶蝶 24 | 永远也看不见凋谢 25 | 江南夜色下的小桥屋檐 26 | 读不懂塞北的荒野 27 | 28 | 梅开时节因寂寞而缠绵 29 | 春归后又很快湮灭 30 | 独留我赏烟花飞满天 31 | 摇曳后就随风飘远 32 | 33 | 断桥是否下过雪 34 | 我望着湖面 35 | 水中寒月如雪 36 | 指尖轻点融解 37 | 38 | 断桥是否下过雪 39 | 又想起你的脸 40 | 若是无缘再见 41 | 白堤柳帘垂泪好几遍 42 | -------------------------------------------------------------------------------- /WangYi_Music/歌词/明智之举.txt: -------------------------------------------------------------------------------- 1 | 作曲 : 许嵩 2 | 作词 : 许嵩 3 | 4 | 你在北方某城 很偶尔下雨 5 | 我在天南海北 很偶尔想你 6 | 写不来十八九岁煽情字句 7 | 孤单喂饱了理性 8 | 9 | 想必你也看过了一些风景 10 | 才明白什么样的适合自己 11 | 翻着你朋友圈的幸福合影 12 | 由衷的为你高兴 13 | 14 | 我曾在意的你 15 | 想说声对不起 16 | 年少时的任性 17 | 有些话伤人不轻 18 | 也怀疑自己 19 | 不是理想伴侣 20 | 你的离开也许是个明智之举 21 | 22 | 我曾在意的你 23 | 给过太多悲喜 24 | 承蒙时光洗礼 25 | 往事已云淡风轻 26 | 当我们老去 27 | 品尝丰盛回忆 28 | 每一道失去都是醇厚的赐予 29 | 30 | 你在北方某城 很偶尔下雨 31 | 我在天南海北 很偶尔想你 32 | 写不来十八九岁煽情字句 33 | 孤单喂饱了理性 34 | 35 | 想必你也看过了一些风景 36 | 才明白什么样的适合自己 37 | 翻着你朋友圈的幸福合影 38 | 由衷的为你高兴 39 | 40 | 我曾在意的你 41 | 想说声对不起 42 | 年少时的任性 43 | 有些话伤人不轻 44 | 也怀疑自己 45 | 不是理想伴侣 46 | 你的离开也许是个明智之举 47 | 48 | 我曾在意的你 49 | 给过太多悲喜 50 | 承蒙时光洗礼 51 | 往事已云淡风轻 52 | 当我们老去 53 | 品尝丰盛回忆 54 | 每一道失去 55 | 56 | 你曾笑着问我 57 | 如若重新来过 58 | 结局会不会不同 59 | 我出神了许久 60 | 神游在初见的午后 61 | 62 | 我曾在意的你 63 | 想说声对不起 64 | 年少时的任性 65 | 有些话伤人不轻 66 | 也怀疑自己 67 | 不是理想伴侣 68 | 你的离开也许是个明智之举 69 | 70 | 我曾在意的你 71 | 给过太多悲喜 72 | 承蒙时光洗礼 73 | 往事已云淡风轻 74 | 75 | 当我们老去 76 | 品尝丰盛回忆 77 | 每一道失去 78 | 都是醇厚的赐予 79 | 80 | -------------------------------------------------------------------------------- /WangYi_Music/歌词/星座书上.txt: -------------------------------------------------------------------------------- 1 | 作曲 : 许嵩 2 | 作词 : 许嵩 3 | 星光点亮了 4 | 海水泛起皱褶 5 | 晚风咸咸的 6 | 吹散你我身旁余热 7 | 不够彼此信任 8 | 还是有了裂痕 9 | 为什么感觉有些陌生了 10 | 沿海岸奔跑 11 | 寻找属于我们的岛 12 | 有一些问号 13 | 也许对你并不重要 14 | 可很久没深聊 15 | 也很久没拥抱 16 | 翻开书本把答案寻找 17 | 星座书上说我们不合 18 | 金牛座的我配不上你的好 19 | 难过后想想也许只是碰巧 20 | 我们的故事写书人怎明了 21 | 星座书上说我们不合 22 | 最后我偷偷把那页撕掉 23 | 真的爱情没法预料 24 | 何必让你知道 25 | 就算你早知道 26 | 沿海岸奔跑 27 | 寻找属于我们的岛 28 | 有一些问号 29 | 也许对你并不重要 30 | 可很久没深聊 31 | 也很久没拥抱 32 | 翻开书本把答案寻找 33 | 星座书上说我们不合 34 | 金牛座的我配不上你的好 35 | 难过后想想也许只是碰巧 36 | 我们的故事写书人怎明了 37 | 星座书上说我们不合 38 | 最后我偷偷把那页撕掉 39 | 真的爱情没法预料 40 | 何必让你・・・ 41 | 星座书上说我们不合 42 | 金牛座的我配不上你的好 43 | 
难过后想想也许只是碰巧 44 | 我们的故事写书人怎明了 45 | 星座书上说我们不合 46 | 最后我偷偷把那页撕掉 47 | 真的爱情没法预料 48 | 何必让你知道 49 | 就算你早知道 50 | 51 | -------------------------------------------------------------------------------- /WangYi_Music/歌词/有何不可.txt: -------------------------------------------------------------------------------- 1 | 作曲 : 许嵩 2 | 作词 : 许嵩 3 | 天空好想下雨 4 | 我好想住你隔壁 5 | 傻站在你家楼下 6 | 抬起头数乌云 7 | 如果场景里出现一架钢琴 8 | 我会唱歌给你听 9 | 哪怕好多盆水往下淋 10 | 夏天快要过去 11 | 请你少买冰淇淋 12 | 天凉就别穿短裙 13 | 别再那么淘气 14 | 如果有时不那么开心 15 | 我愿意将格洛米借给你 16 | 你其实明白我心意 17 | 为你唱这首歌没有什么风格 18 | 它仅仅代表着我想给你快乐 19 | 为你解冻冰河为你做一只扑火的飞蛾 20 | 没有什么事情是不值得 21 | 为你唱这首歌没有什么风格 22 | 它仅仅代表着我希望你快乐 23 | 为你辗转反侧为你放弃世界有何不可 24 | 夏末秋凉里带一点温热有换季的颜色 25 | 26 | 天空好想下雨 27 | 我好想住你隔壁 28 | 傻站在你家楼下 29 | 抬起头数乌云 30 | 如果场景里出现一架钢琴 31 | 我会唱歌给你听 32 | 哪怕好多盆水往下淋 33 | 夏天快要过去 34 | 请你少买冰淇淋 35 | 天凉就别穿短裙 36 | 别再那么淘气 37 | 如果有时不那么开心 38 | 我愿意将格洛米借给你 39 | 你其实明白我心意 40 | 为你唱这首歌没有什么风格 41 | 它仅仅代表着我想给你快乐 42 | 为你解冻冰河为你做一只扑火的飞蛾 43 | 没有什么事情是不值得 44 | 为你唱这首歌没有什么风格 45 | 它仅仅代表着我希望你快乐 46 | 为你辗转反侧为你放弃世界有何不可 47 | 夏末秋凉里带一点温热 48 | 49 | 为你解冻冰河为你做一只扑火的飞蛾 50 | 没有什么事情是不值得 51 | 为你唱这首歌没有什么风格 52 | 它仅仅代表着我希望你快乐 53 | 为你辗转反侧为你放弃世界有何不可 54 | 夏末秋凉里带一点温热有换季的颜色 55 | -------------------------------------------------------------------------------- /WangYi_Music/歌词/江湖 .txt: -------------------------------------------------------------------------------- 1 | 2 | 作曲 : 许嵩 3 | 作词 : 许嵩 4 | 今夕是何夕 5 | 晚风过花庭 6 | 飘零 予人乐后飘零 7 | 故地是何地 8 | 死生不复回 9 | 热血 风干在旧恨里 10 | 衣锦夜行 当一生尘埃落定 11 | 飞鸽来急 那落款沾染血迹 12 | 夜半嘱小徒复信 言师已故去 13 | 星云沉默江湖里 14 | 孤雁飞去 红颜来相许 15 | 待到酒清醒 她无影 原来是梦里 16 | 恩怨散去 刀剑已归隐 17 | 敬属江上雨 寒舟里 我独饮 18 | 衣锦夜行 当一生尘埃落定 19 | 飞鸽来急 那落款沾染血迹 20 | 夜半嘱小徒复信 言师已故去 21 | 星云沉默江湖里 22 | 孤雁飞去 红颜来相许 23 | 待到酒清醒 她无影 原来是梦里 24 | 恩怨散去 刀剑已归隐 25 | 敬属江上雨 寒舟里 我独饮 26 | 孤雁飞去 红颜来相许 27 | 待到酒清醒 她无影 原来是梦里 28 | 恩怨散去 刀剑已归隐 29 | 敬属江上雨 寒舟里 我独饮 30 | 我独饮 31 | -------------------------------------------------------------------------------- /WangYi_Music/歌词/河山大好.txt: -------------------------------------------------------------------------------- 1 | 作曲 : 许嵩 2 | 作词 : 许嵩 3 | 最近亚健康状态 4 | 坐久了腿发软 5 | 电脑看久了脖子它也会酸 6 | 数字时代貌似把生活节奏加快 7 | 也让人变得行动迟缓 8 | 9 | 忙忙忙 10 | 忙出个什么所以然 11 | 地球离了谁它都照样公转自转 12 | 叹叹叹 13 | 弹指一挥人生苦短 14 | 终点不明沿途风景要好好看 15 | 你可以隐隐期待 16 | 途中佳缘到来 17 | 保持浪漫心态 18 | 活着就不算坏 19 | 家国大好河山 20 | 不必崇洋媚外 21 | 好地方一生都看不完 22 | 峨眉山庐山黄山嵩山 23 | 抓紧周末带爸爸妈妈去转一转 24 | 北京西安洛阳开封安阳南京杭州 25 | 睹一睹古都的风采 26 | 心情大好 出去走走 27 | 碧海蓝天 吹吹风 28 | 河山大好 出去走走 29 | 别窝在家 当懒虫 30 | 心情大好 出去走走 31 | 碧海蓝天 吹吹风 32 | 河山大好 出去走走 33 | 别窝在家 当懒虫 34 | 35 | 忙忙忙 36 | 忙出个什么所以然 37 | 叹叹叹 38 | 弹指一挥人生苦短 39 | 终点不明沿途风景要好好看 40 | 你可以隐隐期待 41 | 途中佳缘到来 42 | 保持浪漫心态 43 | 活着就不算坏 44 | 家国大好河山 45 | 不必崇洋媚外 46 | 好地方一生都看不完 47 | 峨眉山庐山黄山嵩山 48 | 抓紧周末带爸爸妈妈去转一转 49 | 北京西安洛阳开封安阳南京杭州 50 | 睹一睹古都的风采 51 | 心情大好 出去走走 52 | 碧海蓝天 吹吹风 53 | 河山大好 出去走走 54 | 别窝在家 当懒虫 55 | 心情大好 出去走走 56 | 碧海蓝天 吹吹风 57 | 河山大好 出去走走 58 | 别窝在家 当懒虫 59 | 60 | 心情大好 河山大好 61 | 心情大好 河山大好 62 | 心情大好 出去走走 63 | 碧海蓝天 吹吹风 64 | 河山大好 出去走走 65 | 只不过是 河山大好 66 | -------------------------------------------------------------------------------- /WangYi_Music/歌词/清明雨上.txt: -------------------------------------------------------------------------------- 1 | 作曲 : 许嵩 2 | 作词 : 许嵩 3 | 窗透初晓 日照西桥 云自摇 4 | 想你当年荷风微摆的衣角 5 | 木雕流金 岁月涟漪 七年前封笔 6 | 因为我今生挥毫只为你 7 | 雨打湿了眼眶 年年倚井盼归堂 8 | 最怕不觉泪已拆两行 9 | 我在人间彷徨 寻不到你的天堂 10 | 东瓶西镜放 恨不能遗忘 11 | 又是清明雨上 折菊寄到你身旁 12 | 把你最爱的歌来轻轻唱 13 | 远方有琴 愀然空灵 声声催天雨 14 | 涓涓心事说给自己听 15 | 月影憧憧 烟火几重 烛花红 16 | 红尘旧梦 梦断都成空 17 | 雨打湿了眼眶 年年倚井盼归堂 18 | 最怕不觉泪已拆两行 19 | 我在人间彷徨 寻不到你的天堂 20 | 东瓶西镜放 恨不能遗忘 21 | 又是清明雨上 折菊寄到你身旁 22 | 把你最爱的歌来轻轻唱 23 | 我在人间彷徨 寻不到你的天堂 24 | 东瓶西镜放 恨不能遗忘 25 | 又是清明雨上 折菊寄到你身旁 26 | 把你最爱的歌来轻轻唱 27 | 28 | 29 | 30 | 
-------------------------------------------------------------------------------- /WangYi_Music/歌词/灰色头像.txt: -------------------------------------------------------------------------------- 1 | 2 | 作曲 : 许嵩 3 | 作词 : 许嵩 4 | 昨夜做了一个梦 5 | 梦里我们回到手牵着手 6 | 醒来的失落 无法言说 7 | 打开了OICQ 8 | 聊天记录停步去年的深秋 9 | 最后的挽留 没有说出口 10 | 我们还是朋友 11 | 是那种最遥远的朋友 12 | 你给过的温柔 13 | 在记录之中 全部都保有 14 | 你灰色头像不会再跳动 15 | 哪怕是一句简单的问候 16 | 心贴心的交流一页页翻阅多难过 17 | 是什么 坠落 升空 18 | 又想起你曾说的陪我到最后 19 | 暖色的梦变冰凉的枷锁 20 | 如果时光倒流我们又能抓得住什么 21 | 打开了OICQ 22 | 聊天记录停步去年的深秋 23 | 最后的挽留 没有说出口 24 | 我们还是朋友 25 | 是那种最遥远的朋友 26 | 你给过的温柔 27 | 在记录之中 全部都保有 28 | 你灰色头像不会再跳动 29 | 哪怕是一句简单的问候 30 | 心贴心的交流一页页翻阅多难过 31 | 是什么 坠落 升空 32 | 又想起你曾说的陪我到最后 33 | 暖色的梦变冰凉的枷锁 34 | 如果时光倒流我们又能抓得住什么 35 | 当我发现所谓醒来其实是另一个梦 你不在这世界 36 | 梦的出口散不开的浓雾太沉重 你不在这世界 37 | 就算当初声嘶力竭作苦苦的求你留下别走 38 | 也没用 39 | 灰色头像静静悄悄不会再跳动 40 | 我的绝望溢出胸口 41 | 是什么 坠落 升空 42 | 你灰色头像不会再跳动 43 | 暖色的梦变冰凉的枷锁 44 | 如果时光倒流我们又能抓得住什么 45 | -------------------------------------------------------------------------------- /WangYi_Music/歌词/玫瑰花的葬礼.txt: -------------------------------------------------------------------------------- 1 | 作曲 : 李毅杰 2 | 作词 : 许嵩/李毅杰 3 | 离开你一百个星期 4 | 我回到了这里 5 | 寻找我们爱过的证据 6 | 没有人愿意提起 7 | 玫瑰花它的过去 8 | 今天这里的主题 9 | 我把它叫作 回忆 10 | 我知道 爱情这东西 11 | 他没什么道理 12 | 过去我和你在一起 13 | 是我太叛逆 14 | 现在只剩我自己 15 | 偷偷的想你 16 | 17 | 玫瑰花的葬礼 18 | 埋葬关于你的回忆 19 | 感觉双手麻痹 20 | 不能自已 21 | 已拉不住你 22 | 23 | 真的好美丽 24 | 那天的烟花雨 25 | 26 | 我说要娶穿碎花洋裙的你 27 | 28 | 玫瑰花的葬礼 29 | 埋葬深深爱着的你 30 | 31 | 残朵停止呼吸 32 | 渗入大地 33 | 没人会注意 34 | 35 | 一片小雨滴 36 | 陪着我等天明 37 | 38 | 我用这最后一分钟怀念你 39 | 40 | 我在夜幕笼罩的天桥上潜行 41 | 每一级阶梯 42 | 都留着你我昔日印迹 43 | 温存迷醉 吵闹清醒 44 | 都还在我的脚畔 45 | 兜兜兜兜兜转不清 46 | 没来得及把红色玫瑰递给你 47 | 爱就像是一场雨 48 | 已经离我而去 49 | 你说过 50 | 太过鲜艳的爱情 终将凋零 51 | 52 | 玫瑰花的葬礼 53 | 埋葬关于你的回忆 54 | 感觉双手麻痹 55 | 不能自已 56 | 已拉不住你 57 | 真的好美丽 58 | 那天的烟花雨 59 | 我说要娶穿碎花洋裙的你 60 | 61 | 玫瑰花的葬礼 62 | 埋葬深深爱着的你 63 | 残朵停止呼吸 64 | 渗入大地 65 | 没人会注意 66 | 67 | 一片小雨滴 68 | 陪着我等天明 69 | 70 | 我用这最后一分钟怀念你 71 | 72 | 总是回想过去 埋怨我自己 73 | 74 | 总是不经意间 想起了你 75 | 76 | 现在的你 已经太遥不可及 77 | 78 | 只能留在我记忆 79 | 玫瑰花的葬礼 80 | 埋葬关于你的回忆 81 | 感觉双手麻痹 82 | 不能自已 83 | 已拉不住你 84 | 85 | 真的好美丽 86 | 那天的烟花雨 87 | 我说要娶穿碎花洋裙的你 88 | 89 | 玫瑰花的葬礼 90 | 埋葬深深爱着的你 91 | 残朵停止呼吸 92 | 渗入大地 93 | 没人会注意 94 | 一片小雨滴 95 | 陪着我等天明 96 | 我用这最后一分钟怀念你 97 | 98 | 我用这最后一分钟 99 | -------------------------------------------------------------------------------- /WangYi_Music/歌词/素颜.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 作曲 : 许嵩 4 | 作词 : 许嵩 5 | 又是一个安静的晚上 6 | 一个人窝在摇椅里乘凉 7 | 我承认这样真的很安详 8 | 和楼下老爷爷一样 9 | 听说你还在搞什么原创 10 | 搞来搞去好像也就这样 11 | 不如花点时间想想 12 | 琢磨一下模样 13 | 今夜化了美美的妆 14 | (我相信是很美美的妆) 15 | 我摇晃在舞池中央 16 | (那种体态可以想象) 17 | 我做我的改变 又何必纠结 18 | 那就拜托别和我碰面 19 | 如果再看你一眼 20 | 是否还会有感觉 21 | 当年素面朝天要多纯洁就有多纯洁 22 | 不画扮熟的眼线 23 | 不用抹匀粉底液 24 | 暴雨天 照逛街 25 | 偷笑别人花了脸 26 | 如果再看你一眼 27 | 是否还会有感觉 28 | 最真实的喜怒哀乐全都埋葬在昨天 29 | 不掺任何的表演 30 | 轰轰烈烈那几年 31 | 我怀念 别怀念 32 | 怀念也回不到从前 33 | 又是一个安静的晚上 34 | 一个人窝在摇椅里乘凉 35 | 我承认这样真的很安详 36 | 和楼下老爷爷一样 37 | 听说你还在搞什么原创 38 | 搞来搞去好像也就这样 39 | 不如花点时间想想 40 | 琢磨一下模样 41 | 今夜化了美美的妆 42 | (我相信是很美美的妆) 43 | 我摇晃在舞池中央 44 | (那种体态可以想象) 45 | 我做我的改变 又何必纠结 46 | 那就拜托别和我碰面 47 | 如果再看你一眼 48 | 是否还会有感觉 49 | 当年素面朝天要多纯洁就有多纯洁 50 | 不画扮熟的眼线 51 | 不用抹匀粉底液 52 | 暴雨天 照逛街 53 | 偷笑别人花了脸 54 | 如果再看你一眼 55 | 是否还会有感觉 56 | 最真实的喜怒哀乐全都埋葬在昨天 57 | 不掺任何的表演 58 | 轰轰烈烈那几年 59 | 我怀念 别怀念 60 | 怀念也回不到从前 61 | 曾经对上的瞬间 62 | 难道是一种错觉 63 | 那些流逝了的就永远不会复现 64 | 不掺任何的表演 65 | 轰轰烈烈那几年 66 | 有遗憾的感觉 为何感觉 67 | 那消失不见的素颜 68 | -------------------------------------------------------------------------------- /WangYi_Music/歌词/认错.txt: -------------------------------------------------------------------------------- 1 | 作曲 : 
许嵩 2 | 作词 : 许嵩 3 | 那天午后 我站在你家门口 4 | 你咬咬嘴唇还是说出了分手 5 | 我的挽留和眼泪全都没有用 6 | 或许我应该自食这苦果 7 | 8 | 你的迁就 我一直领悟不够 9 | 以为爱已强大的不要理由 10 | 心开始颤抖 明白了你的难受 11 | 但你的表情已经冷漠 12 | 13 | 全是我的错 14 | 现在认错有没有用 15 | 你说你已经不再爱我 16 | 我带你回忆曾经快乐的时空 17 | 你只是劝我别再执着 18 | 19 | 全是我的错 20 | 现在认错有没有用 21 | 你说你喜欢如今的生活 22 | 你带我回忆爱里互相的折磨 23 | 还告诉了我 别再来认错 认结果 24 | 25 | 那天午后 我站在你家门口 26 | 你咬咬嘴唇还是说出了分手 27 | 我的挽留和眼泪全都没有用 28 | 或许我应该自食这苦果 29 | 30 | 你的迁就 我一直领悟不够 31 | 以为爱已强大的不要理由 32 | 心开始颤抖 明白了你的难受 33 | 但你的表情已经冷漠 34 | 35 | 全是我的错 36 | 现在认错有没有用 37 | 你说你已经不再爱我 38 | 我带你回忆曾经快乐的时空 39 | 你只是劝我别再执着 40 | 41 | 全是我的错 42 | 现在认错有没有用 43 | 你说你喜欢如今的生活 44 | 你带我回忆爱里互相的折磨 45 | 还告诉了我 别再来认错 46 | 47 | 全是我的错 48 | 现在认错有没有用 49 | 你说你已经不再爱我 50 | 我带你回忆曾经快乐的时空 51 | 你只是劝我别再执着 52 | 53 | 全是我的错 54 | 现在认错有没有用 55 | 你说你喜欢如今的生活 56 | 你带我回忆爱里互相的折磨 57 | 还告诉了我 别再来认错 认结果 58 | 59 | -------------------------------------------------------------------------------- /WangYi_Music/歌词/违章动物.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 作曲 : 许嵩 6 | 作词 : 许嵩 7 | 我目睹街角的蝴蝶飞上了青天 8 | 要上访青天大老爷 9 | 相关衙门提出了一些指导性意见 10 | 街坊玩命转播真相与流言 11 | 卖红薯的姑娘想在学堂门前摆摊 12 | 那不可以没钱 也不可以不陪笑脸 13 | 有点小权的 时时刻刻都想要用上小权 14 | 而有大权的 脑子坏了才和你站一边 15 | 一群高贵气质的差人在处罚违章动物 16 | 她一身尘土 在街角迷了路 17 | 一群高贵气质的差人在处罚违章动物 18 | 缄默的泪 没有人在乎 19 | 这繁华的城池有时让人感到陌生 20 | 当乌云不断堆叠 暴雨也就如期而至 21 | 幸福 的定义连番升级 22 | 拒绝回到初始版本 23 | 就买个红薯吧 否则夜太寒冷 24 | 一群高贵气质的差人在处罚违章动物 25 | 她一身尘土 在街角迷了路 26 | 一群高贵气质的差人在处罚违章动物 27 | 缄默的泪 没有人在乎 28 | 一群高贵气质的差人在处罚违章动物 29 | 她一身尘土 在街角迷了路 30 | 一群高贵气质的差人在处罚违章动物 31 | 缄默的泪 汇成这方土地的湖 32 | -------------------------------------------------------------------------------- /WangYi_Music/歌词/雅俗共赏.txt: -------------------------------------------------------------------------------- 1 | 2 | 作曲 : 许嵩 3 | 作词 : 许嵩 4 | 是否每一部戏都看得完整场 5 | 是否每一天过得都有多难忘 6 | 表情迟钝可能因为比较爱想 7 | 不擅长眉目表达 8 | 总在盼望 总在失望 9 | 日子还不都这样 10 | 俗的无畏 雅的轻狂 11 | 还不都是一副臭皮囊 12 | 他们说快写一首情歌雅俗共赏 13 | 落笔传神还要容易传唱 14 | 上得厅堂也下得厨房 15 | 就像我一直在找的姑娘 16 | 快写一首情歌雅俗共赏 17 | 打完字谜还要接着打榜 18 | 如果胡同弄堂全都播放 19 | 气韵里居然添了些孤芳自赏 20 | 是否每一场美梦醒来都很爽 21 | 是否每一次成熟都徒增了业障 22 | 比痛和痒更多的 23 | 是不痛不痒 24 | 所以我爱进剧场 25 | 总在盼望 总在失望 26 | 日子还不都这样 27 | 俗的无畏 雅的轻狂 28 | 还不都是一副臭皮囊 29 | 他们说快写一首情歌雅俗共赏 30 | 落笔传神还要容易传唱 31 | 上得厅堂也下得厨房 32 | 就像我一直在找的姑娘 33 | 快写一首情歌雅俗共赏 34 | 打完字谜还要接着打榜 35 | 如果胡同弄堂全都播放 36 | 气韵里居然添了些孤芳自赏 37 | 谁的故事有营养 38 | 大俗或大雅的都在理直气壮 39 | 洒狗血或白雪的现场 40 | 都邀我观赏 41 | 还真是大方 42 | 快写一首情歌雅俗共赏 43 | 落笔传神还要容易传唱 44 | 上得厅堂也下得厨房 45 | 就像我一直在找的姑娘 46 | 有没有一种生活雅俗共赏 47 | 情节起伏跌宕让人向往 48 | 满纸荒唐中窥见满脸沧桑 49 | 触到神经就要懂得鼓掌 50 | 别说一不在乎二没期望 51 | 太超脱 中枪中奖感觉会一样 52 | -------------------------------------------------------------------------------- /coffee.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/coffee.png -------------------------------------------------------------------------------- /dangdang_book/README.md: -------------------------------------------------------------------------------- 1 | # scrapy-redis-dangdang.cm 2 | scrapy-redis分布式爬虫,爬取当当网图书信息 3 | 4 | 前期的准备 5 | 虚拟机下乌班图下redis:url去重,持久化 6 | mongodb:保存数据 7 | PyCharm:写代码 8 | 谷歌浏览器:分析要提取的数据 9 | 爬取图书每个分类下的小分类下的图书信息(分类标题,小分类标题,图书标题,作者,图书简介,价格,电子书价格,出版社,封面,图书链接) 10 | 思路:按每个大分类分组,再按小分类分组,再按每本书分组,最后提取数据 11 | -------------------------------------------------------------------------------- /dangdang_book/dangdang_book/items.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define here the models for your scraped items 4 | # 5 | # See documentation in: 6 | # 
https://docs.scrapy.org/en/latest/topics/items.html 7 | 8 | import scrapy 9 | 10 | 11 | class DangdangBookItem(scrapy.Item): 12 | # define the fields for your item here like: 13 | # item = scrapy.Field() 14 | pass 15 | -------------------------------------------------------------------------------- /dangdang_book/dangdang_book/middlewares.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define here the models for your spider middleware 4 | # 5 | # See documentation in: 6 | # https://docs.scrapy.org/en/latest/topics/spider-middleware.html 7 | 8 | import random 9 | 10 | 11 | class DangdangBookDownloaderMiddleware: 12 | 13 | def process_request(self, request, spider): 14 | """添加随机UA跟代理IP""" 15 | ua = random.choice(spider.settings.get("UA_LIST")) 16 | request.headers["User-Agent"] = ua 17 | 18 | # request.meta["proxy"] = "https://125.115.126.114:888" 19 | 20 | def process_response(self, request, response, spider): 21 | """查看UA有没有设置成功""" 22 | # print("777", request.headers["User-Agent"]) 23 | return response 24 | 25 | 26 | -------------------------------------------------------------------------------- /dangdang_book/dangdang_book/pipelines.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define your item pipelines here 4 | # 5 | # Don't forget to add your pipeline to the ITEM_PIPELINES setting 6 | # See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html 7 | import pymysql 8 | db = pymysql.connect("localhost", "root", "root123", "chat") 9 | # db = client["dangdang_db"] 10 | cursor = db.cursor() 11 | 12 | 13 | class DangdangBookPipeline: 14 | def process_item(self, item, spider): 15 | """保存数据到mongodb""" 16 | print("8888"*10, item) 17 | sql = "insert into dangdang(content) values(%s)" % item 18 | print(sql) 19 | print("666") 20 | cursor.execute(sql) 21 | db.commit() 22 | db.close 23 | return item 24 | -------------------------------------------------------------------------------- /dangdang_book/dangdang_book/settings.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Scrapy settings for dangdang_book project 4 | # 5 | # For simplicity, this file contains only settings considered important or 6 | # commonly used. 
You can find more settings consulting the documentation: 7 | # 8 | # https://docs.scrapy.org/en/latest/topics/settings.html 9 | # https://docs.scrapy.org/en/latest/topics/downloader-middleware.html 10 | # https://docs.scrapy.org/en/latest/topics/spider-middleware.html 11 | 12 | BOT_NAME = 'dangdang_book' 13 | 14 | SPIDER_MODULES = ['dangdang_book.spiders'] 15 | NEWSPIDER_MODULE = 'dangdang_book.spiders' 16 | 17 | # 一个去重的类,用来将url去重 18 | DUPEFILTER_CLASS = "scrapy_redis.dupefilter.RFPDupeFilter" 19 | # 一个队列 20 | SCHEDULER = "scrapy_redis.scheduler.Scheduler" 21 | # 是否持久化 22 | SCHEDULER_PERSIST = True 23 | # redis地址 24 | REDIS_URL = "redis://127.0.0.1:6379" 25 | # REDIS_HOST = '127.0.0.1' 26 | # REDIS_PORT = 6379 27 | 28 | 29 | LOG_LEVEL = "DEBUG" 30 | # user-agent 31 | UA_LIST = [ 32 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1", 33 | "Mozilla/5.0 (X11; CrOS i686 2268.111.0) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11", 34 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6", 35 | "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0 Safari/536.6", 36 | "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1", 37 | "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.9 Safari/536.5", 38 | "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.36 Safari/536.5", 39 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3", 40 | "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1063.0 Safari/536.3", 41 | "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0)", 42 | "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3", 43 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1062.0 Safari/536.3", 44 | "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)", 45 | "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3", 46 | "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1 Safari/536.3", 47 | "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.0 Safari/536.3", 48 | "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24", 49 | "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.1 Safari/535.24" 50 | ] 51 | 52 | # Obey robots.txt rules 53 | ROBOTSTXT_OBEY = False 54 | 55 | # 下载延迟 56 | DOWNLOAD_DELAY = 0 57 | 58 | # The download delay setting will honor only one of: 59 | # Enable or disable downloader middlewares 60 | # See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html 61 | DOWNLOADER_MIDDLEWARES = { 62 | 'dangdang_book.middlewares.DangdangBookDownloaderMiddleware': 543, 63 | } 64 | 65 | # Configure item pipelines 66 | ITEM_PIPELINES = { 67 | # 'dangdang_book.pipelines.DangdangBookPipeline': 300, 68 | 'scrapy_redis.pipelines.RedisPipeline': 300 69 | } 70 | 71 | -------------------------------------------------------------------------------- /dangdang_book/dangdang_book/spiders/__init__.py: 
-------------------------------------------------------------------------------- 1 | # This package will contain the spiders of your Scrapy project 2 | # 3 | # Please refer to the documentation for information on how to create and manage 4 | # your spiders. 5 | -------------------------------------------------------------------------------- /dangdang_book/dangdang_book/spiders/dd_book.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import scrapy 3 | # 额外导入以下类 4 | from scrapy_redis.spiders import RedisSpider 5 | from copy import deepcopy 6 | import time 7 | # 继承导入的类 8 | class DdBookSpider(RedisSpider): 9 | name = 'dd_book' 10 | allowed_domains = ['dangdang.com'] 11 | redis_key = "dd_book" # redis中插入(lpush dd_book http://category.dangdang.com/?ref=www-0-C) 12 | # start_urls = ["http://category.dangdang.com/"] 13 | def parse(self, response): 14 | """图书大类""" 15 | # 先分组 16 | div_list = response.xpath('//div[@class="classify_books"]/div[@class="classify_kind"]') 17 | for div in div_list: 18 | item = {} 19 | item["大标题"] = div.xpath('.//a/text()').extract_first() 20 | li_list = div.xpath('.//ul[@class="classify_kind_detail"]/li') 21 | for li in li_list: 22 | item["小标题"] = li.xpath('./a/text()').extract_first() 23 | sm_url = li.xpath('./a/@href').extract_first() 24 | #print(sm_url, item["小标题"]) 25 | time.sleep(2) 26 | 27 | # 请求详情页 28 | if sm_url != "javascript:void(0);": 29 | print("请求详情页:" ,sm_url) 30 | yield scrapy.Request(sm_url, callback=self.book_details, meta={"item": deepcopy(item)}) 31 | 32 | def book_details(self, response): 33 | """提取图书数据""" 34 | item = response.meta["item"] 35 | # 给每本书分组 36 | li_list = response.xpath('//ul[@class="bigimg"]/li') 37 | for li in li_list: 38 | item["图书标题"] = li.xpath('./a/@title').extract_first() 39 | item["作者"] = li.xpath('./p[@class="search_book_author"]/span[1]/a/@title').extract_first() 40 | item["图书简介"] = li.xpath('./p[@class="detail"]/text()').extract_first() 41 | item["价格"] = li.xpath('./p[@class="price"]/span[@class="search_now_price"]/text()').extract_first() 42 | item["电子书价格"] = li.xpath('./p[@class="price"]/a[@class="search_e_price"]/i/text()').extract_first() 43 | item["日期"] = li.xpath('./p[@class="search_book_author"]/span[2]/text()').extract_first() 44 | item["出版社"] = li.xpath('./p[@class="search_book_author"]/span[3]/a/@title').extract_first() 45 | item["图片"] = li.xpath('./a/img/@src').extract_first() 46 | item["图书链接"] = li.xpath('./a/@href').extract_first() 47 | 48 | yield item 49 | 50 | # 翻页 51 | next_url = response.xpath('//a[text()="下一页"]/@href').extract_first() 52 | if next_url is not None: 53 | next_url = "http://category.dangdang.com" + next_url 54 | yield scrapy.Request(next_url, callback=self.book_details, meta={"item": deepcopy(item)}) 55 | 56 | # lpush dd_book http://category.dangdang.com/?ref=www-0-C 57 | 58 | # ╭─mac@huayang ~/Stardust/scrapy_project/scrapy-redis-dangdang.cm-master 59 | # ╰─➤ PYTHONPATH=$(pwd) python3 -m scrapy runspider spiders/dd_book.py -------------------------------------------------------------------------------- /dangdang_book/scrapy.cfg: -------------------------------------------------------------------------------- 1 | # Automatically created by: scrapy startproject 2 | # 3 | # For more information about the [deploy] section see: 4 | # http://doc.scrapy.org/topics/scrapyd.html 5 | 6 | [settings] 7 | default = dangdang_book.settings 8 | 9 | # [deploy] 10 | # #url = http://localhost:6800/ 11 | # project = example 12 | 
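[Editor's note on dangdang_book/pipelines.py above: its docstring says it saves to MongoDB but the code actually writes to MySQL, the INSERT is built with "%" string interpolation instead of a parameterized query, and db.close is missing its parentheses so the connection is never closed. The sketch below is one minimal way the pipeline could be tightened up. It keeps the connection parameters already present in the original file (localhost, root/root123, database "chat", table dangdang(content)) but is only an illustration, not a tested drop-in replacement; it would also have to be re-enabled under ITEM_PIPELINES in settings.py, where only scrapy_redis.pipelines.RedisPipeline is currently active.]

# Minimal rewrite sketch of dangdang_book/pipelines.py (assumptions: same local
# MySQL database "chat" and a table dangdang(content TEXT) as in the original).
import json

import pymysql


class DangdangBookPipeline:
    def open_spider(self, spider):
        # connection parameters copied from the original pipeline
        self.db = pymysql.connect(host="localhost", user="root",
                                  password="root123", database="chat",
                                  charset="utf8mb4")
        self.cursor = self.db.cursor()

    def process_item(self, item, spider):
        # serialize the whole item dict into the single `content` column,
        # using a parameterized query instead of "%" string formatting
        sql = "INSERT INTO dangdang(content) VALUES (%s)"
        self.cursor.execute(sql, (json.dumps(dict(item), ensure_ascii=False),))
        self.db.commit()
        return item

    def close_spider(self, spider):
        self.cursor.close()
        self.db.close()  # the original referenced db.close without calling it

[To start the crawl itself, the comment at the bottom of dd_book.py still applies unchanged: seed Redis with `lpush dd_book http://category.dangdang.com/?ref=www-0-C` before running the spider.]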
-------------------------------------------------------------------------------- /ele_me/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/ele_me/1.png -------------------------------------------------------------------------------- /ele_me/README.md: -------------------------------------------------------------------------------- 1 | 爬取饿了么某地区的外卖信息 2 | 3 | 数据生成josn格式的csv文件,生成词云以及食物信息统计图 4 | 5 | 博客地址:https://blog.csdn.net/weixin_43746433 6 | 7 | 爬虫:https://blog.csdn.net/weixin_43746433/article/details/91906540 8 | 9 | 微信:why19970628 10 | 11 | 欢迎与我交流 12 | -------------------------------------------------------------------------------- /ele_me/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/ele_me/__init__.py -------------------------------------------------------------------------------- /ele_me/eleme_bar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/ele_me/eleme_bar.png -------------------------------------------------------------------------------- /ele_me/eleme_wordcloud.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/ele_me/eleme_wordcloud.png -------------------------------------------------------------------------------- /ele_me/elemedata.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/ele_me/elemedata.csv -------------------------------------------------------------------------------- /ele_me/fooddic.txt: -------------------------------------------------------------------------------- 1 | 黄焖鸡 -------------------------------------------------------------------------------- /finance.eastmoney.com/README.md: -------------------------------------------------------------------------------- 1 | 之前帮客户做的爬虫, 爬取东方财富网的每日的股票、可转债的数据 2 | 3 | 网址:http://finance.eastmoney.com/ 4 | 5 | 数据:每日运行生成一个csv文件 6 | 7 | 博客地址:https://blog.csdn.net/weixin_43746433 8 | 9 | 测试:代码截止2020/04/23测试无误 -------------------------------------------------------------------------------- /finance.eastmoney.com/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/finance.eastmoney.com/__init__.py -------------------------------------------------------------------------------- /finance.eastmoney.com/可还债/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/finance.eastmoney.com/可还债/__init__.py -------------------------------------------------------------------------------- /finance.eastmoney.com/股票/gupiao.py: -------------------------------------------------------------------------------- 1 | import re 2 | import os 3 | import requests 4 | import json 5 | import pandas as pd 6 | 7 | pd.set_option('display.max_columns', None) 8 | 
pd.set_option('display.max_rows', None) 9 | pd.set_option('display.unicode.ambiguous_as_wide', True) 10 | pd.set_option('display.unicode.east_asian_width', True) 11 | pd.set_option('display.width', 5000) 12 | 13 | # 字符类型的时间: 14 | def get_time(time_str): 15 | # 转为时间数组 16 | timeArray = time.strptime(time_str, "%Y%m%d") 17 | # 转为时间戳 18 | timeStamp = int(time.mktime(timeArray)) 19 | return timeStamp 20 | 21 | # 坐拥: 解析每个网页的数据 22 | # 输入:字符与每个网页所需的地址,请求的参数 23 | # 输出: 网页解析所获得的股票数据 24 | def HTML(time_str,url, params): 25 | gupiao_list = [] 26 | headers = { 27 | "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36"} 28 | try: 29 | r = requests.get(url, headers=headers, timeout=30, params=params) 30 | r.raise_for_status() 31 | r.encoding = r.apparent_encoding 32 | html = r.text 33 | except Exception as e: 34 | print("wrong:" + e) 35 | # pat = re.compile("\[\{.*?\}\]") 36 | pat = re.compile("({.*?})") 37 | data = pat.findall(html) 38 | # print(data) 39 | js = [] 40 | for d in data: 41 | try: 42 | d1=eval(d+"]}}").get("data").get("diff")[0] 43 | except: 44 | d1 = eval(d) 45 | js.append(d1) 46 | for i in range(len(js)): 47 | zhenfu = str(js[i]["f7"]) + "%" 48 | gupiao_list.append(( 49 | js[i]["f12"], js[i]["f14"], js[i]["f2"], zhenfu, js[i]["f4"], js[i]["f5"], js[i]["f6"], 50 | zhenfu, js[i]["f15"], js[i]["f16"], js[i]["f17"], js[i]["f18"], js[i]["f10"])) 51 | title = ["代码", "名称", "最新价", "涨跌幅", "涨跌额", "成交量", "成交额", 52 | "振幅", "最高", "最低", "今开", "昨收", "量比"] 53 | df = pd.DataFrame(gupiao_list, columns=title) 54 | to_csv(df, f"result_{time_str}.csv") 55 | 56 | # 保存csv图片 57 | def to_csv(df, csv_file): 58 | if os.path.exists(csv_file) == False: 59 | df.to_csv(csv_file, index=False) 60 | else: 61 | df.to_csv(csv_file, mode='a+', header=False, index=False) 62 | 63 | 64 | import time 65 | # 主函数入 66 | # 输入:时间与时间字符 67 | # 输出:解析网页 所需的header请求 68 | def main(time_str,time_): 69 | time_ = str(time_) +"000" 70 | # 爬出249个网页 71 | for i in range(1, 250): 72 | print(i) 73 | url = 'http://push2.eastmoney.com/api/qt/clist/get' 74 | params = { 75 | 'cb': f'jQuery112407955974158503321_{str(time_)}', 76 | 'pn': str(i), 77 | 'pz': '20', 78 | 'po': '1', 79 | 'np': '1', 80 | 'ut': 'bd1d9ddb04089700cf9c27f6f7426281', 81 | 'fltt': '2', 82 | 'invt': '2', 83 | 'fid': 'f3', 84 | 'fs': 'm:0 t:6,m:0 t:13,m:0 t:80,m:1 t:2,m:1 t:23', 85 | 'fields': 'f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f12,f13,f14,f15,f16,f17,f18,f20,f21,f23,f24,f25,f22,f11,f62,f128,f136,f115,f152', 86 | '_': str(time_) 87 | } 88 | # 解析网页入口 89 | HTML(time_str, url,params) 90 | # 睡眠 91 | time.sleep(6) 92 | 93 | 94 | if __name__ == '__main__': 95 | # 输入时间 96 | for time_str in ["20200417"]: 97 | time_ = get_time(time_str) 98 | # 程序入口 99 | main(time_str,time_) 100 | -------------------------------------------------------------------------------- /live.bible.is.com/README.md: -------------------------------------------------------------------------------- 1 | 下载http://www.bible.is 音频的文字内容,一种类型对应一个 dictionary, 一个音频对应一个txt,有断点续传的功能 2 | 3 | 网址: http://www.bible.is/radio 4 | 5 | 微信:why19970628 6 | 7 | 欢迎与我交流 8 | -------------------------------------------------------------------------------- /live.bible.is.com/live.bible.is.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf8 -*- 2 | import re 3 | from functools import wraps 4 | import datetime 5 | import sys 6 | import json 7 | import os 8 | import requests 9 | from lxml import etree 10 | from 
fake_useragent import UserAgent 11 | from urllib.parse import quote, urlencode 12 | import urllib 13 | import time 14 | import string 15 | 16 | ua = UserAgent() 17 | 18 | 19 | 20 | def handel_single_country(country_name_en, country_name_folder, country_name): 21 | link = f'https://live.bible.is/bible/{country_name_en}/MAT/1?audio_type=audio' 22 | print(country_name_folder, link) 23 | response = requests.get(link, headers={ 24 | "User_Agent": ua.chrome}) 25 | 26 | tree = etree.HTML(response.text) 27 | vedio_page_urls = tree.xpath('/html/body/script[1]/text()') 28 | 29 | # chapter = re.findall('"testaments":(.*?),"audioType', 30 | # str(vedio_page_urls))[0] 31 | # chapter = eval(chapter) 32 | # parts = [k for k, v in chapter.items() if v == "NT"] 33 | 34 | chapters = re.findall(r'"\w{2,3}":"\w{2,3}"', str(vedio_page_urls)) 35 | chapters = chapters[1:] 36 | parts = [] 37 | for chapter in chapters: 38 | [k, v] = chapter.replace('"', '').split(':') 39 | if k not in parts and v == 'NT': 40 | parts.append(k) 41 | for part in parts: 42 | # success = 0 43 | failed = 0 44 | for i in range(1, 35): 45 | txt_page_url = f'https://live.bible.is/bible/{country_name_en}/{part}/{i}?audio_type=audio' 46 | if failed >= 5: 47 | break 48 | try: 49 | print(txt_page_url) 50 | page_response = requests.get(txt_page_url, headers={ 51 | "User_Agent": ua.chrome}) 52 | title = re.findall('book-chapter-text">(.+)>>")) 23 | end = int(input("终止页码>>>")) 24 | for num in range(start, end + 1): 25 | print(f"第{num}页") 26 | headers = ua.chrome 27 | response = requests.get(base_url.format(str(num)), 28 | headers={"User_Agent": ua.chrome}) 29 | tree = etree.HTML(response.text) 30 | article_list = tree.xpath('//div[@class="art"]/a/@href') 31 | for article in article_list: 32 | article_url = article 33 | print(article_url) 34 | for i in range(5): 35 | try: 36 | article_detail = requests.get( 37 | article_url, headers={"User_Agent": ua.chrome}) 38 | if article_detail.status_code == 200: 39 | break 40 | except requests.exceptions.ProxyError: 41 | continue 42 | else: 43 | print(f"{article_url}\t\t失败!!!") 44 | continue 45 | article_tree = etree.HTML(article_detail.text) 46 | content = article_tree.xpath('//div[@class="bbtext"]/p//text()') 47 | content = "".join(content).split(".") 48 | print(content) 49 | for index, item in enumerate(content): 50 | if not item: 51 | continue 52 | content[index] = item + "." 
53 | 54 | try: 55 | write_doc("D:\stardata\小语种爬虫\捷克语/" + "res3.docx", content) 56 | except FileNotFoundError: 57 | print(f"{article_url}\t\t出错了!!!") 58 | continue 59 | print(f"{article_url}\t\t完成!!!") 60 | print(f"第{num}页完成!!!") 61 | -------------------------------------------------------------------------------- /minority_language/saier.py: -------------------------------------------------------------------------------- 1 | import os 2 | import requests 3 | import docx 4 | from lxml import etree 5 | from fake_useragent import UserAgent 6 | 7 | 8 | def write_doc(file_path, content_list): 9 | if os.path.isfile(file_path): 10 | doc = docx.Document(file_path) 11 | else: 12 | doc = docx.Document() 13 | 14 | for content in content_list: 15 | doc.add_paragraph(content) 16 | doc.save(file_path) 17 | 18 | 19 | 20 | ua = UserAgent() 21 | # base_url = "https://www.idnes.cz/sport/archiv/{}" 22 | base_url = "https://www.blic.rs/" 23 | 24 | 25 | def get_data(): 26 | headers = ua.chrome 27 | response = requests.get(base_url, headers={"User_Agent": ua.chrome}) 28 | # print(response.text) 29 | tree = etree.HTML(response.text) 30 | article_list = tree.xpath('//*[@id="top"]/div[4]/div/nav/ul/li/a/@href') 31 | print(article_list) 32 | for article in article_list[1:2]: # 类型 33 | article = article.lower() 34 | print(article) 35 | type_url = f"https://www.blic.rs/{article}" 36 | print(type_url) 37 | article_type_list = requests.get( 38 | type_url, headers={"User_Agent": ua.chrome}) 39 | res = article_type_list.text 40 | tree1 = etree.HTML(res) 41 | page_data = tree1.xpath( 42 | '//div[@class="pagination__list"]/ul/li/a/@href')[-2] 43 | page_res = page_data[:8] 44 | page = page_data[8:] 45 | print(f"{article}共有{page}页") 46 | for i in range(1, int(page)+1): # 遍历每一页 47 | print(f"{article}类型,第{i}页", "*"*20) 48 | url = type_url + page_res+str(i) 49 | article_list = requests.get( 50 | url, headers={"User_Agent": ua.chrome}).text 51 | tree1 = etree.HTML(article_list) 52 | page_detail_url_list = tree1.xpath( 53 | '/html/body/main/div/section/div[2]/section/article/div/h3/a/@href') 54 | print(page_detail_url_list) 55 | for detail_url in page_detail_url_list: 56 | for i in range(5): 57 | try: 58 | article_detail = requests.get( 59 | detail_url, headers={"User_Agent": ua.chrome}) 60 | if article_detail.status_code == 200: 61 | break 62 | except requests.exceptions.ProxyError: 63 | continue 64 | else: 65 | print(f"{detail_url}\t\t失败!!!") 66 | continue 67 | tree2 = etree.HTML(article_detail.text) 68 | content = tree2.xpath( 69 | '/html/body/main/div/article/div/div/p/text()') 70 | 71 | content = "".join(content).split(".") 72 | print(content) 73 | for index, item in enumerate(content): 74 | if not item: 75 | continue 76 | content[index] = item + "." 
77 | 78 | try: 79 | write_doc("D:\stardata\小语种爬虫\赛尔/" + 80 | "res_saier2.docx", content) 81 | except FileNotFoundError: 82 | print(f"{detail_url}\t\t出错了!!!") 83 | continue 84 | print(f"{detail_url}\t\t完成!!!") 85 | 86 | 87 | if __name__ == '__main__': 88 | get_data() 89 | -------------------------------------------------------------------------------- /reward.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/reward.jpg -------------------------------------------------------------------------------- /taobao/README.md: -------------------------------------------------------------------------------- 1 | 爬取淘宝美食的所有页面,mysql需要手动配置数据库。 2 | 3 | 数据:大约4500条数据,sql为mysql文件,josn文件为Mongodb文件 4 | 5 | 博客地址:https://blog.csdn.net/weixin_43746433 6 | 7 | 爬虫详情:https://blog.csdn.net/weixin_43746433/article/details/97623511 8 | 9 | 后续的数据分析:https://blog.csdn.net/weixin_43746433/article/details/97688169 10 | 11 | 微信:why19970628 12 | 13 | 欢迎与我交流 14 | -------------------------------------------------------------------------------- /taobao/test.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import pprint 3 | from lxml import etree 4 | import json 5 | import urllib 6 | from urllib import request 7 | headers={ 8 | "Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3", 9 | "Accept-Encoding":"gzip, deflate, br", 10 | "Accept-Language":"zh-CN,zh;q=0.9", 11 | "Cache-Control":"max-age=0", 12 | "Connection":"keep-alive", 13 | "Cookie":"t=4f8e0f1ccaf38c3b87096409eeb1fd52; cna=FOKyFcj3bnQCAdocR8ZpjqDM; tracknick=%5Cu6211%5Cu53EB%5Cu738B%5Cu5927%5Cu9633%5Cu554A; lgc=%5Cu6211%5Cu53EB%5Cu738B%5Cu5927%5Cu9633%5Cu554A; tg=0; thw=cn; cookie2=1515a17feb4817e0b121ec61c57bdebd; _tb_token_=f346348eef015; _m_h5_tk=60367eb5c8d7c62be5befdf974af4201_1564038274783; _m_h5_tk_enc=9e7a36f5498b1568032367f248ae47ee; uc3=lg2=UIHiLt3xD8xYTw%3D%3D&id2=UUGrdwHsJB6u%2BQ%3D%3D&nk2=rUszGXlaengSz%2BTL&vt3=F8dBy3zbW%2FWdhdBl7NE%3D; uc4=nk4=0%40r7q1NfecRXnYVq4toteFS9tFPfXPIO4%3D&id4=0%40U2OcR2VRIfPxS27lnuSvz1%2BkOUiV; _cc_=U%2BGCWk%2F7og%3D%3D; enc=4wio677EOfwVE4ZtLJjx3w0OUX9gNfrhOPqVwF%2B6OyFs7QlbFG02LVHBZ7Ap4D9cFy7VZetUXAs0oBAAYBuTDQ%3D%3D; mt=ci=104_1; swfstore=307564; x=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0%26__ll%3D-1%26_ato%3D0; hng=CN%7Czh-CN%7CCNY%7C156; whl=-1%260%260%261564031467353; uc1=cookie14=UoTaHPgonNb53g%3D%3D; v=0; pnm_cku822=098%23E1hvrvvUvbpvUvCkvvvvvjiPRFFvsjiRnLdU1jD2PmPW0jrPP2zO1jDnRLLZtjY8iQhvChCvCCptvpvhphvvv8yCvv3vpvo1y6cQtOyCvvXmp99hetutvpvIphvvvvvvphCvpv3cvvChXZCvjvUvvhL6phvwv9vvBW1vpCQmvvChsvyCvh1hAXyvI1QaUjCwiNoOejaPJHLXSfpAOHCqVUcn%2B3C1osEc6aZtn0vHVA3lYb8rwo1%2Bm7zhdigDN5WK%2BE7reB69EcqhaB4AVAWaUExrvphvCyCCvvvvvvGCvvpvvPMM; l=cBTrwufVqMp97_ASXOCwourza77OSIRAguPzaNbMi_5BU6L1UuQOkVGcNFp6VjWd9hYB4sdB3ey9-etkiWMuGiuXppgF.; isg=BFRUAAxi9T5QyWF9SrxZjDUFJZLGrXiXR5zNBu414F9i2fQjFr1IJwpb2ZFkIbDv", 14 | "Host":"shop130809627.taobao.com", 15 | "Upgrade-Insecure-Requests":"1", 16 | "User-Agent":"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36", 17 | } 18 | url='https://shop130809627.taobao.com/i/asynSearch.htm?_ksTS=1564031496810_573&callback=jsonp574&mid=w-18789199391-0&wid=18789199391&path=/category.htm&spm=a1z10.3-c-s.w4002-18789199391.25.300342a2PK2mOr&orderType=hotsell_desc' 19 | 
#req = request.Request(url, headers=headers) 20 | #html = request.urlopen(req).read().decode() 21 | #print(html) 22 | s= requests.session() 23 | res =s.get(url,headers=headers,verify=False) 24 | res.encoding='utf-8' 25 | #res=json.load(res) 26 | html=res.text 27 | print(html) 28 | import re 29 | a=r'.*?(.*?).*?' 30 | con=re.compile(a,re.S) 31 | links = re.findall(con, html) 32 | print(links) -------------------------------------------------------------------------------- /utils/crawlerHelper.py: -------------------------------------------------------------------------------- 1 | import socket 2 | import redis 3 | import urllib 4 | import sys 5 | from functools import wraps 6 | import datetime 7 | import os 8 | import requests 9 | from lxml import etree 10 | from fake_useragent import UserAgent 11 | from urllib.parse import quote, urlencode 12 | import time 13 | import string 14 | import socket 15 | socket.setdefaulttimeout(6) 16 | 17 | 18 | def con_redis(): 19 | # 连接池 20 | pool = redis.ConnectionPool( 21 | host="123.56.153.183", port=6379, max_connections=1024) 22 | conn = redis.Redis(connection_pool=pool) 23 | return conn 24 | 25 | 26 | def download(url, filename, callback): 27 | """ 28 | 封装了 urlretrieve()的自定义函数,递归调用urlretrieve(),当下载失败时,重新下载,三次下载失败结束 29 | download file from internet 30 | :param url: path to download from 31 | :param savepath: path to save files 32 | :return: None 33 | """ 34 | 35 | count = 1 36 | try: 37 | urllib.request.urlretrieve(url, filename, callback) 38 | except socket.timeout: 39 | while count <= 2: 40 | try: 41 | urllib.request.urlretrieve(url, filename, callback) 42 | break 43 | except socket.timeout: 44 | err_info = 'Reloading for %d time' % count if count == 1 else 'Reloading for %d times' % count 45 | print(err_info) 46 | count += 1 47 | if count > 2: 48 | print("downloading picture fialed!") 49 | 50 | # try: 51 | # urllib.request.urlretrieve(url, filename, callback) 52 | # # except urllib.ContentTooShortError: 53 | # # print('Network conditions is not good.Reloading.') 54 | # # download(url, filename, callback, header) 55 | # except Exception as e: 56 | # print(e) 57 | # print('Network conditions is not good.\nReloading.....') 58 | # download(url, filename, callback) 59 | 60 | 61 | def download2(url, filename, callback): 62 | try: 63 | res = requests.get(url, timeout=10) 64 | with open(filename, 'ab') as file: # 保存到本地的文件名 65 | file.write(res.content) 66 | file.flush() 67 | except socket.timeout: 68 | print('timeouut') 69 | 70 | 71 | # 下载进度 72 | def callback(num, consumed_bytes, total_bytes): 73 | """ 74 | 显示下载文件的进度 75 | :param @num:目前为此传递的数据块数量 76 | :param @consumed_bytes:每个数据块的大小,单位是byte,字节 77 | :param @total_bytes:远程文件的大小 78 | :return: None 79 | """ 80 | # if a3: 81 | rate = int(100 * (float(num * consumed_bytes) / float(total_bytes))) 82 | print('\r{0}% '.format(rate), end='') 83 | sys.stdout.flush() 84 | 85 | 86 | def cost(func): 87 | @wraps(func) 88 | def wrapper(*args, **kwargs): 89 | start = time.time() 90 | res = func(*args, **kwargs) 91 | end = time.time() 92 | print('花费', start - end, 's') 93 | return res 94 | 95 | return wrapper 96 | -------------------------------------------------------------------------------- /yingjieshneg.com/README.md: -------------------------------------------------------------------------------- 1 | 爬取应届生求职网 招聘网站 链接、职位、公司信息等数据 2 | 3 | 网址: http://www.yingjiesheng.com/ 4 | 5 | 博客地址:https://blog.csdn.net/weixin_43746433 6 | 7 | 微信:why19970628 8 | 9 | 欢迎与我交流 10 | 
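[Editor's note on utils/crawlerHelper.py above: it bundles a download() wrapper that retries urllib's urlretrieve() on socket timeouts, a callback() progress hook, and a cost timing decorator. Below is a small usage sketch, assuming the repository root is on PYTHONPATH; the URL and file name are placeholders for illustration only. Note that cost() as written prints `start - end`, so the reported elapsed time comes out negated.]

from utils.crawlerHelper import download, callback, cost


@cost  # prints the elapsed time (negated, because the decorator computes start - end)
def fetch_one(url, filename):
    # download() makes one attempt plus up to two retries on socket.timeout
    download(url, filename, callback)


if __name__ == "__main__":
    # placeholder URL and file name, purely for illustration
    fetch_one("https://example.com/sample.jpg", "sample.jpg")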
-------------------------------------------------------------------------------- /yingjieshneg.com/yingjieshneg.py: -------------------------------------------------------------------------------- 1 | # coding = utf-8 2 | import requests, pymysql, json 3 | import time 4 | from fake_useragent import UserAgent 5 | import time, request 6 | from multiprocessing import Pool 7 | import re 8 | import pandas as pd 9 | from datetime import datetime, timedelta 10 | from lxml import etree 11 | 12 | date = (datetime.now() - timedelta(days=0)).strftime('%Y-%m-%d') 13 | 14 | # date = time.strftime('%Y-%m-%d', time.localtime(time.time())) 15 | ua = UserAgent() 16 | 17 | 18 | def get_data(url): 19 | print(url) 20 | 21 | res = requests.get(url, headers={"User-Agent": ua.chrome}) 22 | res.encoding = 'gb2312' 23 | company_pattern = re.compile( 24 | 'class="jobli".*?
.*?.*?span(.*?)', re.S) 25 | items = re.findall(company_pattern, res.text) 26 | content = [] 27 | for item in items: 28 | dataset = {} 29 | if len(item[2]) > 1 and str(item[2][1:].strip()) == str(date): 30 | time_ = item[2][1:] 31 | else: 32 | continue 33 | 34 | if len(item[0]) > 1: 35 | if "http" not in item[0]: 36 | company_url = "http://www.yingjiesheng.com" + item[0] 37 | else: 38 | company_url = item[0] 39 | else: 40 | continue 41 | dataset['url'] = company_url 42 | print(company_url) 43 | 44 | job_content = requests.get(company_url, headers={"User-Agent": ua.chrome}) 45 | if job_content.status_code == 200: 46 | job_content.encoding = 'gb2312' 47 | html = etree.HTML(job_content.text) 48 | jobs = html.xpath("//div[@class='info clearfix']/ol/li/u/text()") 49 | try: 50 | job = jobs[-1].replace('置顶"', '').strip() 51 | except: 52 | job = "" 53 | des_list = html.xpath("//div[@class='jobIntro']/div//text()") 54 | if len(des_list) == 0: 55 | des = '' 56 | else: 57 | des = '' 58 | for i in des_list: 59 | i = i.strip().replace("\n", "").replace(" ", "") 60 | des = des + " " + i 61 | print(des) 62 | 63 | else: 64 | job = "" 65 | des = "" 66 | 67 | dataset['company'] = item[1][1:].replace('置顶','').replace(" ","").strip() if len(item[1]) > 1 else '' 68 | dataset["job"] = job 69 | dataset["des"] = des 70 | dataset['time'] = (datetime.now() - timedelta(days=0)).strftime('%d/%m/%Y') 71 | print("*" * 10) 72 | print(dataset) 73 | content.append(dataset) 74 | time.sleep(5) 75 | # print(content) 76 | return content 77 | 78 | 79 | def write_to_file(content): 80 | df = pd.DataFrame(content) 81 | time.time() 82 | df.to_csv(f'{date}_company.csv', index=False, mode='a+', header=False) 83 | 84 | 85 | def run(page): 86 | url = f"http://www.yingjiesheng.com/commend-fulltime-{page}.html" 87 | data = get_data(url=url) 88 | write_to_file(data) 89 | 90 | 91 | def run2(page): 92 | url = f"http://www.yingjiesheng.com/commend-parttime-{page}.html" 93 | data = get_data(url=url) 94 | write_to_file(data) 95 | 96 | 97 | def quchong(): 98 | data = pd.read_csv(f'{date}_company.csv') 99 | data.columns = ["目标网页", "公司信息", "招聘岗位", "职位描述", "发布日期"] 100 | a = data.drop_duplicates(subset=['目标网页'], keep='first') 101 | a.to_csv(f'{date}_company.csv', index=False) 102 | 103 | 104 | if __name__ == '__main__': 105 | start = time.time() 106 | pool = Pool() 107 | pool.map(run, [i for i in range(1, 8)]) 108 | pool.map(run2, [i for i in range(1, 8)]) 109 | quchong() 110 | 111 | print('花费时间:', time.time() - start) 112 | -------------------------------------------------------------------------------- /yixuela.com/README.md: -------------------------------------------------------------------------------- 1 | 下载易学啦 各个版本(人教版、鲁人版、苏教版、沪教版、北师大版等)、年级(小、初、高)、文章中 所有图片信息, 需手动指定下载图片的本地存放目录 2 | 3 | 网址: https://www.yixuela.com/ 4 | 5 | 数据:平均每个版本大约1G数据量, 一共6G的数据, 保存方式为图片 6 | 7 | 微信:why19970628 8 | 9 | 欢迎与我交流 10 | -------------------------------------------------------------------------------- /yixuela.com/poetry.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf8 -*- 2 | import sys 3 | import os 4 | import requests 5 | from lxml import etree 6 | from fake_useragent import UserAgent 7 | from urllib.parse import quote, urlencode 8 | import urllib 9 | import time 10 | import string 11 | version_links = ['hjb', 'ljb', 'bsd', 'rjb'] # 'sjb', 12 | admin = 'https://www.yixuela.com/' 13 | subject = 'yuwen/' 14 | 15 | ua = UserAgent() 16 | 17 | 18 | def get_url_link(): 19 | for version in version_links: 20 | # 
版本 21 | version_subject_url = admin + 'books/' + version + '/' + subject 22 | # 年级 23 | # for i in range(1,13): 24 | # grade = f'g{i}/' 25 | # print(version_subject_url) 26 | yield version_subject_url, version 27 | 28 | 29 | def crwal_artile_content(artitle_content_url, article_folder): 30 | """ 31 | 文章详情页爬取图片 32 | """ 33 | response = requests.get(artitle_content_url, headers={ 34 | "User_Agent": ua.chrome}) 35 | # print(response.text) 36 | tree = etree.HTML(response.text) 37 | image_name = tree.xpath( 38 | '/html/body/section/div[3]/div[1]/div[2]/img/@src') 39 | for index, name in enumerate(image_name): 40 | name = name.split('/')[-1] 41 | image_save_path = os.path.join(article_folder, name) 42 | ori_url = image_name[index] 43 | url = quote(ori_url, safe='/:?=') 44 | urllib.request.urlretrieve(url, image_save_path) 45 | print(f'{image_save_path} 爬取成功!') 46 | 47 | 48 | def crwal_artile(content_link, result_folder): 49 | response = requests.get(content_link, headers={"User_Agent": ua.chrome}) 50 | tree = etree.HTML(response.text) 51 | article_name = tree.xpath( 52 | '//div[@class="right-menu bg-white mt-10"]/nav/ul/li/a/text()') 53 | article_link = tree.xpath( 54 | '//div[@class="right-menu bg-white mt-10"]/nav/ul/li/a/@href') 55 | for index, name in enumerate(article_name): 56 | article_folder = os.path.join(result_folder, name) 57 | os.makedirs(article_folder, exist_ok=True) 58 | artitle_content_url = admin + article_link[index] 59 | try: 60 | crwal_artile_content(artitle_content_url, article_folder) 61 | except Exception as e: 62 | print(e) 63 | time.sleep(1) 64 | 65 | 66 | def run(url, result_path, version): 67 | response = requests.get(url, headers={"User_Agent": ua.chrome}) 68 | tree = etree.HTML(response.text) 69 | title = tree.xpath('//div[@class="list-warp"]/div//a/text()') 70 | title_link = tree.xpath('//div[@class="list-warp"]/div//a/@href') 71 | title_ = [i for i in title if len(i.replace(" ", '')) > 2] 72 | 73 | for index, title1 in enumerate(title_): 74 | content_link = admin + title_link[index * 2] 75 | result_folder = os.path.join(result_path, f'{version}/{title1}') 76 | os.makedirs(result_folder, exist_ok=True) 77 | try: 78 | crwal_artile(content_link, result_folder) 79 | time.sleep(2) 80 | except Exception as e: 81 | print(e) 82 | 83 | # if len(title_) != len(title_link): 84 | # raise Exception('title length error') 85 | 86 | 87 | def process(result_path): 88 | for url, version in get_url_link(): 89 | print(url) 90 | run(url, result_path, version) 91 | time.sleep(5) 92 | 93 | 94 | if __name__ == '__main__': 95 | result_path = sys.argv[1] 96 | process(result_path) 97 | -------------------------------------------------------------------------------- /微博热搜/人物.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/微博热搜/人物.xlsx -------------------------------------------------------------------------------- /微博热搜/名词.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/微博热搜/名词.xlsx -------------------------------------------------------------------------------- /微博热搜/婚恋.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/微博热搜/婚恋.xlsx 
-------------------------------------------------------------------------------- /爬取中彩网彩票/3D.xls: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/爬取中彩网彩票/3D.xls -------------------------------------------------------------------------------- /爬取中彩网彩票/test_CaiPiao.py: -------------------------------------------------------------------------------- 1 | #!python3 2 | import random 3 | from urllib.request import ProxyHandler, build_opener 4 | from bs4 import BeautifulSoup 5 | import xlwt 6 | def get_ip(): 7 | fr=open('D:\软件(学习)\Python\PyCharm\kaoshi\ip.txt','r') 8 | ips=fr.readlines() 9 | new=[] 10 | for line in ips: 11 | temp=line.strip() 12 | new.append(temp) 13 | ip=random.choice(new) 14 | return ip 15 | print(ip) 16 | proxy =get_ip() 17 | proxy_handler = ProxyHandler({ 18 | 'http': 'http://' + proxy, 19 | 'https': 'https://' + proxy 20 | }) 21 | fo=open('2.html','r',encoding='utf-8') 22 | html=fo.read() 23 | soup=BeautifulSoup(html,'lxml') 24 | fo.close() 25 | #opener = build_opener(proxy_handler) 26 | #for i in range(1): 27 | # url='http://kaijiang.zhcw.com/zhcw/html/3d/list_'+str(i)+'.html' 28 | # res=request.Request(url) 29 | # response=opener.open(res).read().decode('utf-8') 30 | # soup=BeautifulSoup(response,'lxml') 31 | #print(soup.select('tr')) 32 | #print(soup.select('tr')[2:-1]) 33 | pat='' 34 | a=soup.find_all('td',{'style':'padding-left:20px;'}) 35 | #print(a) 36 | #for i in a: 37 | # print(i.text) 38 | 39 | def parse_one_page(): 40 | for item in soup.select('tr')[2:-1]: 41 | i = 0 42 | yield { 43 | 44 | 'time': item.select('td')[i].text, 45 | 'issue': item.select('td')[i + 1].text, 46 | 'digits': item.select('td em')[0].text, 47 | 'ten_digits': item.select('td em')[1].text, 48 | 'hundred_digits': item.select('td em')[2].text, 49 | 'single_selection': item.select('td')[i + 3].text, 50 | 'group_selection_3': item.select('td')[i + 4].text, 51 | 'group_selection_6': item.select('td')[i + 5].text, 52 | 'sales': item.select('td')[i + 6].text, 53 | 'return_rates': item.select('td')[i + 7].text 54 | } 55 | parse_one_page() 56 | def write_to_excel(): 57 | 58 | 59 | f = xlwt.Workbook() 60 | 61 | sheet1 = f.add_sheet('3D',cell_overwrite_ok=True) 62 | 63 | row0 = ["开奖日期","期号","个位数","十位数","百位数","单数","组选3","组选6","销售额","返奖比例"] #写入第一行 64 | 65 | for j in range(0,len(row0)): 66 | 67 | sheet1.write(0,j,row0[j]) 68 | #依次爬取每一页内容的每一期信息,并将其依次写入Excel 69 | 70 | #写入每一期的信息 71 | i = 0 72 | 73 | 74 | for item in parse_one_page(): 75 | 76 | sheet1.write(i+1,0,item['time']) 77 | 78 | sheet1.write(i+1,1,item['issue']) 79 | 80 | sheet1.write(i+1,2,item['digits']) 81 | 82 | sheet1.write(i+1,3,item['ten_digits']) 83 | 84 | sheet1.write(i+1,4,item['hundred_digits']) 85 | 86 | sheet1.write(i+1,5,item['single_selection']) 87 | 88 | sheet1.write(i+1,6,item['group_selection_3']) 89 | 90 | sheet1.write(i+1,7,item['group_selection_6']) 91 | 92 | sheet1.write(i+1,8,item['sales']) 93 | 94 | sheet1.write(i+1,9,item['return_rates']) 95 | i+=1 96 | 97 | f.save('3D.xls') 98 | #write_to_excel() 99 | -------------------------------------------------------------------------------- /高考志愿网/README.md: -------------------------------------------------------------------------------- 1 | 高考志愿填报网 2 | 3 | 网址 https://gkcx.eol.cn/school/search 4 | 5 | gkzy.py,通过api接口,抓取该网站所有学校的信息,如学校类型,位置,历年分数线,排名等等 6 | 7 | gkzy2.py 抓取各个院校的招生人数、招生计划、开放专业名称与介绍,五年的文理科的分数线、五年的历年学校批次等维度 8 | 9 | 数据量约为2500个 
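[Editor's note: the 高考志愿网 README above says gkzy.py walks the site's API page by page to collect school type, location, historical score lines and rankings for roughly 2,500 schools, but only a raw link to the script is included in this dump. The sketch below shows the general paginated-API pattern being described; the endpoint, query parameters and response field names are placeholders, not the real gkcx.eol.cn interface.]

# Hedged sketch of the paginated-API crawl described in the README; everything
# marked "placeholder"/"hypothetical" is invented for illustration only.
import csv
import time

import requests

API_URL = "https://example.com/api/school/list"   # placeholder endpoint
PAGE_SIZE = 20


def fetch_page(page):
    """Request one page of school records and return the parsed list."""
    params = {"page": page, "size": PAGE_SIZE}    # hypothetical parameters
    resp = requests.get(API_URL, params=params, timeout=10)
    resp.raise_for_status()
    return resp.json().get("items", [])           # hypothetical response layout


def crawl(csv_path="schools.csv", max_pages=130):  # ~2500 records at 20 per page
    with open(csv_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["name", "type", "province", "rank"])
        for page in range(1, max_pages + 1):
            rows = fetch_page(page)
            if not rows:
                break
            for r in rows:
                writer.writerow([r.get("name"), r.get("type"),
                                 r.get("province"), r.get("rank")])
            time.sleep(1)                          # be polite between requests


if __name__ == "__main__":
    crawl()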
-------------------------------------------------------------------------------- /高考志愿网/gkzy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/高考志愿网/gkzy.py -------------------------------------------------------------------------------- /高考志愿网/gkzy2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/高考志愿网/gkzy2.py -------------------------------------------------------------------------------- /高考网/main.py: -------------------------------------------------------------------------------- 1 | 1# coding = utf-8 2 | 3 | import requests 4 | from bs4 import BeautifulSoup 5 | import os 6 | import time 7 | 8 | 9 | def get_data(): 10 | for i in range(1, 108): 11 | print("正在下载第%s页数据" % i) 12 | url = 'http://college.gaokao.com/schlist/p%s' % i 13 | res = requests.get(url).text 14 | content = BeautifulSoup(res, "html.parser") 15 | college_list = content.find('div', attrs={'class': 'scores_List'}).find_all('dl') 16 | items = map(parse_item, college_list) 17 | save_to_csv(items) 18 | time.sleep(1) 19 | 20 | 21 | def parse_item(item): 22 | college_name = item.find('strong')['title'] 23 | college_attr = item.find_all('li') 24 | college_site = college_attr[0].text[6:] 25 | college_title = college_attr[1].text[5:] 26 | college_type = college_attr[2].text[5:] 27 | college_belong = college_attr[3].text[5:] 28 | college_nature = college_attr[4].text[5:] 29 | college_website = college_attr[5].text[5:] 30 | result = { 31 | 'college_name': college_name, 32 | 'college_site': college_site, 33 | 'college_title': college_title, 34 | 'college_type': college_type, 35 | 'college_belong': college_belong, 36 | 'college_nature': college_nature, 37 | 'college_website': college_website 38 | } 39 | print(result) 40 | return result 41 | 42 | 43 | def save_to_csv(data): 44 | if not os.path.exists(r'college_data.csv'): 45 | with open('college_data.csv', 'a+', encoding='utf-8') as f: 46 | f.write('name,site,title,type,belong,nature,website\n') 47 | for d in data: 48 | try: 49 | row = '{},{},{},{},{},{},{}'.format(d['college_name'], 50 | d['college_site'], 51 | d['college_title'], 52 | d['college_type'], 53 | d['college_belong'], 54 | d['college_nature'], 55 | d['college_website']) 56 | f.write(row) 57 | f.write('\n') 58 | except: 59 | continue 60 | else: 61 | with open('college_data.csv', 'a+', encoding='utf-8') as f: 62 | for d in data: 63 | try: 64 | row = '{},{},{},{},{},{},{}'.format(d['college_name'], 65 | d['college_site'], 66 | d['college_title'], 67 | d['college_type'], 68 | d['college_belong'], 69 | d['college_nature'], 70 | d['college_website']) 71 | f.write(row) 72 | f.write('\n') 73 | except: 74 | continue 75 | 76 | 77 | if __name__ == '__main__': 78 | get_data() 79 | -------------------------------------------------------------------------------- /高考网/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/why19970628/Python_Crawler/23ba5cf5ad12d5d0f9f3d2376c0c0ea32fc3d2de/高考网/readme.md -------------------------------------------------------------------------------- /高考网/北京上海江苏高质量高校占比.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Awesome-pyecharts 6 | 7 | 8 | 9 | 10 | 11 |
[remainder of the pyecharts chart page; the HTML/JS markup was stripped during extraction, leaving only line numbers and the page title "Awesome-pyecharts"] -------------------------------------------------------------------------------- /高考网/北京高质量高校占比.html: -------------------------------------------------------------------------------- [pyecharts chart page titled "Awesome-pyecharts"; HTML/JS markup stripped during extraction] -------------------------------------------------------------------------------- /高考网/占比前十城市高质量高校占比.html: -------------------------------------------------------------------------------- [pyecharts chart page titled "Awesome-pyecharts"; HTML/JS markup stripped during extraction] -------------------------------------------------------------------------------- /高考网/高校属性分析pie.html: -------------------------------------------------------------------------------- [pyecharts chart page titled "Awesome-pyecharts"; HTML/JS markup stripped during extraction] --------------------------------------------------------------------------------