├── .github
├── FUNDING.yml
└── ISSUE_TEMPLATE
│ ├── bug_report.md
│ └── feature_request.md
├── .gitignore
├── LICENSE
├── README.md
├── README_EN.md
├── Spiders
├── A12306
│ └── main12306.py
├── JdSpider
│ └── jd_more_info.py
├── __init__.py
├── alipay
│ └── main.py
├── bilibili
│ └── main.py
├── browser
│ └── main.py
├── chsi
│ └── main.py
├── cloudmusic
│ └── main.py
├── cnblog
│ └── main.py
├── csdn
│ └── main.py
├── ctrip
│ └── main.py
├── github
│ └── main.py
├── jianshu
│ └── main.py
├── mail
│ └── main.py
├── moments_album
│ └── main.py
├── oschina
│ └── main.py
├── qqfriend
│ └── main.py
├── qqqun
│ └── main.py
├── shgjj
│ └── main.py
├── taobao
│ ├── spider.py
│ └── taobao_cookies.json
├── telephone
│ └── main.py
├── yidong
│ └── main.py
└── zhihu
│ └── main.py
├── docs
├── .nojekyll
├── QuickStart.md
├── README.md
├── _coverpage.md
├── _media
│ ├── JetBrains.png
│ ├── acnblog1.png
│ ├── acnblog2.png
│ ├── alimail1.png
│ ├── alimail2.png
│ ├── alimail3.png
│ ├── alimail4.png
│ ├── alipay1.png
│ ├── alipay2.png
│ ├── alipay3.png
│ ├── alipay4.png
│ ├── bilibili1.png
│ ├── bilibili2.png
│ ├── bilibili3.png
│ ├── bilibili4.png
│ ├── chrome1.png
│ ├── chrome2.png
│ ├── chrome3.png
│ ├── cloudmusic1.png
│ ├── cloudmusic2.png
│ ├── cloudmusic3.png
│ ├── cloudmusic4.png
│ ├── cnblog1.png
│ ├── cnblog2.png
│ ├── cnblog3.png
│ ├── cnblog4.png
│ ├── csdn1.png
│ ├── csdn2.png
│ ├── csdn3.png
│ ├── csdn4.png
│ ├── favicon.ico
│ ├── favicon.png
│ ├── github1.png
│ ├── github2.png
│ ├── github3.png
│ ├── github4.png
│ ├── hotmail1.png
│ ├── hotmail2.png
│ ├── hotmail3.png
│ ├── hotmail4.png
│ ├── infospider-16x16-icon.png
│ ├── infospider-logo-mini.png
│ ├── infospider-logo.png
│ ├── infospider.png
│ ├── infospider.svg
│ ├── infospider2.png
│ ├── jd1.png
│ ├── jd2.png
│ ├── jd3.png
│ ├── jd4.png
│ ├── jianshu1.png
│ ├── jianshu2.png
│ ├── jianshu3.png
│ ├── jianshu4.png
│ ├── liantong1.png
│ ├── liantong2.png
│ ├── liantong3.png
│ ├── liantong4.png
│ ├── logo-100px.png
│ ├── logo-50px.png
│ ├── logo-transparent-100px.png
│ ├── logo-transparent-50px.png
│ ├── logo.png
│ ├── logo_tr.png
│ ├── momentsalbum1.png
│ ├── momentsalbum2.png
│ ├── momentsalbum3.png
│ ├── momentsalbum4.png
│ ├── oschina1.png
│ ├── oschina2.png
│ ├── oschina3.png
│ ├── oschina4.png
│ ├── qqfriend1.png
│ ├── qqfriend2.png
│ ├── qqfriend3.png
│ ├── qqfriend4.png
│ ├── qqfriend5.png
│ ├── qqfriend6.png
│ ├── qqfriend7.png
│ ├── qqmail1.png
│ ├── qqmail2.png
│ ├── qqmail3.png
│ ├── qqmail4.png
│ ├── qqqun1.png
│ ├── qqqun2.png
│ ├── qqqun3.png
│ ├── qqqun4.png
│ ├── qqqun5.png
│ ├── qqqun6.png
│ ├── qqqun7.png
│ ├── screenshot.png
│ ├── sina1.png
│ ├── sina2.png
│ ├── sina3.png
│ ├── sina4.png
│ ├── taobao1.png
│ ├── taobao2.png
│ ├── taobao3.png
│ ├── taobao4.png
│ ├── tielu1.png
│ ├── tielu2.png
│ ├── tielu3.png
│ ├── tielu4.png
│ ├── wangyiemail1.png
│ ├── wangyiemail2.png
│ ├── wangyiemail3.png
│ ├── wangyiemail4.png
│ ├── yidong1.png
│ ├── yidong2.png
│ ├── yidong3.png
│ ├── yidong4.png
│ ├── zhihu1.png
│ ├── zhihu2.png
│ ├── zhihu3.png
│ └── zhihu4.png
├── ads.txt
├── gif
│ ├── 12306.gif
│ ├── alimail.gif
│ ├── alipay.gif
│ ├── bilibili.gif
│ ├── chrome.gif
│ ├── cloudmusic.gif
│ ├── cnblog.gif
│ ├── csdn.gif
│ ├── document.gif
│ ├── github.gif
│ ├── hotmail.gif
│ ├── jd.gif
│ ├── jianshu.gif
│ ├── oschina.gif
│ ├── qqmail.gif
│ ├── qqqun.gif
│ ├── sinamail.gif
│ ├── taobao.gif
│ ├── wangyimail.gif
│ ├── wechatalbum.gif
│ └── zhihu.gif
└── index.html
├── extension
├── img
│ ├── chrome-logo.png
│ ├── cnblog.png
│ ├── github.png
│ ├── jianshu.png
│ ├── logo-50px.png
│ └── oschina.png
├── index.css
├── index.html
├── js
│ ├── FileSaver.js
│ ├── cnblog
│ │ ├── cnblogrun0.js
│ │ ├── cnblogrun1.js
│ │ └── cnblogrun2.js
│ ├── github
│ │ ├── githubrun1.js
│ │ ├── githubrun2.js
│ │ ├── githubrun3.js
│ │ ├── githubrun4.js
│ │ └── githubrun5.js
│ ├── index.js
│ ├── jianshu
│ │ ├── jianshurun1.js
│ │ └── jianshurun2.js
│ ├── jquery.js
│ └── oschina
│ │ └── oschinarun0.js
└── manifest.json
├── install_deps.sh
├── requirements.txt
├── tests
├── DeepAnalysis
│ ├── dataprocess.py
│ ├── model.py
│ └── trainer.py
├── blog_analyse
│ ├── cnblog.ipynb
│ ├── cnblog_article.json
│ ├── postdate_line.html
│ ├── stop_word.txt
│ └── topic_wordcloud.html
└── ctrip
│ ├── blog_analyse
│ └── cnblog_article.json
│ └── main.py
├── tools
├── main.py
├── resource
│ ├── .DS_Store
│ └── icon
│ │ ├── 12306.png
│ │ ├── alimail.png
│ │ ├── alipay-logo.png
│ │ ├── bilibili.png
│ │ ├── chrome-logo.png
│ │ ├── cnblog.png
│ │ ├── csdn.png
│ │ ├── ctrip.png
│ │ ├── dianxin.png
│ │ ├── github.png
│ │ ├── gjj.png
│ │ ├── hotmail.png
│ │ ├── jd.png
│ │ ├── jianshu.png
│ │ ├── liantong.png
│ │ ├── netease_cloudmusic.png
│ │ ├── oschina.png
│ │ ├── qmail.png
│ │ ├── qq.png
│ │ ├── qqqun.png
│ │ ├── sina.png
│ │ ├── taobao.png
│ │ ├── wangyi.png
│ │ ├── wechat-moments-album.png
│ │ ├── wechat-moments.png
│ │ ├── wechat.png
│ │ ├── xiecheng.png
│ │ ├── xuexin.png
│ │ ├── yidong.png
│ │ ├── zhihu-logo.svg
│ │ └── zhihu.png
└── stop_word.txt
└── uitest
└── main.py
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 | ko_fi: kangvcar
3 | custom: ['https://afdian.net/a/kangvcar']
4 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/.github/ISSUE_TEMPLATE/bug_report.md
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/.github/ISSUE_TEMPLATE/feature_request.md
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | *.json
3 | *.xlsx
4 | *.swp
5 | data
6 | .idea
7 | *.log
8 | __pycache__
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | ***
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 | A magic toolbox that gets your personal information back.
28 | 👉⚡Documentation ⚡| Video Demo | English | Get the latest maintained version | Telegram group | 👍Free proxy IPs
29 |
30 | ### 🗣️ Telegram group: [Join](https://t.me/+b-Rdy7_9QuwyMGI1)
31 |
32 | ### Developer Memoir
33 |
34 | Click to expand👉 Developer Memoir
35 |
36 | #### Scenario 1
37 | As usual, Xiao Ming opens Chrome to browse forums and Tieba. He accidentally clicks an ad on a page and lands on JD.com. Just as he reaches to close the window, he pauses (**OS: huh? How does JD know the exact item I've been longing for? It's just what I need!**). Since the page is already open, he checks the product details (**OS: not bad at all**) and decides to place an order!
38 |
39 | #### Scenario 2
40 | Xiao Bai can't tear himself away from NetEase Cloud Music's daily recommended playlist (**OS: wow! Why is the playlist full of exactly my favorite music styles? NetEase Cloud Music is amazing! It gets me! I have to get the vinyl membership!**), and strolls through Zhihu questions like "What is the most elegant way to XXX?", "What is it like to XXX?", "How do you evaluate XXX?" (**OS: huh? That's exactly the question I wanted to ask, and someone already asked it! What??? Thousands of answers!! Let's dive in!**)
41 |
42 | #### Scenario 3
43 | Xiao Da keeps improving himself at work, browsing the major tech sites: cnblogs, CSDN, OSCHINA, JianShu, Juejin and so on. The homepage recommendations are spot on (**OS: these technical posts are great; they find me before I even search**). Opening his own blog homepage, he realizes he has been writing posts for three years without noticing, and his tech stack keeps growing (**OS: why doesn't the blog backend provide a data-analysis system? I want to see how many posts I've written over the years and when, which posts are popular, which technologies I've spent the most time on, and whether my past creative peaks were in the evening or the small hours. I wish the system gave me more guiding data so I could create better!**)
44 |
45 | Looking at the scenarios above, you may marvel at how advancing science and technology have greatly improved the way we live.
46 |
47 | But think a little deeper: every website you browse and every website you register on records your information and your footprints.
48 |
49 | The chilling part is that your personal data lies exposed on the internet, and many companies profit enormously from it, for example by analyzing collected user data to push targeted ads and collect high advertising fees, while you, the producer of that data, get no share of the returns.
50 |
51 | #### Idea
52 |
53 | What if there were a tool that could get your personal information back, aggregate the personal data scattered across all those sites, analyze your personal data and offer you advice, and visualize it so you understand yourself more clearly?
54 |
55 | > Would you need such a tool? Would you like such a tool?
56 |
57 | Based on the above, I started developing **[INFO-SPIDER](https://github.com/kangvcar/InfoSpider)** 👇👇👇
58 |
59 |
60 |
61 | ### What is INFO-SPIDER
62 |
63 | INFO-SPIDER is a crawler toolbox that bundles many data sources. It aims to help users get their data back safely and quickly; the code is open source and the process is transparent. It also provides data analysis, generating chart files from your data so you can understand your information more intuitively and deeply.
64 | Currently supported data sources include GitHub, QQ Mail, NetEase Mail, Ali Mail, Sina Mail, Hotmail, Outlook, JD, Taobao, Alipay, China Mobile, China Unicom, China Telecom, Zhihu, Bilibili, NetEase Cloud Music, QQ friends, QQ groups, WeChat Moments album generation, browser history, 12306, cnblogs, CSDN blog, OSCHINA blog, and JianShu.
65 |
66 | See the [documentation](https://infospider.vercel.app) or the [video tutorial](https://www.bilibili.com/video/BV14f4y1R7oF/) for detailed usage
67 |
68 | You can discuss and learn with us on [](https://gitter.im/Info-Spider/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge)
69 |
70 | ### Features
71 |
72 | - Safe and reliable: the project is open source with concise, fully visible code, and it runs locally, so it is safe and reliable.
73 | - Easy to use: a GUI is provided; just click the data source you want and follow the prompts.
74 | - Clear structure: every data source is independent of the others and highly portable; **all crawler scripts live under the project's [Spiders](https://github.com/kangvcar/InfoSpider/tree/master/Spiders) directory**.
75 | - Rich data sources: 24+ data sources are currently supported, with more being added.
76 | - Uniform data format: all crawled data is stored as JSON, which makes later analysis easy.
77 | - Rich personal data: the project crawls as much of your personal data as it can; you can trim the data later as needed.
78 | - Data analysis: visual analysis of your personal data, currently only partially supported.
79 | - Rich documentation: complete [documentation](https://infospider.vercel.app) and a [video tutorial](https://www.bilibili.com/video/BV14f4y1R7oF/)
80 |
81 | ### Screenshot
82 |
83 | 
84 |
85 | ### QuickStart
86 |
87 | #### Installing dependencies
88 |
89 | 1. Install [Python 3](https://www.python.org/downloads/) and the Chrome browser
90 |
91 | 2. Install the [ChromeDriver](http://chromedriver.storage.googleapis.com/index.html) that matches your Chrome version
92 |
93 | 3. Install the dependencies: `pip install -r requirements.txt`
94 |
95 | > If you hit problems at this step, you can get the [portable build of InfoSpider](https://mbd.pub/o/bread/aZiTlJo=)
96 |
97 | #### Running the tool
98 |
99 | 1. Enter the tools directory
100 |
101 | 2. Run `python3 main.py`
102 |
103 | 3. In the window that opens, **click a data source button** and **choose a save path** when prompted
104 |
105 | 4. **Enter your username and password** in the browser that pops up; crawling starts automatically and the browser closes when it finishes.
106 |
107 | 5. In the chosen directory you can **inspect the downloaded data** (xxx.json) and the **data analysis charts** (xxx.html)
108 |
109 | ### Paid services
110 |
111 | > ***Limited release on sale...*** [take a look](https://mbd.pub/o/bread/aZiTlJo=)
112 |
113 | 1. The latest maintained version of InfoSpider
114 | 2. More comprehensive personal data analysis
115 | 3. No dependencies to install; convenient and beginner-friendly
116 | 4. A prepackaged program; just double-click to run
117 | 5. A hands-on guide to packaging InfoSpider yourself
118 | 6. One-on-one technical support from the developer
119 | 7. ***Buying now also gets you the upcoming 2.0 release for free***
120 |
121 |
122 |
123 |
124 | Purchase link
125 |
126 |
127 | ### Data sources
128 | - [x] GitHub
129 | - [x] QQ Mail
130 | - [x] NetEase Mail
131 | - [x] Ali Mail
132 | - [x] Sina Mail
133 | - [x] Hotmail
134 | - [x] Outlook
135 | - [x] JD
136 | - [x] Taobao
137 | - [x] Alipay
138 | - [x] China Mobile
139 | - [x] China Unicom
140 | - [x] China Telecom
141 | - [x] Zhihu
142 | - [x] Bilibili
143 | - [x] NetEase Cloud Music
144 | - [x] QQ friends ([cjh0613](https://github.com/cjh0613/python-pub/blob/1c308fe90386f6d6e69e2202bb0c4acd4857576f/%E8%8E%B7%E5%8F%96QQ%E5%A5%BD%E5%8F%8B%E5%88%97%E8%A1%A8.py))
145 | - [x] QQ groups ([cjh0613](https://github.com/cjh0613/python-pub/blob/1c308fe90386f6d6e69e2202bb0c4acd4857576f/%E8%8E%B7%E5%8F%96QQ%E5%A5%BD%E5%8F%8B%E5%88%97%E8%A1%A8.py))
146 | - [x] WeChat Moments album generation
147 | - [x] Browser history
148 | - [x] 12306
149 | - [x] cnblogs
150 | - [x] CSDN blog
151 | - [x] OSCHINA blog
152 | - [x] JianShu
153 |
154 | ### Data analysis
155 |
156 | - [x] cnblogs
157 | - [x] CSDN blog
158 | - [x] OSCHINA blog
159 | - [x] JianShu
160 |
161 | ### Plan
162 |
163 | - Provide a web interface to support more platforms
164 | - Run statistical analysis on the crawled personal data
165 | - Apply machine learning and natural language processing for deeper data analysis
166 | - Plot the analysis results as intuitive charts
167 | - Add more data sources...
168 |
169 | ### Visitors
170 |
171 | 
172 |
173 | ### Developers want to say
174 |
175 | 1. The project tackles the pain point that personal data is scattered across all kinds of companies, forming data silos that keep multidimensional data from being fused.
176 | 2. The author believes the project's greatest potential lies in fusing multidimensional data and analyzing personal data, maximizing the value of that data.
177 | 3. The project gathers data by crawling, so it ages (it needs continuous maintenance to track changes to the target websites).
178 | 4. The project has a clear structure; all data sources are independent of each other and highly portable. All crawler scripts live under the project's [Spiders](https://github.com/kangvcar/InfoSpider/tree/master/Spiders) directory and can be ported into your own programs.
179 | 5. The current v1.0 has only been tested on Windows with Python 3.7 and is not yet adapted to other platforms.
180 | 6. v2.0 plans a rewrite that adds a web UI and data visualization to support more platforms.
181 | 7. The [INFO-SPIDER](https://github.com/kangvcar/InfoSpider) code is open source; stars are welcome.
182 |
183 | ### Contributors
184 |
185 |
186 |
187 |
188 |
189 | ### Sponsors
190 |
191 | #### Swiftproxy
192 |
193 | [](https://www.swiftproxy.net/?ref=kangvcar)
194 |
195 | A recommended IP proxy provider with good prices and quality: 90M+ global high-quality clean dynamic and static residential IPs, free trials supported, dynamic traffic that never expires; use the discount code GHB5 for 10% off. 👉❤ [Free trial](https://www.swiftproxy.net/?ref=kangvcar)
196 |
197 | ### License
198 | GPL-3.0
199 |
200 | ### Star History
201 |
202 | [](https://star-history.com/#kangvcar/InfoSpider&Date)
203 |
204 |
--------------------------------------------------------------------------------
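All spiders export plain JSON, so the saved files can be explored with standard tooling. A minimal, hypothetical sketch (the file and column names follow the cnblog spider in this repo; the path is whatever save folder you picked in the GUI):

```python
import json

import pandas as pd

# 'cnblog_article.json' is what the cnblog spider writes; adjust the path
# to the save folder you chose in the GUI.
with open('cnblog_article.json', encoding='utf-8') as f:
    articles = json.load(f)

df = pd.DataFrame(articles)            # columns: title, sumary, postdate, posttime, views
df['views'] = df['views'].astype(int)  # view counts are stored as strings
print(df.sort_values('views', ascending=False)[['title', 'views']].head(10))
```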
/README_EN.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | ***
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 | A magic toolbox to get back your personal information.
28 | Documentation | Video Demo
29 |
30 | ### Donate
31 |
32 | Support Me!
33 |
34 | [Paypal](https://paypal.me/kangvcar?locale.x=zh_XC)
35 |
36 | ### Developer Memoir🌈
37 |
38 | Click to expand👉 Developer Memoir🌈
39 |
40 | #### Scene 1
41 |
42 | As usual, Xiao Ming opened Chrome to browse forums and Tieba. He accidentally clicked an ad on a page and was taken to JD.com. Just as he reached to close the window, he paused (OS: it's exactly the product I need! How did JD know?). Since the page was already open, he looked at the product details (OS: not bad!) and decided to give it a try and place an order!
43 |
44 | #### Scene 2
45 |
46 | Xiao Bai can't tear himself away from NetEase Cloud Music's daily recommended playlist (OS: wow! Why is the playlist full of my favorite music styles? NetEase Cloud Music is great! Love it so much! I have to buy a membership!), and strolls through Zhihu's "What is the most elegant way to XXX?", "What is it like to XXX?", "How do you evaluate XXX?" (OS: huh? That's exactly the question I wanted to ask, and it has already been asked! What??? Thousands of answers!! Let's have a look!)
47 |
48 | #### Scene 3
49 |
50 | Xiao Da never forgets to better himself at work, browsing the major tech sites: cnblogs, CSDN, OSChina, JianShu, JueJin and so on. He finds the homepage recommendations great (OS: these technical posts are excellent; they show up before I even search). Opening his own blog homepage, he realizes he has been writing for three years without noticing, and his tech stack has grown richer and richer (OS: why doesn't the blog backend provide a data-analysis system? I want to see how many posts I've written over the years and when, which posts are popular, which technologies I've spent more time on, and whether my creative peaks were in the evening or the small hours. I wish the system gave me more guiding data so I could create better!) Looking at the scenes above, you may marvel at the progress of technology, which has greatly improved our way of life.
51 |
52 | #### Idea
53 |
54 | If there were a tool like this, it could help you get your personal information back, aggregate your personal information from various sites, analyze your personal data and give you suggestions, and visualize your personal data so that you can know yourself better.
55 |
56 | > Would you need such a tool? Would you like such a tool?
57 |
58 | Based on the above, I started to develop **[INFO-SPIDER](https://github.com/kangvcar/InfoSpider)** 👇👇👇
59 |
60 |
61 |
62 | ### What is INFO-SPIDER
63 |
64 | INFO-SPIDER is a crawler toolbox with numerous data sources. It aims to help users get their data back safely and quickly. The tool's code is open source and the process is transparent.
65 | It also provides data analysis and generates chart files based on user data, so users gain a more intuitive, in-depth understanding of their own information.
66 | Currently supported data sources include GitHub, QQ mailbox, NetEase mailbox, Ali mailbox, Sina mailbox, Hotmail mailbox, Outlook mailbox, JingDong, TaoBao, Alipay, China Mobile, China Unicom, China Telecom, ZhiHu, Bilibili, NetEase Cloud Music, QQ Friends, QQ Groups, WeChat Moments Album, Browser History, 12306, Cnblog, CSDN, OSCHINA, JianShu.
67 |
68 | Refer to the [document](https://infospider.vercel.app) or [Video Demo](https://www.bilibili.com/video/BV14f4y1R7oF/) for details
69 |
70 | You can communicate with us on [](https://gitter.im/Info-Spider/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge)
71 |
72 | ### Features
73 | - Safe and Reliable: this project is open source, all source code is visible, and it runs locally, so it is safe and reliable.
74 | - Easy to Use: a GUI is provided; just click the data source you want and follow the prompts.
75 | - Clear Structure: all data sources of the project are independent of each other and highly portable. All crawler scripts are under the [Spiders](https://github.com/kangvcar/InfoSpider/tree/master/Spiders) directory.
76 | - Rich Data Sources: this project currently supports 24+ data sources, and more are constantly added.
77 | - Uniform Data Format: all crawled data is stored in JSON format.
78 | - Rich Personal Data: this project crawls as much personal data as possible for you; later processing can trim it as needed.
79 | - Data Analysis: this project provides visual analysis of personal data, which is currently only partially supported.
80 | - Documentation: this project comes with complete [documentation](https://infospider.vercel.app) and a [Video Demo](https://www.bilibili.com/video/BV14f4y1R7oF/).
81 |
82 | ### Screenshot
83 |
84 | 
85 |
86 | ### QuickStart
87 |
88 | #### Requirements
89 | - Step1: Install python3 and Chrome
90 | - Step2: Install the [ChromeDriver](http://chromedriver.storage.googleapis.com/index.html) that matches your Chrome version
91 | - Step3: Run `pip install -r requirements.txt`
92 |
93 | #### Run the project
94 | - Step1: `cd tools`
95 | - Step2: `python3 main.py`
96 | - Step3: Click a data source button in the window that opens and select the data save path as prompted
97 | - Step4: Enter your username and password in the browser that pops up; crawling starts automatically, and the browser closes when it finishes.
98 | - Step5: In the corresponding directory you can view the downloaded data (xxx.json) and the data analysis charts (xxx.html)
99 |
100 | ### Plan
101 | - Provide a web interface, adapting to multiple platforms
102 | - Conduct statistical analysis of personal data
103 | - Integrate machine learning and natural language processing to analyze the data in depth
104 | - Chart the analysis results visually
105 | - Add more data sources...
106 |
107 | ### Visitors
108 |
109 | 
110 |
111 | ### Contributors
112 |
113 |
114 |
115 |
116 |
117 | ### Sponsors
118 |
119 | 
120 |
121 | Thanks to JetBrains for providing an Open Source License for PyCharm!
122 |
123 | ### License
124 |
125 | GPL-3.0
--------------------------------------------------------------------------------
/Spiders/A12306/main12306.py:
--------------------------------------------------------------------------------
1 | import json
2 | import datetime
3 | import os
4 | import sys
5 | import requests
6 | from tkinter.filedialog import askdirectory
7 |
8 | # session = requests.session()
9 | # cookie_dict = {
10 | # 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
11 | # 'Cookie': 'JSESSIONID=4C46731EE8AC434BD50749C80CFCF67F; tk=dwjpjiuCb4bW06qo4oJMr5MKOw-4IRX_s1zBI-KyKSUq7-Ialm1110; RAIL_EXPIRATION=1554990611687; RAIL_DEVICEID=DNgKpgqbWzSopAqvFXfNXT3opSKTcwfTxGEIB_s60TyBtq6xHTqC1XAjQUm57eeWNoksjoHBbHDLx5HTeC_5lomXnDhs5MQ0Sv8XOOrSe2TBpQo4nlBQTR9GXc286CHhhprU0rQccB5BQ9kL5O4bfEcJADAKZq52; BIGipServerpassport=786956554.50215.0000; route=6f50b51faa11b987e576cdb301e545c4; BIGipServerotn=3973513482.24610.0000'
12 | #
13 | # }
14 | # requests.utils.add_dict_to_cookiejar(session.cookies, cookie_dict)
15 | # resp = session.post('https://kyfw.12306.cn/otn/index/initMy12306Api')
16 |
17 |
18 | class Info(object):
19 | def __init__(self, cookie):
20 | self.path = askdirectory(title='选择信息保存文件夹')
21 | if str(self.path) == "":
22 | sys.exit(1)
23 | self.session = requests.session()
24 | self.headers = {
25 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
26 | }
27 | cookie_dict = {}
28 |         cookie_items = cookie.split(';')
29 |         for i in cookie_items:
30 | try:
31 | cookie_dict[i.split('=')[0]] = i.split('=')[1]
32 | except Exception:
33 | pass
34 | requests.utils.add_dict_to_cookiejar(self.session.cookies, cookie_dict)
35 |
36 |     # Personal information, JSON format
37 | def get_user_info(self):
38 | url = 'https://kyfw.12306.cn/otn/modifyUser/initQueryUserInfoApi'
39 | resp = self.session.get(url)
40 | json_data = json.loads(resp.content.decode())
41 | self.save_json('user_info.json', resp.content.decode())
42 | return 0
43 |
44 |     # Incomplete orders https://kyfw.12306.cn/otn/queryOrder/queryMyOrderNoComplete
45 | def get_OrderNoComplete(self):
46 | url = 'https://kyfw.12306.cn/otn/queryOrder/queryMyOrderNoComplete'
47 | data = '_json_att='
48 | resp = self.session.post(url, data=data, verify=False)
49 | json_data = json.loads(resp.content.decode())
50 | self.save_json('user_order_no_complete.json', resp.content.decode())
51 | return 0
52 |
53 |     # Upcoming (not yet traveled) orders https://kyfw.12306.cn/otn/queryOrder/queryMyOrder
54 | def get_Order(self):
55 | url = 'https://kyfw.12306.cn/otn/queryOrder/queryMyOrder'
56 | from dateutil.relativedelta import relativedelta
57 |
58 |         # The date range is adjustable
59 | queryStartDate = (datetime.date.today() - relativedelta(months=+1)).strftime("%Y-%m-%d")
60 | queryEndDate = datetime.datetime.now().strftime("%Y-%m-%d")
61 | data = {'come_from_flag': 'my_order',
62 | 'pageIndex': 0,
63 | 'pageSize': 8,
64 | 'query_where': 'G',
65 | 'queryStartDate': queryStartDate,
66 | 'queryEndDate': queryEndDate,
67 | 'queryType': 1,
68 | 'sequeue_train_name': ''}
69 | resp = self.session.post(url, data=data)
70 | json_data = json.loads(resp.content.decode())
71 | self.save_json('user_order.json', resp.content.decode())
72 | return 0
73 |
74 |     # Contacts https://kyfw.12306.cn/otn/passengers/query
75 | def get_passengers(self):
76 | url = 'https://kyfw.12306.cn/otn/passengers/query'
77 | data = {'pageIndex': 1,
78 | 'pageSize': 10}
79 | resp = self.session.post(url, data=data)
80 | json_data = json.loads(resp.content.decode())
81 | self.save_json('user_passengers.json', resp.content.decode())
82 | return 0
83 |
84 |     # Ticket delivery address https://kyfw.12306.cn/otn/address/initApi
85 | def get_address(self):
86 | url = 'https://kyfw.12306.cn/otn/address/initApi'
87 | data = None
88 | resp = self.session.post(url, data=data)
89 | json_data = json.loads(resp.content.decode())
90 | self.save_json('user_address.json', resp.content.decode())
91 | return 0
92 |
93 |     # Insurance orders https://kyfw.12306.cn/otn/insurance/queryMyIns
94 | def get_insurance(self):
95 | url = 'https://kyfw.12306.cn/otn/insurance/queryMyIns'
96 |         # The date range is adjustable
97 | from dateutil.relativedelta import relativedelta
98 | queryStartDate = (datetime.date.today() - relativedelta(months=+1)).strftime("%Y-%m-%d")
99 | queryEndDate = datetime.datetime.now().strftime("%Y-%m-%d")
100 | data = {'come_from_flag': 'my_ins',
101 | 'pageIndex': 0,
102 | 'pageSize': 8,
103 | 'query_where': 'H',
104 | 'queryStartDate': queryStartDate,
105 | 'queryEndDate': queryEndDate,
106 | 'queryType': 1,
107 | 'sequeue_train_name': ''}
108 |         data = 'queryStartDate=2019-04-09&queryEndDate=2019-04-09&pageSize=8&pageIndex=1&query_where=H&sequeue_train_name=&come_from_flag=my_ins'  # note: this hard-coded query string overrides the dict built above
109 | resp = self.session.post(url, data=data)
110 | self.save_json('user_insurance.json', resp.content.decode())
111 | return 0
112 |
113 |     # Historical orders https://kyfw.12306.cn/otn/queryOrder/queryMyOrder
114 | def get_History_Order(self):
115 | url = 'https://kyfw.12306.cn/otn/queryOrder/queryMyOrder'
116 | from dateutil.relativedelta import relativedelta
117 |
118 |         referer_header = {'Referer': 'https://kyfw.12306.cn/otn/view/train_order.html'}
119 |
120 |         self.headers.update(referer_header)
121 |         # The date range is adjustable
122 | queryStartDate = (datetime.date.today() - relativedelta(months=+1)).strftime("%Y-%m-%d")
123 | queryEndDate = datetime.datetime.now().strftime("%Y-%m-%d")
124 |
125 | # data = {'come_from_flag': 'my_order',
126 | # 'pageIndex': 0,
127 | # 'pageSize': 8,
128 | # 'query_where': 'H',
129 | # 'queryStartDate': queryStartDate,
130 | # 'queryEndDate': queryEndDate,
131 | # 'queryType': 1,
132 | # 'sequeue_train_name': ''}
133 |
134 |         data = 'come_from_flag=my_order&pageIndex=0&pageSize=8&query_where=H&queryStartDate=2019-06-01&queryEndDate=2019-06-21&queryType=1&sequeue_train_name=15659358815'  # hard-coded query string; the dict version is kept above for reference
135 | resp = self.session.post(url, headers=self.headers, data=data, verify=False)
136 | self.save_json('user_history_order.json', resp.content.decode())
137 | return 0
138 |
139 |     # Membership information
140 | def get_level(self):
141 | url = 'https://cx.12306.cn/tlcx/memberInfo/memberPointQuery'
142 | data = 'queryType=0'
143 | resp = self.session.post(url, data=data)
144 | self.save_json('user_level.json', resp.content.decode())
145 | return 0
146 |
147 | def save_json(self, name, ret):
148 | # file_path = os.path.join(os.path.dirname(__file__) + '/' + name)
149 | with open(self.path + os.sep + name, 'w', encoding='utf-8') as f:
150 | f.write(ret)
151 |
152 |
153 | if __name__ == '__main__':
154 | pass
155 | # a = Info()
156 | # user = a.get_user_info()
157 | # a.save_json('user.json', user)
158 | # OrderNoComplete = a.get_OrderNoComplete()
159 | # a.save_json('OrderNoComplete.json',OrderNoComplete)
160 | # Order = a.get_Order()
161 | # a.save_json('Order.json',Order)
162 | # passengers = a.get_passengers()
163 | # a.save_json('passengers.json',passengers)
164 | # address = a.get_address()
165 | # a.save_json('address.json',address)
166 | # insurance = a.get_insurance()
167 | # a.save_json('insurance.json',insurance)
168 | # History_Order = a.get_History_Order()
169 | # a.save_json('History_Order.json',History_Order)
170 | #
171 | # # switch to JSON
172 | # level = a.get_level()
173 | # a.save_json('level.json',level)
174 |
--------------------------------------------------------------------------------
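A hypothetical driver for the `Info` class above, mirroring the commented-out `__main__`; the cookie string must be copied from a logged-in kyfw.12306.cn browser session (the values below are placeholders):

```python
if __name__ == '__main__':
    # Placeholder cookie; paste the Cookie header of a logged-in 12306 session.
    cookie = 'JSESSIONID=...; tk=...; RAIL_DEVICEID=...'
    info = Info(cookie)    # opens a folder picker for the save path
    info.get_user_info()   # writes user_info.json
    info.get_Order()       # writes user_order.json
    info.get_passengers()  # writes user_passengers.json
    info.get_address()     # writes user_address.json
```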
/Spiders/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/Spiders/__init__.py
--------------------------------------------------------------------------------
/Spiders/alipay/main.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import re
4 | import sys
5 | import requests
6 | from lxml import etree
7 | from selenium import webdriver
8 | from selenium.webdriver import ChromeOptions
9 | from tkinter.filedialog import askdirectory
10 | from tqdm import tqdm
11 |
12 | class ASpider(object):
13 | def __init__(self, cookie):
14 | self.path = askdirectory(title='选择信息保存文件夹')
15 | if str(self.path) == "":
16 | sys.exit(1)
17 | self.session = requests.session()
18 | self.headers = {
19 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
20 | 'referer': ''
21 | }
22 | cookie_dict = {}
23 |         cookie_items = cookie.split(';')
24 |         for i in cookie_items:
25 | try:
26 | cookie_dict[i.split('=')[0]] = i.split('=')[1]
27 | except Exception:
28 | pass
29 | requests.utils.add_dict_to_cookiejar(self.session.cookies, cookie_dict)
30 |
31 | def get_user_info(self):
32 | url = 'https://custweb.alipay.com/account/index.htm'
33 | resp = self.session.get(url)
34 | obj = etree.HTML(resp.content.decode()).xpath('//tbody')[0]
35 | item = {}
36 | item['name'] = ''.join(obj.xpath('./tr[1]/td[1]//text()')).strip()
37 | item['email'] = ''.join(obj.xpath('./tr[2]/td[1]//text()')).strip()
38 | item['mobile'] = ''.join(obj.xpath('./tr[3]/td[1]//text()')).strip()
39 | item['tb_name'] = ''.join(obj.xpath('./tr[4]/td[1]//text()')).strip()
40 | item['register_time'] = ''.join(obj.xpath('./tr[7]/td[1]//text()')).strip()
41 | self.write_json(self.path + os.sep + 'user_info.json', json.dumps(item))
42 |
43 | def write_json(self, name, str):
44 | # file_path = os.path.join(os.path.dirname(__file__) + '/' + name)
45 | with open(name, 'w') as f:
46 | f.write(str)
47 |
48 | def get_YEB(self):
49 | url = 'https://yebprod.alipay.com/yeb/asset.htm'
50 | resp = self.session.get(url)
51 | ele = etree.HTML(resp.content.decode('gbk'))
52 | item = {}
53 | # print(etree.tostring(ele))
54 |         item['eye-val'] = re.sub(r'\s', '', ele.xpath('.//span[@class="eye-val"]/text()')[0])
55 |         item['total_val'] = re.sub(r'\s', '', ele.xpath('.//div[@class="box-bill-foot-account eye-val"]/text()')[0])
56 |         item['Unavailable_val'] = re.sub(r'\s', '', ele.xpath('.//div[@class="box-bill-foot-account eye-val"]/text()')[1])
57 | self.write_json(self.path + os.sep + 'yu_e_bao.json', json.dumps(item))
58 |
59 | def get_bills(self):
60 | url = 'https://lab.alipay.com/consume/record/items.htm'
61 | self.headers['referer'] = 'https://my.alipay.com/portal/i.htm'
62 | resp = self.session.get(url, headers=self.headers, verify=False)
63 | obj_list = etree.HTML(resp.content.decode('gbk')).xpath('//tbody/tr')
64 | json_list = []
65 | for obj in tqdm(obj_list):
66 | item = {}
67 | item['number'] = ''.join(obj.xpath('./td[1]//text()')).strip()
68 | item['time'] = ''.join(obj.xpath('./td[2]//text()')).strip()
69 | item['info'] = ''.join(obj.xpath('./td[3]//text()')).strip()
70 | item['income'] = ''.join(obj.xpath('./td[4]//text()')).strip()
71 | item['outcome'] = ''.join(obj.xpath('./td[5]//text()')).strip()
72 | item['balance'] = ''.join(obj.xpath('./td[6]//text()')).strip()
73 | item['from'] = ''.join(obj.xpath('./td[7]//text()')).strip()
74 | item['detail'] = ''.join(obj.xpath('./td[8]//text()')).strip()
75 | json_list.append(item)
76 | ye = ''.join(obj_list[0].xpath('./td[6]//text()')).strip()
77 | ye_dict = {'YuE': ye}
78 | self.write_json(self.path + os.sep + 'bill_list.json', json.dumps(json_list))
79 | self.write_json(self.path + os.sep + 'balance.json', json.dumps(ye_dict))
80 |
81 |
82 | if __name__ == '__main__':
83 | cookie = 'cna=FMHmFL1zqnUCASQH4bAneyUf; mobileSendTime=-1; credibleMobileSendTime=-1; ctuMobileSendTime=-1; riskMobileBankSendTime=-1; riskMobileAccoutSendTime=-1; riskMobileCreditSendTime=-1; riskCredibleMobileSendTime=-1; riskOriginalAccountMobileSendTime=-1; isg=BMTEs6f5RNVXdvCZiIUsYWqLlUR2dekgISd1n95lQQ9SCWTTBu-t19yoSeF0FCCf; l=bBgcZ5c7vJ2Of-mJBOCwCuI8L179_IRYSuPRwCmXi_5pZ6T68E7Olorn_F96Vj5Rs4TB4UJxb0v9-etXw; UM_distinctid=169b3c04ea8509-063bdd824c9e64-12306d51-fa000-169b3c04ea95a8; unicard1.vm="K1iSL1mnW5fEFTtXnTWZPQ=="; NEW_ALIPAY_TIP=1; csrfToken=M_AdqLObk41r9VvTDoRdyy2Q; CLUB_ALIPAY_COM=2088022680005311; iw.userid="K1iSL1mnW5fEFTtXnTWZPQ=="; ali_apache_tracktmp="uid=2088022680005311"; session.cookieNameId=ALIPAYJSESSIONID; LoginForm=alipay_login_auth; alipay="K1iSL1mnW5fEFTtXnTWZPca48DVsXJKl1U07jLnVskUcfw=="; spanner=hWXgcY78eHIkRX5btAjBSJV5G91m2+NMXt2T4qEYgj0=; locale=zh-cn; CHAIR_SESS=JWYmdXvINYrjfJhNfnAOApEy7drxxpERpaBXObg17RYQr9jGJZDWNQuk7GTZ-NeYuRSIYTsU7tiaFoLpKJpwTQ2FZqKmOSphZ98CHxZicmK3XOz8tgVdDWKxbBKLiiY4Tk4zkLNOIkCMlfoY4vOsGvxtikpzFXx61uyLzy-_-PGsZT1UzN0CDKSYTq1xRxaYhfp7vURB4eAzWjJpQXXmxXDq8A8cqmAyErsLtLBG8MfxigkVOwR88J5o95xQFcJ0; ctoken=QwetGqWKOjvvPRGx; zone=GZ00D; ALIPAYJSESSIONID=RZ257CXtTz7r7Ra0sc4QHeC4nrz1eyauthRZ25GZ00; rtk=umvDaVnzeH3Uz7V5rmCCnDE+MOkI1ZKNRTuJzmidxn8p1ZcI5EA'
84 | spider = ASpider(cookie)
85 | spider.get_bills()
86 | spider.get_user_info()
87 | spider.get_YEB()
88 |
--------------------------------------------------------------------------------
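The same cookie-string parsing loop appears in nearly every spider in this repo (A12306, alipay, chsi, ctrip). A standalone sketch of the pattern; one deliberate difference is splitting on the first `=` only, so cookie values that themselves contain `=` survive intact, which the per-spider copies using `split('=')[1]` would truncate:

```python
import requests

def session_from_cookie_header(cookie_str):
    """Build a requests session from a raw 'k1=v1; k2=v2' Cookie header."""
    session = requests.session()
    cookie_dict = {}
    for pair in cookie_str.split(';'):
        try:
            key, value = pair.strip().split('=', 1)
            cookie_dict[key] = value
        except ValueError:
            pass  # skip fragments without an '='
    requests.utils.add_dict_to_cookiejar(session.cookies, cookie_dict)
    return session
```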
/Spiders/bilibili/main.py:
--------------------------------------------------------------------------------
1 | import os, sys  # sys is needed for the sys.exit call below
2 | import json
3 | import time
4 | import requests
5 | from tkinter.filedialog import askdirectory
6 |
7 | class BilibiliHistory(object):
8 | def __init__(self, cookie_str):
9 | self.path = askdirectory(title='选择信息保存文件夹')
10 | if str(self.path) == "":
11 | sys.exit(1)
12 | self.MAX_PAGE = 10
13 | self.PAGE_PER_NUM = 200
14 |
15 | self.cookie = cookie_str
16 | self.history = self.get_all_bili_history()
17 | self.save(self.history, 'bilibili_history.json')
18 | self.userinfo = self.get_user_info()
19 | self.save(self.userinfo, 'user_info.json')
20 |
21 | def get_all_bili_history(self):
22 | headers = self.get_header()
23 | # history = {'all': []}
24 | history = []
25 | for page_num in range(self.MAX_PAGE):
26 |
27 | url = 'https://api.bilibili.com/x/v2/history?pn={pn}&ps={ps}&jsonp=jsonp'.format(pn=page_num, ps=self.PAGE_PER_NUM)
28 | result = self.req_get(headers, url)
29 | # print('page = {} code = {} datalen = {}'.format(page_num, result['code'], len(result['data'])))
30 | print('爬取中...')
31 | time.sleep(1)
32 | # if len(result['data']) == 0:
33 | if not result['data']:
34 | print('爬取完成...')
35 | break
36 | # if page_num == 2:
37 | # break
38 | history.append(result)
39 | return history
40 |
41 | def get_user_info(self):
42 | headers = self.get_header()
43 | url = 'https://api.bilibili.com/x/member/web/account'
44 | result = self.req_get(headers, url)
45 | return result
46 |
47 | def req_get(self, headers, url):
48 | resp = requests.get(url, headers=headers)
49 | return json.loads(resp.text)
50 |
51 | def save(self, data, filename):
52 | with open(self.path + os.sep + filename, 'w', encoding='utf-8') as fp:
53 | json.dump(data, fp, ensure_ascii=False)
54 | return True
55 |
56 | def get_header(self):
57 | headers = {
58 | 'Accept': '*/*',
59 | 'Accept-Encoding': 'gzip, deflate, br',
60 | 'Accept-Language': 'zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7',
61 | 'Connection': 'keep-alive',
62 | 'Cookie': self.cookie,
63 | 'Host': 'api.bilibili.com',
64 | 'Referer': 'https://www.bilibili.com/account/history',
65 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 '
66 | '(KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'
67 | }
68 | return headers
69 |
70 |
71 |
--------------------------------------------------------------------------------
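Reading the exported history back: `get_all_bili_history` appends one API response per page, and the loop above treats each response's `data` field as the list of entries. Under that same assumption, a summary sketch:

```python
import json

# bilibili_history.json lives in the save folder chosen in the GUI.
with open('bilibili_history.json', encoding='utf-8') as fp:
    pages = json.load(fp)

entries = [item for page in pages for item in page.get('data', [])]
print('history entries fetched:', len(entries))
```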
/Spiders/browser/main.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # -*- coding: UTF-8 -*-
3 | import os
4 | import sys
5 | import json
6 | import time
7 | import sqlite3
8 | import operator
9 | from collections import OrderedDict
10 | import matplotlib.pyplot as plt
11 | from tkinter.filedialog import askdirectory
12 | from tqdm import tqdm
13 |
14 | class Browserhistory(object):
15 | def __init__(self):
16 | self.path = askdirectory(title='选择信息保存文件夹')
17 | if str(self.path) == "":
18 | sys.exit(1)
19 |         # path to the user's Chrome history database
20 | self.data_path=os.path.expanduser('~') + r"\AppData\Local\Google\Chrome\User Data\Default"
21 | self.history_db=os.path.join(self.data_path,'history')
22 |         # query the db
23 | c = sqlite3.connect(self.history_db)
24 | cursor = c.cursor()
25 | select_statement = "SELECT urls.id, urls.url, urls.title, urls.visit_count, urls.last_visit_time, visits.visit_time, visits.visit_duration FROM urls, visits WHERE urls.id = visits.url;"
26 | cursor.execute(select_statement)
27 | self.results = cursor.fetchall() #tuple
28 |
29 | self.data_save_as_json(self.results)
30 |
31 |     # convert a WebKit timestamp (microseconds since 1601-01-01) to a local time string
32 | def timestamp_format(self, timestamp):
33 | if timestamp > 13000000000000000:
34 | time_c = timestamp/1000000-11644473600
35 | return time.strftime("%Y-%m-%d %X", time.localtime(time_c))
36 | else:
37 | return timestamp
38 |
39 |     # serialize to JSON and save to file.
40 | def data_save_as_json(self, data):
41 | history_list = []
42 | for i in tqdm(data):
43 | item = {}
44 | item['urls.id'] = i[0]
45 | item['urls.url'] = i[1]
46 | item['urls.title'] = i[2]
47 | item['urls.visit_count'] = i[3]
48 | item['urls.last_visit_time'] = self.timestamp_format(i[4])
49 | item['visits.visit_time'] = self.timestamp_format(i[5])
50 | item['visits.visit_duration'] = self.timestamp_format(i[6])
51 | history_list.append(item)
52 | history_json = json.dumps(history_list, ensure_ascii=False)
53 | with open(self.path + '/browser_data.json', 'w', encoding='utf-8') as f:
54 | f.write(history_json)
--------------------------------------------------------------------------------
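Chrome's history database stores times as microseconds since 1601-01-01 (the WebKit epoch); `timestamp_format` above converts them by dividing by 10^6 and subtracting 11644473600, the number of seconds between 1601-01-01 and the Unix epoch. A standalone check of that conversion:

```python
import time

WEBKIT_UNIX_OFFSET = 11644473600  # seconds from 1601-01-01 to 1970-01-01

def webkit_to_local(ts_us):
    """Convert a Chrome/WebKit microsecond timestamp to a local time string."""
    return time.strftime('%Y-%m-%d %X', time.localtime(ts_us / 1000000 - WEBKIT_UNIX_OFFSET))

print(webkit_to_local(13240000000000000))  # ~2020-07-23, depending on your timezone
```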
/Spiders/chsi/main.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import re
4 |
5 | import requests
6 | from lxml import etree
7 |
8 |
9 | class Chis(object):
10 | def __init__(self, cookie):
11 | self.session = requests.session()
12 | self.headers = {
13 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
14 | }
15 | cookie_dict = {}
16 |         cookie_items = cookie.split(';')
17 |         for i in cookie_items:
18 | try:
19 | cookie_dict[i.split('=')[0]] = i.split('=')[1]
20 | except IndexError:
21 | cookie_dict[''] = i
22 | requests.utils.add_dict_to_cookiejar(self.session.cookies, cookie_dict)
23 |
24 |     # Student status (xueji) information
25 | def get_xueji_info(self):
26 | url = 'https://my.chsi.com.cn/archive/gdjy/xj/show.action'
27 | resp = self.session.get(url, headers=self.headers, verify=False)
28 | ele = etree.HTML(resp.content.decode())
29 | try:
30 | pic_path = ele.xpath('//img[@alt="录取照片"]/@src')[0]
31 | if ("no-photo" in pic_path):
32 | pic_1_url = pic_path
33 | else:
34 | pic_1_url = 'https://my.chsi.com.cn' + ele.xpath('//img[@alt="录取照片"]/@src')[0]
35 | except Exception:
36 | pic_1_url = None
37 | pass
38 | try:
39 | pic_path = ele.xpath('//img[@alt="学历照片"]/@src')[0]
40 | if ("no-photo" in pic_path):
41 | pic_2_url = pic_path
42 | else:
43 | pic_2_url = 'https://my.chsi.com.cn' + ele.xpath('//img[@alt="学历照片"]/@src')[0]
44 | except Exception:
45 | pic_2_url = None
46 | pass
47 | try:
48 | xueji_pic_url = ele.xpath('//img[@class="xjxx-img"]/@src')[0]
49 | except Exception:
50 | xueji_pic_url = None
51 |
52 | return pic_1_url, pic_2_url, xueji_pic_url
53 |
54 |     # Report
55 | def get_report(self):
56 | url = 'https://my.chsi.com.cn/archive/bab/index.action'
57 | resp = self.session.get(url, verify=False)
58 | report_detail_url = etree.HTML(resp.content.decode()).xpath('//a[@class="green-btn mid-btn marginr20"]/@href')[
59 | 0]
60 | detail_resp = self.session.get(report_detail_url, headers=self.headers, verify=False)
61 | report_detail_url = etree.HTML(detail_resp.content.decode()).xpath('//a[text()="查看"]/@href')[0]
62 | resp = self.session.get(report_detail_url, headers=self.headers, verify=False)
63 | ele = etree.HTML(resp.content.decode())
64 | if '请输入验证码以继续当前操作:' in resp.content.decode():
65 | ret = ele.xpath('//td[@class="tdRight"]')[1]
66 | capt_url = 'https://www.chsi.com.cn' + ret.xpath('./following-sibling::td[1]/img/@src')[0]
67 | value = ret.xpath('./following-sibling::td[1]/input/@value')[0]
68 | num = re.findall(r'cap=(\d{4})', capt_url)
69 | data = {'cap': num,
70 | 'capachatok': value,
71 | 'Submit': ' 继续'}
72 | self.session.post('https://www.chsi.com.cn/xlcx/yzm.do', data=data)
73 | resp = self.session.get(report_detail_url, verify=False)
74 |             ele = etree.HTML(resp.content.decode())  # re-parse the page after passing the captcha; .decode() alone would break the .xpath calls below
75 |
76 | pdf_url = 'https://www.chsi.com.cn' + ele.xpath('//a[@title="下载"]/@href')[0]
77 | item = {}
78 | item['name_url'] = 'https://www.chsi.com.cn' + \
79 | ele.xpath('//td[@class="title1"]/following-sibling::td[1]/img/@src')[0]
80 | print(ele.xpath('//div[@class="cnt1"]/text()'))
81 |         # Pull the 15 profile fields in order; missing entries are skipped.
82 |         fields = ['genre', 'sfz_id', 'nation', 'birth', 'school',
83 |                   'education', 'faculty', 'class', 'major', 'student_id',
84 |                   'style', 'entrance_time', 'duration', 'education_style', 'status']
85 |         values = ele.xpath('//div[@class="cnt1"]/text()')
86 |         for idx, field in enumerate(fields):
87 |             try:
88 |                 item[field] = values[idx]
89 |             except IndexError:
90 |                 pass
141 | ret = json.dumps(item)
142 | file_path = os.path.join(os.path.dirname(__file__) + '/info.json')
143 | with open(file_path, 'w') as f:
144 | f.write(ret)
145 | return pdf_url
146 |
147 | def save_ret(self, url, name):
148 |         if url is None:
149 | return
150 | resp = self.session.get(url, verify=False)
151 | file_path = os.path.join(os.path.dirname(__file__) + '/' + name)
152 | with open(file_path, 'wb') as f:
153 | f.write(resp.content)
154 |
155 |
156 | if __name__ == '__main__':
157 | # chis = Chis()
158 | # p1, p2, x = chis.get_xueji_info()
159 | # chis.save_ret(p1, '录取前照片.jpg')
160 | # chis.save_ret(p2, '学籍照片.jpg')
161 | # chis.save_ret(x, '学信网信息.jpg')
162 | # p3 = chis.get_report()
163 | # chis.save_ret(p3, '学信报告.pdf')
164 | pass
--------------------------------------------------------------------------------
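A hypothetical driver mirroring the commented-out `__main__` above; note that unlike that snippet, `Chis` requires the Cookie header of a logged-in my.chsi.com.cn session (placeholder below), and the output file names are illustrative:

```python
if __name__ == '__main__':
    chis = Chis('JSESSIONID=...; other=...')  # placeholder cookie values
    p1, p2, x = chis.get_xueji_info()
    chis.save_ret(p1, 'admission_photo.jpg')
    chis.save_ret(p2, 'graduation_photo.jpg')
    chis.save_ret(x, 'chsi_info.jpg')
    chis.save_ret(chis.get_report(), 'chsi_report.pdf')
```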
/Spiders/cloudmusic/main.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import json
3 | import re
4 | import time
5 | from tkinter.filedialog import askdirectory
6 |
7 | class Cloudmusic(object):
8 | def __init__(self, username, password):
9 | self.path = askdirectory(title='选择信息保存文件夹')
10 | self.username = username
11 | self.password = password
12 |         self.api = 'http://45.129.2.73:3000'  # a third-party NeteaseCloudMusicApi endpoint; presumably a self-hosted instance works too
13 | self.isphone = re.compile(r'[1][^1269]\d{9}')
14 | self.isemail = re.compile(r'[^\._][\w\._-]+@(?:[A-Za-z0-9]+\.)+[A-Za-z]+$')
15 | self.login_refresh()
16 | if self.isphone.match(self.username):
17 | self.userid = str(self.user_login_as_cellphone())
18 | elif self.isemail.match(self.username):
19 | self.userid = str(self.user_login_as_email())
20 | else:
21 | print('登录失败!用户名需为手机号码或者邮箱。')
22 |
23 |     ## Refresh login state
24 | def login_refresh(self):
25 | url = self.api + '/login/refresh'
26 | response = requests.get(url)
27 | return 0
28 |
29 |     ## Log in to NetEase Cloud Music with phone number + password
30 | def user_login_as_cellphone(self):
31 | url = self.api + '/login/cellphone?phone=' + self.username + '&password=' + self.password
32 | response = requests.get(url)
33 | code = response.json()['code']
34 |         if str(code) == "200":
35 | print('登录成功')
36 | else:
37 | print('登录失败')
38 | userid = response.json()['account']['id']
39 | # print('userid = ' + str(userid))
40 | return userid
41 |
42 |     ## Log in to NetEase Cloud Music with email + password
43 | def user_login_as_email(self):
44 | url = self.api + '/login?email=' + self.username + '&password=' + self.password
45 | response = requests.get(url)
46 | code = response.json()['code']
47 |         if str(code) == "200":
48 | print('登录成功')
49 | else:
50 | print('登录失败')
51 | userid = response.json()['account']['id']
52 | # print('userid = ' + str(userid))
53 | return userid
54 |
55 |     ## Write the fetched personal info to a JSON file
56 | def data_wirte_to_json(self, filename, context):
57 | filepath = self.path + '/' + filename + '.json'
58 | with open(filepath, 'w', encoding='utf-8') as f:
59 | f.write(context)
60 | return filepath
61 |
62 |     ## Fetch the user's basic profile
63 | def get_user_detail(self):
64 | url = self.api + '/user/detail?uid=' + self.userid
65 | response = requests.get(url)
66 | self.data_wirte_to_json('user_detail', response.text)
67 | print('获取用户基本信息成功!')
68 | return 0
69 |
70 |     ## Fetch the user's playlists
71 | def get_playlist(self):
72 | url = self.api + '/user/playlist?uid=' + self.userid
73 | response = requests.get(url)
74 | self.data_wirte_to_json('user_playlist', response.text)
75 | print('获取用户歌单成功!')
76 | return 0
77 |
78 |     ## Fetch the user's following list
79 | def get_user_follows(self):
80 | url = self.api + '/user/follows?uid=' + self.userid
81 | response = requests.post(url)
82 | self.data_wirte_to_json('user_follows', response.text)
83 | print('获取用户关注列表成功!')
84 | return 0
85 |
86 |     ## Fetch the user's followers list
87 | def get_user_followeds(self):
88 | url = self.api + '/user/followeds?uid=' + self.userid
89 | response = requests.post(url)
90 | self.data_wirte_to_json('user_followeds', response.text)
91 | print('获取用户粉丝列表成功!')
92 | return 0
93 |
94 |     ## Fetch the user's activity feed
95 | def get_user_event(self):
96 | url = self.api + '/user/event?uid=' + self.userid
97 | response = requests.post(url)
98 | self.data_wirte_to_json('user_event', response.text)
99 | print('获取用户动态成功!')
100 | return 0
101 |
102 |     ## Fetch the user's listening ranking (weekly)
103 | def get_user_record_week(self):
104 | url = self.api + '/user/record?uid=' + self.userid + '&type=1'
105 | response = requests.get(url)
106 | self.data_wirte_to_json('user_record_week', response.text)
107 | print('获取用户听歌排行(周榜)成功!')
108 | return 0
109 |
110 |     ## Fetch the user's listening ranking (all-time)
111 | def get_user_record_all(self):
112 | url = self.api + '/user/record?uid=' + self.userid + '&type=0'
113 | response = requests.get(url)
114 | self.data_wirte_to_json('user_record_all', response.text)
115 | print('获取用户听歌排行(总榜)成功!')
116 | return 0
117 |
118 | if __name__ == '__main__':
119 | music = Cloudmusic('132****', '*****')
120 | music.get_user_detail()
121 | music.get_playlist()
122 | music.get_user_follows()
123 | music.get_user_followeds()
124 | music.get_user_event()
125 | music.get_user_record_week()
126 | music.get_user_record_all()
127 |
--------------------------------------------------------------------------------
/Spiders/cnblog/main.py:
--------------------------------------------------------------------------------
1 | import re
2 | import os
3 | import sys
4 | import json
5 | import requests
6 | import pandas as pd
7 | import numpy as np
8 | import jieba
9 | import pyecharts
10 | from pyecharts import options as opts
11 | from collections import Counter
12 | from pyecharts.charts import WordCloud
13 | from pyecharts.charts import Line
14 | from bs4 import BeautifulSoup
15 | from tkinter.filedialog import askdirectory
16 | class Cnblog(object):
17 | def __init__(self, blogname):
18 | self.blogname = blogname
19 | self.path = askdirectory(title='选择信息保存文件夹')
20 | if str(self.path) == "":
21 | sys.exit(1)
22 |
23 | def get_element_of_article(self):
24 |         '''
25 |         Fetch the fields of each post (title, publish time, view count)
26 |         '''
27 | url = 'https://www.cnblogs.com/' + str(self.blogname) + '/default.html'
28 | headers = {
29 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36'
30 | }
31 | pos = 1
32 | article_list = []
33 | while 1:
34 | key_dict = {'page': str(pos)}
35 | reps = requests.get(url, headers=headers, params=key_dict, timeout=3)
36 | soup = BeautifulSoup(reps.text, "html.parser")
37 | posts = soup.find_all("div", class_="day")
38 | if not len(posts):
39 | break
40 | date_pattern = re.compile(r"\d{4}-\d{1,2}-\d{1,2}")
41 | time_pattern = re.compile(r"\d{2}:\d{2}")
42 | views_pattern = re.compile(r"\d+")
43 | from tqdm import tqdm
44 | pbar = tqdm(posts)
45 | for each_post in pbar:
46 | try:
47 | item = {}
48 | item['title'] = each_post.find("div", class_="postTitle").text.strip()
49 | item['sumary'] = each_post.find("div", class_="c_b_p_desc").text.strip()
50 | item['postdate'] = date_pattern.findall(each_post.find("div", class_="postDesc").text)[0]
51 | item['posttime'] = time_pattern.findall(each_post.find("div", class_="postDesc").text)[0]
52 | item['views'] = views_pattern.findall(each_post.find("span", class_="post-view-count").text)[0]
53 | article_list.append(item)
54 | pbar.set_description("正在爬取文章:%s" % item['title'])
55 |             except Exception:
56 | pass
57 | import time
58 | time.sleep(0.1)
59 | pos += 1
60 | article_json = json.dumps(article_list)
61 | return article_json
62 |
63 | def save_as_json(self, content_json):
64 | json_file_name = self.path + os.sep + 'cnblog_article.json'
65 | with open(json_file_name, 'w', encoding='utf-8') as f:
66 | f.write(content_json)
67 | return json_file_name
68 |
69 |     # Concatenate one column of every record into a single string
70 | def get_text(self, json_file, column='title'):
71 | df_json = pd.read_json(json_file, encoding='utf-8')
72 | text = ''
73 | for i in df_json[column]:
74 | text += i
75 | return text
76 |
77 |     # Tokenize with jieba and drop stop words
78 | def split_word(self, text):
79 | word_list = list(jieba.cut(text))
80 |         # Remove meaningless words and symbols; I curated my own stop-word list
81 | with open('stop_word.txt', encoding='utf-8') as f:
82 | meaningless_word = f.read().splitlines()
83 | # print(meaningless_word)
84 | result = []
85 |         # Filter the tokens
86 | for i in word_list:
87 | if i not in meaningless_word:
88 | result.append(i.replace(' ', ''))
89 | return result
90 |
91 |     # Word frequency statistics
92 | def word_counter(self, words):
93 |         # Count with collections.Counter
94 | words_counter = Counter(words)
95 |         # Convert the Counter into a list of (word, count) pairs
96 | words_list = words_counter.most_common(100)
97 | return words_list
98 |
99 |     # Generate a word cloud
100 | def create_wordcloud(self, json_file, title='词云', column='title'):
101 | text = self.get_text(json_file, column=column)
102 | clear_word = self.split_word(text)
103 | data = self.word_counter(clear_word)
104 | wd = WordCloud()
105 | wd.add(series_name=title, data_pair=data, word_size_range=[40, 150])
106 | wd.set_global_opts(title_opts=opts.TitleOpts(title="你的文章词云", subtitle="基于你的博客数据生成", title_textstyle_opts=opts.TextStyleOpts(font_size=23)), tooltip_opts=opts.TooltipOpts(is_show=True))
107 | # wd.render_notebook()
108 | wd.render(self.path + os.sep + 'topic_wordcloud.html')
109 |
110 |     # Generate a line chart
111 | def create_postdate_line(self, json_file, title='折线图', column='postdate'):
112 | df_json = pd.read_json(json_file, encoding='utf-8')
113 | postdate_month_list = []
114 | for i in df_json[column]:
115 | postdate_month_list.append('-'.join(i.split('-')[:-1]))
116 | date_counter = Counter(postdate_month_list)
117 | line = Line()
118 | x_data = [i for i in date_counter]
119 | y_data = [date_counter[i] for i in date_counter]
120 | line.add_xaxis(x_data)
121 | line.add_yaxis(series_name="发文数量", y_axis=y_data)
122 | line.set_global_opts(title_opts=opts.TitleOpts(title="你的发文数量", subtitle="基于你的博客数据生成"))
123 | line.render(self.path + os.sep + 'postdate_line.html')
124 |
125 |
126 | if __name__ == '__main__':
127 |     cnblog = Cnblog('kangvcar')
128 |     article = cnblog.get_element_of_article()
129 |     json_file_name = cnblog.save_as_json(article)
130 |     cnblog.create_wordcloud(json_file_name, title='你的创作领域词云', column='title')
131 |     cnblog.create_postdate_line(json_file_name, title='发文时间线', column='postdate')
--------------------------------------------------------------------------------
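A minimal, self-contained sketch of the jieba + Counter pipeline that `split_word()` and `word_counter()` implement above, with the stop-word file replaced by a tiny inline set for illustration:

```python
import jieba
from collections import Counter

text = 'Python爬虫入门,Python数据分析,数据可视化'
stop_words = {',', '。', ' '}  # stand-in for the project's stop_word.txt
words = [w for w in jieba.cut(text) if w not in stop_words]
print(Counter(words).most_common(5))  # e.g. [('Python', 2), ('数据', 2), ...]
```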
/Spiders/csdn/main.py:
--------------------------------------------------------------------------------
1 | import re
2 | import os
3 | import sys
4 | import json
5 | import requests
6 | from bs4 import BeautifulSoup
7 | from tkinter.filedialog import askdirectory
8 |
9 | class Csdn(object):
10 | def __init__(self, blogname):
11 | self.blogname = blogname
12 | self.path = askdirectory(title='选择信息保存文件夹')
13 | if str(self.path) == "":
14 | sys.exit(1)
15 | self.headers = {
16 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36'
17 | }
18 |
19 | def get_element_of_article(self):
20 |         '''
21 |         Fetch the fields of each post (title, publish time, view count)
22 |         '''
23 | article_list = []
24 | pos = 1
25 | while 1:
26 | url='https://blog.csdn.net/' + self.blogname + '/article/list/' + str(pos)
27 | reps = requests.get(url,headers=self.headers,timeout=30)
28 | soup = BeautifulSoup(reps.text, 'lxml')
29 | posts = soup.findAll(name="div", attrs={"class" :"article-item-box csdn-tracking-statistics"})
30 | if not len(posts):
31 | break
32 | date_pattern = re.compile(r"\d{4}-\d{1,2}-\d{1,2}")
33 | time_pattern = re.compile(r"\d{2}:\d{2}:\d{2}")
34 | views_pattern = re.compile(r"\d+")
35 |
36 | from tqdm import tqdm
37 | pbar = tqdm(posts)
38 | for each_post in pbar:
39 | item = {}
40 | try:
41 | item['title'] = each_post.find(name="h4").text.split(' ', 1)[1].strip()
42 | item['sumary'] = each_post.find(name="p", attrs={"class": "content"}).text.strip().replace('\n', "")
43 | item['postdate'] = date_pattern.findall(each_post.find(name="span", attrs={"class": "date"}).text.strip())[0]
44 | item['posttime'] = time_pattern.findall(each_post.find(name="span", attrs={"class": "date"}).text.strip())[0]
45 | item['views'] = views_pattern.findall(each_post.find(name="span", attrs={"class": "read-num"}).text)[0]
46 | # print(item)
47 | article_list.append(item)
48 | pbar.set_description("正在爬取文章:%s" % item['title'])
49 | except Exception as e:
50 | print('异常信息:' + repr(e))
51 | pass
52 | import time
53 | time.sleep(0.1)
54 | pos += 1
55 | article_json = json.dumps(article_list)
56 | return article_json
57 |
58 | def save_as_json(self, content_json):
59 | with open(self.path + os.sep + 'csdn_article.json', 'w', encoding='utf-8') as f:
60 | f.write(content_json)
61 |
62 |
63 | if __name__ == '__main__':
64 |     csdn = Csdn('kangvcar')
65 |     article = csdn.get_element_of_article()
66 |     csdn.save_as_json(article)
--------------------------------------------------------------------------------
/Spiders/ctrip/main.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 |
4 | import requests
5 | import json
6 | import xlsxwriter
7 |
8 |
9 | class Ctrip(object):
10 | def __init__(self, cookie):
11 | self.session = requests.session()
12 | self.headers = {
13 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
14 | }
15 | cookie_dict = {}
16 |         cookie_items = cookie.split(';')
17 |         for i in cookie_items:
18 | try:
19 | cookie_dict[i.split('=')[0]] = i.split('=')[1]
20 | except IndexError:
21 | cookie_dict[''] = i
22 | requests.utils.add_dict_to_cookiejar(self.session.cookies, cookie_dict)
23 |
24 |
25 | def get_json_order(self):
26 | headers = self.headers
27 | headers['referer'] = 'https://my.ctrip.com/Home/Order/AllOrder.aspx'
28 | url = 'https://my.ctrip.com/Home/Ajax/GetAllOrder.ashx'
29 | sequence = int(time.time() * 10000000)
30 |         # The date and other fields can be passed in
31 | data = {
32 | 'BizTypes': '',
33 | 'BookingDateTime': '',
34 | 'BeginBookingDateTime': '',
35 | 'EndBookingDateTime': '',
36 | 'BeginUsageDateTime': '',
37 | 'EndUsageDateTime': '',
38 | 'PageSize': 10,
39 | 'PageIndex': 1,
40 | 'OrderStatusClassify': 'All',
41 | 'OrderIDs': '',
42 | 'OrderStatuses': '',
43 | 'PassengerName': '',
44 | 'OrderType': '',
45 | 'FieldName': '',
46 | 'IsASC': '',
47 | 'sequence': sequence
48 | }
49 | resp = self.session.post(url, headers=self.headers, data=data, verify=False)
50 |         return resp.content.decode('gbk')
51 |
52 | def transfer_and_save(self, json_str):
53 |
54 | json_orders = json.loads(json_str)
55 |
56 |         # 'OrderEnities' (sic) holds the order list; default to [] if it is absent
57 |         json_order_lists = json_orders.get('OrderEnities', [])
58 |
59 |
60 | book = xlsxwriter.Workbook('ctrip_order.xlsx')
61 | sheet = book.add_worksheet()
62 | sheet.write(0, 0, 'Date')
63 | sheet.write(0, 1, 'OrderDetails')
64 | sheet.write(0, 2, 'Price')
65 |
66 | for i in range(len(json_order_lists)):
67 | json_order = json_order_lists[i]
68 | sheet.write(i+1, 0, json_order['BookingDate'])
69 | sheet.write(i+1, 1, json_order['OrderName'])
70 | sheet.write(i+1, 2, json_order['OrderTotalPrice'])
71 |
72 | book.close()
73 |
74 | # download orders and save them in an excel file
75 | def get_order(self):
76 |
77 |         # get the orders from the ctrip website
78 | json_order = self.get_json_order()
79 |
80 | # transfer the order and store it in an excel
81 | self.transfer_and_save(json_order)
82 |
83 |
84 | if __name__ == '__main__':
85 |     cookie = ''  # paste the Cookie header of a logged-in my.ctrip.com session here
86 |     ctrip = Ctrip(cookie)
87 |     ctrip.get_order()
--------------------------------------------------------------------------------
/Spiders/github/main.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import re
4 | import requests
5 | from tkinter.filedialog import askdirectory
6 |
7 | class Github(object):
8 | def __init__(self, username):
9 | self.username = username
10 | self.headers = {
11 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
12 | }
13 | self.path = askdirectory(title='选择信息保存文件夹')
14 |
15 |
16 |
17 |     # User profile info
18 | def get_user_info(self):
19 | url = 'https://api.github.com/users/' + self.username
20 | resp = requests.get(url, headers=self.headers)
21 | # print(resp.text)
22 | file_path = self.path + '/user_infomation.json'
23 | with open(file_path, 'w') as f:
24 | f.write(resp.text.encode("gbk", 'ignore').decode("gbk", "ignore"))
25 | return file_path
26 |
27 |     # User repository info
28 | def get_user_repos(self):
29 | url = 'https://api.github.com/users/' + self.username + '/repos'
30 | resp = requests.get(url, headers=self.headers)
31 | # print(resp.text)
32 | file_path = self.path + '/user_repository.json'
33 | with open(file_path, 'w') as f:
34 | f.write(resp.text.encode("gbk", 'ignore').decode("gbk", "ignore"))
35 | return file_path
36 |
37 | # Users this user follows
38 | def get_user_following(self):
39 | url = 'https://api.github.com/users/' + self.username + '/following'
40 | resp = requests.get(url, headers=self.headers)
41 | # print(resp.text)
42 | file_path = self.path + '/user_following.json'
43 | with open(file_path, 'w') as f:
44 | f.write(resp.text.encode("gbk", 'ignore').decode("gbk", "ignore"))
45 | return file_path
46 |
47 | # Followers of this user
48 | def get_user_followers(self):
49 | url = 'https://api.github.com/users/' + self.username + '/followers'
50 | resp = requests.get(url, headers=self.headers)
51 | # print(resp.text)
52 | file_path = self.path + '/user_followers.json'
53 | with open(file_path, 'w') as f:
54 | f.write(resp.text.encode("gbk", 'ignore').decode("gbk", "ignore"))
55 | return file_path
56 |
57 | # User activity (received events)
58 | def get_user_activity(self):
59 | url = 'https://api.github.com/users/' + self.username + '/received_events'
60 | resp = requests.get(url, headers=self.headers)
61 | # print(resp.text)
62 | file_path = self.path + '/user_activity.json'
63 | with open(file_path, 'w') as f:
64 | f.write(resp.text.encode("gbk", 'ignore').decode("gbk", "ignore"))
65 | return file_path
66 |
67 | # Detailed info for each of the user's repositories
68 | def get_user_repos_detail(self):
69 | url = 'https://api.github.com/users/' + self.username + '/repos'
70 | resp = requests.get(url, headers=self.headers, verify=False, timeout=2)
71 | repo_detail = []
72 | for name in resp.json():
73 | repo_url = 'https://api.github.com/repos/' + self.username + '/' + name['name']
74 | detail = requests.get(repo_url, headers=self.headers, verify=False, timeout=2)
75 | repo_detail.append(detail.text.encode("gbk", 'ignore').decode("gbk", "ignore"))
76 | print('Downloading repository info >>> ', name['name'])
77 | print(repo_detail)
78 | file_path = self.path + '/user_all_repos_detail.json'
79 | with open(file_path, 'w') as f:
80 | f.write(json.dumps(repo_detail))  # dump as valid JSON rather than a Python repr
81 | return file_path
82 |
83 | if __name__ == '__main__':
84 | github = Github('kangvcar')
85 | github.get_user_info()
86 | github.get_user_repos()
87 | github.get_user_following()
88 | github.get_user_followers()
89 | github.get_user_activity()
90 | github.get_user_repos_detail()
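91 |
92 | # Sketch (not in the original): unauthenticated calls to api.github.com are
93 | # rate-limited to roughly 60 requests/hour per IP. Sending a personal access
94 | # token raises the limit; GITHUB_TOKEN below is an assumed environment variable:
95 | #
96 | # import os
97 | # github.headers['Authorization'] = 'token ' + os.environ['GITHUB_TOKEN']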
--------------------------------------------------------------------------------
/Spiders/jianshu/main.py:
--------------------------------------------------------------------------------
1 | import re
2 | import os
3 | import sys
4 | import json
5 | import requests
6 | from bs4 import BeautifulSoup
7 | from tkinter.filedialog import askdirectory
8 |
9 | class Jianshu(object):
10 | def __init__(self, blogurl):
11 | self.blogurl = blogurl
12 | self.path = askdirectory(title='Choose a folder to save the data')
13 | if str(self.path) == "":
14 | sys.exit(1)
15 | self.headers = {
16 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36'
17 | }
18 |
19 | def get_element_of_article(self):
20 | '''
21 | Get the elements of each article (title, publish time, view count)
22 | '''
23 | # url = "https://www.jianshu.com/u/d959ac37cdde?order_by=shared_at&page=1"
24 | url = self.blogurl
25 | pos = 1
26 | article_list = []
27 | while 1:
28 | key_dict = {
29 | 'order_by': 'shared_at',
30 | 'page': str(pos),
31 | }
32 | reps = requests.get(url, headers=self.headers, params=key_dict, timeout=10)
33 | soup = BeautifulSoup(reps.text, "html.parser")
34 | posts = soup.find_all("div", class_="content")
35 | print('=======================>>>>' + str(len(posts)))
36 | # if not len(posts):
37 | # break
38 | date_pattern = re.compile(r"\d+-\d{1,2}-\d{1,2}")
39 | time_pattern = re.compile(r"\d{2}:\d{2}")
40 | from tqdm import tqdm
41 | pbar = tqdm(posts)
42 | for each_post in pbar:
43 | try:
44 | item = {}
45 | item['title'] = each_post.find("a", class_="title").text.strip()
46 | item['summary'] = each_post.find("p", class_="abstract").text.strip()
47 | item['postdate'] = date_pattern.findall(each_post.find("span", class_="time")['data-shared-at'])[0]
48 | item['posttime'] = time_pattern.findall(each_post.find("span", class_="time")['data-shared-at'])[0]
49 | item['views'] = each_post.find("div", class_="meta").find("a").text.strip()
50 | article_list.append(item)
51 | pbar.set_description("Crawling article: %s" % item['title'])
52 | except Exception:
53 | pass
54 | import time
55 | time.sleep(0.1)
56 | pos += 1
57 | if len(posts) < 9:  # fewer than a full page of posts means this was the last page
58 | break
59 | article_json = json.dumps(article_list)
60 | return article_json
61 |
62 | def save_as_json(self, content_json):
63 | with open(self.path + os.sep + 'jianshu_article.json', 'w', encoding='utf-8') as f:
64 | f.write(content_json)
65 |
66 |
67 | if __name__ == '__main__':
68 | jianshu = Jianshu('https://www.jianshu.com/u/d959ac37cdde')
69 | article = jianshu.get_element_of_article()
70 | jianshu.save_as_json(article)
--------------------------------------------------------------------------------
/Spiders/moments_album/main.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | from selenium import webdriver
3 | import selenium.webdriver.support.expected_conditions as EC
4 | from selenium.webdriver.common.by import By
5 | from selenium.webdriver.support.ui import WebDriverWait
6 | from platform import system
7 | import time
8 | import json
9 | import os
10 | import sys
11 | import random
12 | from tkinter.filedialog import askdirectory
13 |
14 | class Momentsablum(object):
15 | def __init__(self):
16 | self.path = askdirectory(title='Choose a folder to save the data')
17 | if str(self.path) == "":
18 | sys.exit(1)
19 |
20 | # Prompt the user for the URL via a text box injected into the page
21 | def input_url(self, driver):
22 | while True:
23 | # JavaScript snippet, using a random element id to avoid collisions
24 | random_id = [str(random.randint(0, 9)) for i in range(0,10)]
25 | random_id = "".join(random_id)
26 | random_id = 'id_input_target_url_' + random_id
27 | js = """
28 | // Pop up a prompt asking for the full link of the WeChat book
29 | target_url = prompt("Please enter the full link of your WeChat book","https://");
30 | // Dynamically create an input element
31 | input_target_url = document.createElement("input");
32 | // Give it an id so the script can fetch its value later
33 | input_target_url.id = "id_input_target_url";
34 | // Insert it into the current page
35 | document.getElementsByTagName("body")[0].appendChild(input_target_url);
36 | // Keep it invisible
37 | document.getElementById("id_input_target_url").style.display = 'none';
38 | // Store target_url as its value
39 | document.getElementById("id_input_target_url").value = target_url
40 | """
41 | js = js.replace('id_input_target_url', random_id)
42 | # Execute the JS snippet above
43 | driver.execute_script(js)
44 | # Wait until the prompt dialog is dismissed
45 | while True:
46 | try:
47 | # An alert is still present on the page
48 | alert = driver.switch_to.alert
49 | time.sleep(0.5)
50 | except:
51 | # An exception means no alert is left, i.e. the user clicked OK or Cancel
52 | break
53 | # Read the link the user entered
54 | target_url = WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.ID, random_id)))
55 | value = target_url.get_attribute('value')
56 | # Strip surrounding whitespace
57 | value = value.strip()
58 | # Accept the input only if it is a valid chushu.la link
59 | if value != '' and 'https://chushu.la' in value:
60 | break
61 | return value
62 |
63 |
64 | def make_album(self):
65 | chromedriver_path = './chromedriver_mac_74.0.3729.6'  # unused here; chromedriver is expected on the PATH
66 | option = webdriver.ChromeOptions()
67 | # Suppress Chrome's info bars
68 | option.add_argument('disable-infobars')
69 | # Print silently to a high-quality PDF, saved into the folder chosen at startup
70 | appState = {
71 | # Add the save-as-PDF destination
72 | "recentDestinations": [
73 | {
74 | "id": "Save as PDF",
75 | "origin": "local",
76 | "account":""
77 | }
78 | ],
79 | # Select the save-as-PDF destination
80 | "selectedDestinationId": "Save as PDF",
81 | # Settings version 2
82 | "version": 2,
83 | # Hide page headers and footers
84 | "isHeaderFooterEnabled": False
85 | }
86 | profile = {
87 | # Print-preview settings
88 | 'printing.print_preview_sticky_settings.appState': json.dumps(appState),
89 | # Default save path for downloads and printing
90 | 'savefile.default_directory': self.path
91 | }
92 | # Register the experimental preference settings
93 | option.add_experimental_option('prefs', profile)
94 | # Launch flag for silent background printing
95 | option.add_argument('--kiosk-printing')
96 | # Note: each Chrome version requires a matching chromedriver version
97 | driver = webdriver.Chrome(options=option)
98 | # Maximize the window so the capture looks better
99 | driver.maximize_window()
100 | # Give the maximize operation a moment to finish
101 | time.sleep(2)
102 | # This is the address of your WeChat-moments data - do not share it with others
103 | # During debugging, target_url can be hard-coded here instead
104 | target_url = self.input_url(driver)
105 |
106 | # Open the target page
107 | driver.get(target_url)
108 | for i in range(0, 10000):
109 | # Wait for the page to finish loading; when done, this 'loading' popup is hidden
110 | while True:
111 | loading_status = WebDriverWait(driver, 20).until(
112 | EC.presence_of_element_located((By.CSS_SELECTOR, 'div.j-save-popup.save-popup')))
113 | if not loading_status.is_displayed():
114 | break
115 | # Hide the navigation bar so it does not spoil the capture
116 | js = 'document.querySelector("body > header").style.display="none";'
117 | driver.execute_script(js)
118 | # Wait for the next-month control to appear
119 | next_month = WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.CSS_SELECTOR, 'button.next-month')))
120 | # The next-month control must be visible before it can be clicked
121 | while True:
122 | if next_month.is_displayed():
123 | break
124 | # Click the next-month control
125 | time.sleep(0.5)
126 | next_month.click()
127 | # If the control's class name is 'next-month disable', the last month has been reached
128 | page_source = driver.page_source
129 | if 'next-month disable' in page_source:
130 | # Wait for the page to finish loading; when done, the 'loading' popup is hidden
131 | while True:
132 | loading_status = WebDriverWait(driver, 20).until(
133 | EC.presence_of_element_located((By.CSS_SELECTOR, 'div.j-save-popup.save-popup')))
134 | if not loading_status.is_displayed():
135 | break
136 | # Wait for the main content element to appear
137 | WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.CSS_SELECTOR, 'ul.main')))
138 | main = driver.find_element_by_css_selector('ul.main')
139 | element_left_list = main.find_elements_by_css_selector('div.con-left')
140 | # Each element is one page; set its display style to 'block' to make it visible
141 | for index, element in enumerate(element_left_list):
142 | # In XPath, '..' selects the parent element
143 | parent_element = element.find_element_by_xpath('..')
144 | # Get the parent element's full id
145 | parent_element_id = parent_element.get_attribute('id')
146 |
147 | # Make the parent element visible
148 | js = 'document.getElementById("{}").style.display="block";'.format(parent_element_id)
149 | driver.execute_script(js)
150 |
151 | # Remove the vertical gap between pages
152 | js = 'document.getElementById("{}").style.marginTop="0px";'.format(parent_element_id)
153 | driver.execute_script(js)
154 | # The site lazy-loads its images, so each one has to be scrolled into view
155 | # Repeatedly look for img.lazy-img elements and scroll the first one into view
156 | # Once none are left, every lazy image has loaded and the loop can end
157 | while True:
158 | try:
159 | lazy_img = driver.find_elements_by_css_selector('img.lazy-img')
160 | js = 'document.getElementsByClassName("lazy-img")[0].scrollIntoView();'
161 | driver.execute_script(js)
162 | time.sleep(3)
163 | except Exception:
164 | # No img.lazy-img element could be scrolled, so stop
165 | break
166 | break  # the last month has been processed - leave the outer for loop
167 | # Invoke Chrome's print function (kiosk-printing saves the PDF silently)
168 | driver.execute_script('window.print();')
169 |
170 | # Quit the browser
171 | driver.quit()
172 |
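173 | # Hedged usage sketch (not part of the original file; in InfoSpider the GUI
174 | # normally constructs this class):
175 | if __name__ == '__main__':
176 | album = Momentsablum()  # opens a folder picker for the output directory
177 | album.make_album()  # drives Chrome and prints each month of the album to PDF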
--------------------------------------------------------------------------------
/Spiders/oschina/main.py:
--------------------------------------------------------------------------------
1 | import re
2 | import os
3 | import sys
4 | import json
5 | import requests
6 | from bs4 import BeautifulSoup
7 | from tkinter.filedialog import askdirectory
8 |
9 | class Oschina(object):
10 | def __init__(self, blogurl):
11 | self.blogurl = blogurl
12 | self.path = askdirectory(title='Choose a folder to save the data')
13 | if str(self.path) == "":
14 | sys.exit(1)
15 | self.headers = {
16 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36'
17 | }
18 |
19 | def get_element_of_article(self):
20 | '''
21 | Get the elements of each article (title, publish time, view count)
22 | '''
23 | # url = blogurl + 'widgets/_space_index_newest_blog?catalogId=0&q=&p={}&type=ajax'
24 | url = self.blogurl + '/widgets/_space_index_newest_blog'
25 | pos = 1
26 | article_list = []
27 | while 1:
28 | key_dict = {
29 | 'catalogId': '0',
30 | 'q': '',
31 | 'p': str(pos),
32 | 'type': 'ajax'
33 | }
34 | reps = requests.get(url, headers=self.headers, params=key_dict, timeout=10)
35 | soup = BeautifulSoup(reps.text, "html.parser")
36 | posts = soup.find_all("div", class_="content")
37 | # print(len(posts))
38 | if not len(posts):
39 | break
40 | date_pattern = re.compile(r"\d+/\d{1,2}/\d{1,2}")
41 | time_pattern = re.compile(r"\d{2}:\d{2}")
42 | from tqdm import tqdm
43 | pbar = tqdm(posts)
44 | for each_post in pbar:
45 | try:
46 | item = {}
47 | item['title'] = each_post.find("a", class_="header").text.replace(" ", "").split('\n')[-2]
48 | item['summary'] = each_post.find("div", class_="description").text.strip().replace('\n', '')
49 | item['postdate'] = date_pattern.findall(each_post.find("div", class_="extra").text)[0]
50 | item['posttime'] = time_pattern.findall(each_post.find("div", class_="extra").text)[0]
51 | item['views'] = each_post.find("div", class_="extra").find_all('div', class_='item')[-2].text.strip()
52 | article_list.append(item)
53 | pbar.set_description("Crawling article: %s" % item['title'])
54 | except Exception:
55 | pass
56 | import time
57 | time.sleep(0.1)
58 | pos += 1
59 | article_json = json.dumps(article_list)
60 | return article_json
61 |
62 | def save_as_json(self, content_json):
63 | with open(self.path + os.sep + 'oschina_article.json', 'w', encoding='utf-8') as f:
64 | f.write(content_json)
65 |
66 |
67 | if __name__ == '__main__':
68 | oschina = Oschina('https://my.oschina.net/kangvcar')
69 | article = oschina.get_element_of_article()
70 | oschina.save_as_json(article)
--------------------------------------------------------------------------------
/Spiders/qqfriend/main.py:
--------------------------------------------------------------------------------
1 | import selenium
2 | from selenium import webdriver
3 | from selenium.webdriver.chrome.options import Options
4 | import json
5 | import tkinter as tk
6 | from tkinter.filedialog import asksaveasfilename
7 | from tkinter.filedialog import askdirectory
8 | from bs4 import BeautifulSoup
9 | import lxml
10 | # import openpyxl
11 | # from openpyxl import Workbook
12 |
13 | class Qqfriend(object):
14 | def __init__(self):
15 | # Browser setup
16 | self.driver = webdriver.Chrome()
17 | self.browser = self.driver
18 | # self.browser = webdriver.Chrome()
19 | self.browser.get("https://pay.qq.com/index.shtml")
20 | self.root = tk.Tk()
21 | # Set the window title
22 | self.root.title('Fetch the friend list from the QQ top-up page')
23 | # Set the window size
24 | self.root.geometry('400x200')
25 | # Enter the event loop (widgets refresh as events arrive)
26 | # button1 = tk.Button(self.root, text='Logged in, top-up page open, list expanded (no entry selected): save as Excel', command=self.callback_excel)
27 | # button1.pack()
28 | button2 = tk.Button(self.root, text='Logged in, top-up page open, list expanded (no entry selected): save as JSON', command=self.callback_json)
29 | button2.pack()
30 | button3 = tk.Button(self.root, text='Click here once crawling has finished', command=self.close_chrome)
31 | button3.pack()
32 | self.root.mainloop()
33 |
34 | # Save as Excel
35 | # def callback_excel(self):
36 | # self.driver.switch_to_frame('webpay-iframe')
37 | # iframe = self.driver.find_element_by_xpath('//*[@id="midas-webpay-main-1450000186"]/div[2]/div[1]/iframe')
38 | # self.driver.switch_to_frame(iframe)
39 | # html = self.driver.page_source
40 | # soup = BeautifulSoup(html, "lxml")
41 | # a = soup.find_all(attrs={'class': 'icon-friend-s'})
42 | # wb = Workbook()
43 | # ws = wb.active
44 | # ws.append(["raw", "group", "view_name", "qqnumber"])
45 | # for i in a:
46 | # if i.next_sibling != ' {{el.name}}({{el.qq}})':
47 | # k = 0
48 | # for x in i.next_sibling:
49 | # if x == '(':
50 | # f = k
51 | # if x == ')':
52 | # l = k
53 | # k = k + 1
54 | # ws.append([i.next_sibling, i.next_sibling.parent.parent.parent.parent.find(
55 | # attrs={'class': 'icon-more-friend'}).next_sibling, i.next_sibling[:f], i.next_sibling[f + 1:l]])
56 | # print([i.next_sibling, i.next_sibling.parent.parent.parent.parent.find(
57 | # attrs={'class': 'icon-more-friend'}).next_sibling, i.next_sibling[:f], i.next_sibling[f + 1:l]])
58 | # wb.save(asksaveasfilename(defaultextension='.xlsx', filetypes=[('Excel workbook', '*.xlsx')]))
59 |
60 | # return 0
61 |
62 | # Save as JSON
63 | def callback_json(self):
64 | self.path = askdirectory(title='Choose a folder to save the data')
65 | self.driver.switch_to.frame('webpay-iframe')
66 | iframe = self.driver.find_element_by_xpath('//*[@id="midas-webpay-main-1450000186"]/div[2]/div[1]/iframe')
67 | self.driver.switch_to.frame(iframe)
68 | html = self.driver.page_source
69 | soup = BeautifulSoup(html, "lxml")
70 | a = soup.find_all(attrs={'class': 'icon-friend-s'})
71 | from tqdm import tqdm
72 | pbar = tqdm(a)
73 | friend_list = []
74 | for i in pbar:
75 | if i.next_sibling != ' {{el.name}}({{el.qq}})':  # skip rows that are still unrendered Vue templates
76 | k = 0
77 | for x in i.next_sibling:
78 | # locate the parentheses so "name(qq)" can be split below
79 | if x == '(':
80 | f = k
81 | if x == ')':
82 | l = k
83 | k = k + 1
84 | item = {}
85 | item['raw'] = i.next_sibling
86 | item['group'] = i.next_sibling.parent.parent.parent.parent.find(
87 | attrs={'class': 'icon-more-friend'}).next_sibling
88 | item['view_name'] = i.next_sibling[:f]
89 | item['qqnumber'] = i.next_sibling[f + 1:l]
90 | friend_list.append(item)
91 | pbar.set_description("Crawling: %s" % item['raw'])
92 | friend_list_json = json.dumps(friend_list, ensure_ascii=False)
93 | # print(friend_list_json)
94 | with open(self.path + '/friend_list.json', 'w', encoding="utf-8") as f:
95 | f.write(friend_list_json)
96 | self.close_chrome()
97 | return 0
98 |
99 | def close_chrome(self):
100 | self.browser.close()
101 | self.root.destroy()
102 | return 0
103 |
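104 | # Hedged usage sketch (not in the original; the InfoSpider GUI normally
105 | # instantiates this class):
106 | if __name__ == '__main__':
107 | Qqfriend()  # opens Chrome for QQ login, then shows the tkinter control window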
--------------------------------------------------------------------------------
/Spiders/qqqun/main.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | import selenium
3 | from selenium import webdriver
4 | from selenium.webdriver.chrome.options import Options
5 | import tkinter as tk
6 | from tkinter import *
7 | from tkinter.filedialog import askdirectory
8 | from lxml import etree
9 | import lxml
10 | from bs4 import BeautifulSoup
11 | import time
12 | import os
13 | import json
14 | import pandas
15 |
16 | class Qqqun(object):
17 | def __init__(self):
18 | self.path = askdirectory(title='Choose a folder to save the data')
19 | self.driver = webdriver.Chrome()
20 | self.browser = self.driver
21 | self.browser.get("https://qun.qq.com/member.html")
22 | self.root = tk.Tk()
23 | # Set the window title
24 | self.root.title('Fetch group member lists from QQ group management')
25 | # Set the window size
26 | self.root.geometry('400x200')
27 | # Enter the event loop (widgets refresh as events arrive)
28 | # button1 = tk.Button(self.root, text='Logged in with the page open: save as Excel', pady=5, command=self.callback_excel)
29 | # button1.pack()
30 | button2 = tk.Button(self.root, text='Logged in with the page open: save as JSON', pady=5, command=self.callback_json)
31 | button2.pack()
32 | button3 = tk.Button(self.root, text='Click here once crawling has finished', pady=5, command=self.close_chrome)
33 | button3.pack()
34 | self.root.mainloop()
35 |
36 | # Strip leading/trailing '\n' and '\t' from a string
37 | def delNT(self, s):
38 | while s.startswith('\n') or s.startswith('\t'):
39 | s = s[1:]
40 | while s.endswith('\t') or s.endswith('\n'):
41 | s = s[:-1]
42 | return s
43 |
44 | # def callback_excel(self):
45 | # a = self.driver.find_elements_by_class_name('icon-def-gicon')
46 | # Num = len(a)
47 | # time_start = time.time()
48 | # for i in range(0, Num):
49 | # # Click into each group in turn
50 | # a = self.driver.find_elements_by_class_name('icon-def-gicon')
51 | # # time.sleep(0.5)
52 | # a[i].click()
53 | # time.sleep(1)
54 | # html = self.driver.page_source
55 | # soup = BeautifulSoup(html, "lxml")
56 | # groupTit = self.delNT(soup.find(attrs={'id': 'groupTit'}).text)
57 | # groupMemberNum = self.delNT(soup.find(attrs={'id': 'groupMemberNum'}).text)
58 |
59 | # while len(soup.find_all(attrs={'class': 'td-no'})) < int(groupMemberNum):
60 | # self.driver.execute_script("window.scrollTo(0,document.body.scrollHeight);")
61 | # time.sleep(0.1)
62 | # html = self.driver.page_source
63 | # soup = BeautifulSoup(html, "lxml")
64 |
65 | # res_elements = etree.HTML(html)
66 | # table = res_elements.xpath('//*[@id="groupMember"]')
67 | # table = etree.tostring(table[0], encoding='utf-8').decode()
68 | # df = pandas.read_html(table, encoding='utf-8', header=0)[0]
69 | # try:
70 | # print(str(int((time.time() - time_start) / 60)) + ':' + str(int((time.time() - time_start) % 60)),
71 | # 'group ' + str(i + 1) + ', ' + str(int((i + 1) / Num * 100)) + '% ' + groupTit + ' sheet done')
72 | # writer = pandas.ExcelWriter(self.path + '/' + groupTit + '.xlsx')
73 | # df.to_excel(writer, 'Sheet1')
74 | # writer.save()
75 | # except:
76 | # k = 0
77 | # for v in groupTit:
78 | # if v == '(':
79 | # f = k
80 | # if v == ')':
81 | # l = k
82 | # k = k + 1
83 |
84 | # writer = pandas.ExcelWriter(self.path + '/' + groupTit[f + 1:l] + '.xlsx')
85 | # df.to_excel(writer, 'Sheet1')
86 | # writer.save()
87 | # self.driver.find_element_by_id('changeGroup').click()
88 | # time.sleep(1)
89 | # self.close_chrome()
90 | # return 0
91 |
92 | def callback_json(self):
93 | a = self.driver.find_elements_by_class_name('icon-def-gicon')
94 | Num = len(a)
95 | time_start = time.time()
96 |
97 | # for i in range(0, Num):
98 | from tqdm import trange
99 | for i in trange(Num):
100 | # Click into each group in turn
101 | a = self.driver.find_elements_by_class_name('icon-def-gicon')
102 | # time.sleep(0.5)
103 | a[i].click()
104 | time.sleep(1)
105 | html = self.driver.page_source
106 | soup = BeautifulSoup(html, "lxml")
107 | groupTit = self.delNT(soup.find(attrs={'id': 'groupTit'}).text)
108 | groupMemberNum = self.delNT(soup.find(attrs={'id': 'groupMemberNum'}).text)
109 | # Keep scrolling to the bottom until every member row has loaded
110 | while len(soup.find_all(attrs={'class': 'td-no'})) < int(groupMemberNum):
111 | self.driver.execute_script("window.scrollTo(0,document.body.scrollHeight);")
112 | time.sleep(0.1)
113 | html = self.driver.page_source
114 | soup = BeautifulSoup(html, "lxml")
115 | res_elements = etree.HTML(html)
116 | table = res_elements.xpath('//*[@id="groupMember"]')
117 | table = etree.tostring(table[0], encoding='utf-8').decode()
118 | df = pandas.read_html(table, encoding='utf-8', header=0)[0]
119 | try:
120 | # print(str(int((time.time() - time_start) / 60)) + ':' + str(int((time.time() - time_start) % 60)),
121 | # 'group ' + str(i + 1) + ', ' + str(int((i + 1) / Num * 100)) + '% ' + groupTit + ' done')
122 | # df.drop(['Unnamed: 0','Unnamed: 1','Unnamed: 10'],axis=1,inplace=True)
123 | # df.columns = ['member', 'nick_name', 'qqnumber', 'sex', 'qqage', 'join_date', 'last_post']
124 | qun_friend_list = []
125 | for j in range(0, df.shape[0]):
126 | item = {}
127 | data = df.values[j].tolist()
128 | item['member'] = data[2]
129 | item['nick_name'] = data[3]
130 | item['qqnumber'] = data[4]
131 | item['sex'] = data[5]
132 | item['qqage'] = data[6]
133 | item['join_date'] = data[7]
134 | item['last_post'] = data[8]
135 | qun_friend_list.append(item)
136 | # print(item)
137 | qun_friend_list_json = json.dumps(qun_friend_list, ensure_ascii=False)
138 | with open(self.path + '/' + groupTit + '.json', 'w', encoding="utf-8") as f:
139 | f.write(qun_friend_list_json)
140 | except Exception:
141 | # Group titles can contain characters that are illegal in file names;
142 | # fall back to the text inside the parentheses (usually the group number),
143 | # mirroring the fallback in the Excel variant above
144 | f = groupTit.find('(')
145 | l = groupTit.find(')')
146 | with open(self.path + '/' + groupTit[f + 1:l] + '.json', 'w', encoding="utf-8") as fp:
147 | fp.write(qun_friend_list_json)
148 | self.driver.find_element_by_id('changeGroup').click()
149 | time.sleep(1)
150 | self.close_chrome()
151 | return 0
152 |
153 | def close_chrome(self):
154 | self.browser.close()
155 | self.root.destroy()
156 | return 0
157 |
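158 | # Hedged usage sketch (not in the original; the InfoSpider GUI normally
159 | # instantiates this class):
160 | if __name__ == '__main__':
161 | Qqqun()  # pick a folder, log in to qun.qq.com in Chrome, then use the buttons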
--------------------------------------------------------------------------------
/Spiders/shgjj/main.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 |
4 | import requests
5 |
6 |
7 | class GjjSpider(object):
8 | def __init__(self, cookie, token):
9 | self.session = requests.session()
10 | self.token = token
11 | self.headers = {
12 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
13 | 'Authorization': 'Bearer ' + self.token
14 | }
15 | cookie_dict = {}
16 | cookie_items = cookie.split(';')
17 | for i in cookie_items:
18 | try:
19 | cookie_dict[i.split('=', 1)[0]] = i.split('=', 1)[1]
20 | except IndexError:
21 | cookie_dict[''] = i
22 | requests.utils.add_dict_to_cookiejar(self.session.cookies, cookie_dict)
23 |
24 | def write_json(self, name, content):
25 | file_path = os.path.join(os.path.dirname(__file__), name)
26 | with open(file_path, 'w') as f:
27 | f.write(content)
28 |
29 | # Housing provident fund and supplementary fund accounts
30 | def get_priaccountForWeb(self):
31 | url = 'http://person.shgjj.com/gjjapi/private/priaccountForWeb?token={}&source=WANGZHAN'.format(self.token)
32 | self.headers['Referer'] = 'http://person.shgjj.com/gjjweb/'
33 | resp = self.session.get(url, headers=self.headers)
34 | self.write_json('priaccountForWeb_gjj.json', resp.content.decode())
35 |
36 | # Loan account
37 | def get_accountForWeb(self):
38 | url = 'http://person.shgjj.com/gjjapi/loan/accountForWeb?token={}&source=WANGZHAN'.format(self.token)
39 | self.headers['Referer'] = 'http://person.shgjj.com/gjjweb/'
40 | resp = self.session.get(url, headers=self.headers)
41 | self.write_json('accountForWeb_loan.json', resp.content.decode())
42 |
43 |
44 | if __name__ == '__main__':
45 | # The sample cookie and token below are long-expired values left by the author;
46 | # replace them with fresh ones copied from a logged-in browser session.
47 | cookie = 'ic-GJJGeRen=r-GJJGeRen-1;eks_cache_keys=true;'
48 | token = 'eyJhbGciOiJIUzUxMiJ9.eyJzdWIiOiIxMDA0OTgwNTAyMDUiLCJhdXRoIjoiUk9MRV9BRE1JTixST0xFX1VTRVIiLCJleHAiOjE1NTY2MTYwNTZ9.TAUPynGD52hwscJmM2Icam2q5SNXimQAFG19G9a4cESUh1eSBRLnbm6ZfTfEw62gUaR_movqxKeKWxMXIXXeJg'
49 | spider = GjjSpider(cookie, token)
50 | spider.get_priaccountForWeb()
51 | spider.get_accountForWeb()
52 |
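53 | # Illustrative helper (not in the original): the token is a JWT, so its expiry
54 | # timestamp can be read locally before making any request.
55 | import base64
56 | def jwt_expiry(token):
57 | payload = token.split('.')[1]
58 | payload += '=' * (-len(payload) % 4)  # restore the stripped base64 padding
59 | return json.loads(base64.urlsafe_b64decode(payload))['exp']  # e.g. 1556616056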
--------------------------------------------------------------------------------
/Spiders/taobao/taobao_cookies.json:
--------------------------------------------------------------------------------
1 | [{"domain": ".taobao.com", "expiry": 1610775095, "httpOnly": false, "name": "l", "path": "/", "secure": false, "value": "eBEaQFfROTFFSd5DBOfanurza77OSIRYYuPzaNbMiOCP_k1B5MXMXZkS5N86C3GVhss6R3oIr-vXBeYBq7Vonxv9w8VMULkmn"}, {"domain": ".taobao.com", "expiry": 1610775095, "httpOnly": false, "name": "isg", "path": "/", "secure": false, "value": "BMDAv1_Xi5baKneSxmuaYOPykU6SSaQTFje4qTpRjFtutWDf4ll0o5YHyR11Hlzr"}, {"domain": ".taobao.com", "httpOnly": false, "name": "uc1", "path": "/", "secure": true, "value": "cookie15=UtASsssmOIJ0bQ%3D%3D&cookie21=UIHiLt3xThH8t7YQoFNq&existShop=false&cookie14=UoTV6e9p4NiGhg%3D%3D&cookie16=VFC%2FuZ9az08KUQ56dCrZDlbNdA%3D%3D&pas=0"}, {"domain": ".taobao.com", "expiry": 1597843892.934704, "httpOnly": false, "name": "lgc", "path": "/", "secure": true, "value": "kangvcar"}, {"domain": ".taobao.com", "httpOnly": false, "name": "sg", "path": "/", "secure": true, "value": "r7f"}, {"domain": ".taobao.com", "expiry": 1595856694.502407, "httpOnly": false, "name": "mt", "path": "/", "secure": true, "value": "ci=1_1"}, {"domain": ".taobao.com", "httpOnly": false, "name": "dnk", "path": "/", "secure": true, "value": "kangvcar"}, {"domain": ".taobao.com", "httpOnly": true, "name": "cookie1", "path": "/", "secure": true, "value": "Uoe1g8k%2BDFmE6JSlbDjztfnBf8Su8ZIce2H5Qe6NPG8%3D"}, {"domain": ".taobao.com", "httpOnly": false, "name": "_l_g_", "path": "/", "secure": true, "value": "Ug%3D%3D"}, {"domain": ".taobao.com", "httpOnly": false, "name": "_nk_", "path": "/", "secure": true, "value": "kangvcar"}, {"domain": ".taobao.com", "httpOnly": false, "name": "existShop", "path": "/", "secure": true, "value": "MTU5NTIyMzA5Mw%3D%3D"}, {"domain": ".taobao.com", "expiry": 1626787892.934865, "httpOnly": false, "name": "_cc_", "path": "/", "secure": true, "value": "VT5L2FSpdA%3D%3D"}, {"domain": ".taobao.com", "httpOnly": true, "name": "cookie17", "path": "/", "secure": true, "value": "UU26%2BWFHuJvw5Q%3D%3D"}, {"domain": ".taobao.com", "expiry": 1610775082, "httpOnly": false, "name": "tfstk", "path": "/", "secure": false, "value": "c4qGB3XJ2PusLL03PGi_35Kzq-CdZF_Z1orUYlefuprjE5qFiQTezwqhKfFgDE1.."}, {"domain": ".taobao.com", "httpOnly": false, "name": "csg", "path": "/", "secure": true, "value": "7e5ca79e"}, {"domain": ".taobao.com", "expiry": 1597843892.934673, "httpOnly": true, "name": "uc3", "path": "/", "secure": true, "value": "nk2=CNa6qFU%2FXCw%3D&lg2=URm48syIIVrSKA%3D%3D&vt3=F8dBxGPjY8X4FfE9CrY%3D&id2=UU26%2BWFHuJvw5Q%3D%3D"}, {"domain": ".taobao.com", "httpOnly": true, "name": "unb", "path": "/", "secure": true, "value": "2585920367"}, {"domain": ".taobao.com", "httpOnly": true, "name": "skt", "path": "/", "secure": true, "value": "37f6ec259fbe9df7"}, {"domain": ".taobao.com", "expiry": 1626787892.934589, "httpOnly": true, "name": "sgcookie", "path": "/", "secure": true, "value": "EFX36fMwvYRJ3IJcXLn6L"}, {"domain": ".taobao.com", "expiry": 1597843892.934803, "httpOnly": true, "name": "uc4", "path": "/", "secure": true, "value": "nk4=0%40CrOzC9I9rHAcfnYJh0YPFnHOEA%3D%3D&id4=0%40U2%2F9qmBwapHjt985fUMRwV5kujTc"}, {"domain": ".taobao.com", "httpOnly": true, "name": "cookie2", "path": "/", "secure": true, "value": "1c04e2a84540d07da77e6660f799d750"}, {"domain": ".taobao.com", "expiry": 1626787892.934832, "httpOnly": false, "name": "tracknick", "path": "/", "secure": true, "value": "kangvcar"}, {"domain": ".taobao.com", "expiry": 1626759094.50246, "httpOnly": false, "name": "thw", "path": "/", "secure": false, "value": "cn"}, {"domain": ".taobao.com", "expiry": 2225943081, 
"httpOnly": false, "name": "cna", "path": "/", "secure": true, "value": "KhqcF/7ow34CAQ7SRicNiUy2"}, {"domain": ".taobao.com", "expiry": 1603027892.934718, "httpOnly": false, "name": "t", "path": "/", "secure": true, "value": "eb51841214fc861cb9fc4ea75ac1c5e9"}, {"domain": ".taobao.com", "httpOnly": false, "name": "_tb_token_", "path": "/", "secure": true, "value": "5e85b170795ee"}, {"domain": ".taobao.com", "httpOnly": true, "name": "_samesite_flag_", "path": "/", "secure": true, "value": "true"}]
--------------------------------------------------------------------------------
/Spiders/telephone/main.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import re
4 | import sys
5 | import xlsxwriter
6 | import requests
7 | from tkinter.filedialog import askdirectory
8 | from requests.packages.urllib3.exceptions import InsecureRequestWarning
9 | # Suppress insecure-request warnings
10 | requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
11 |
12 |
13 | class LianTong(object):
14 | def __init__(self, cookie):
15 | self.path = askdirectory(title='Choose a folder to save the data')
16 | if str(self.path) == "":
17 | sys.exit(1)
18 | self.session = requests.session()
19 | self.headers = {
20 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
21 | }
22 | self.cookie_dict = {}
23 | cookie_items = cookie.split(';')
24 | for i in cookie_items:
25 | try:
26 | self.cookie_dict[i.split('=', 1)[0]] = i.split('=', 1)[1]
27 | except IndexError:
28 | self.cookie_dict[''] = i
29 | requests.utils.add_dict_to_cookiejar(self.session.cookies, self.cookie_dict)
30 | self.mobile = None
31 |
32 | def get_user_info(self):
33 | import time
34 | url = 'http://iservice.10010.com/e3/static/query/searchPerInfoUser/'
35 | resp = self.session.post(url, headers=self.headers, verify=False)
36 | file_path = os.path.join(self.path + '/10010_user_info.json')
37 | with open(file_path, 'w', encoding='utf-8') as f:
38 | f.write(resp.content.decode())
39 |
40 | # Query the bill; a billing month can be passed, e.g. http://iservice.10010.com/e3/static/wohistory/bill?dat=201902
41 | def get_bill_info(self, dat=''):
42 | try:
43 | url = 'http://iservice.10010.com/e3/static/wohistory/bill?dat={}'.format(dat)
44 | self.headers['Referer'] = 'http://iservice.10010.com/e4/skip.html?menuCode=000100020001'
45 | resp = self.session.post(url, data='', headers=self.headers, verify=False)
46 | # print(resp)
47 | file_path = os.path.join(self.path + '/10010_bill_info.json')
48 | with open(file_path, 'w', encoding='utf-8') as f:
49 | f.write(resp.content.decode())
50 | except Exception:
51 | # An exception here means the session was logged in via SMS code, not the service password
52 | pass
53 |
54 |
55 | class DianXin(object):
56 | def __init__(self, cookie):
57 | self.path = askdirectory(title='Choose a folder to save the data')
58 | if str(self.path) == "":
59 | sys.exit(1)
60 | self.session = requests.session()
61 | self.headers = {
62 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
63 | }
64 | cookie_dict = {}
65 | cookie_items = cookie.split(';')
66 | for i in cookie_items:
67 | try:
68 | cookie_dict[i.split('=', 1)[0]] = i.split('=', 1)[1]
69 | except IndexError:
70 | cookie_dict[''] = i
71 | requests.utils.add_dict_to_cookiejar(self.session.cookies, cookie_dict)
72 | self.mobile = None
73 | resp = self.session.get('https://service.sh.189.cn/service/mytelecom/deviceInfo', headers=self.headers,
74 | verify=False)
75 | self.mobile = re.findall(r'var login = "(\d{11})";', resp.content.decode())[0]
76 | print(self.mobile)
77 |
78 | def get_user_info(self):
79 | url = 'https://service.sh.189.cn/service/my/basicinfo.do'
80 | resp = self.session.post(url, data=None, headers=self.headers, verify=False)
81 | file_path = os.path.join(os.path.dirname(__file__) + '/' + '10000_user.json')
82 | with open(file_path, 'w') as f:
83 | f.write(resp.content.decode())
84 |
85 | # Query the Telecom bill (dat is accepted for parity with LianTong but unused here)
86 | def get_bill_info(self, dat=''):
87 | try:
88 | url = 'https://service.sh.189.cn/service/mobileBill.do'
89 | self.headers['Referer'] = 'https://service.sh.189.cn/service/query/bill'
90 | self.headers['Content-Type'] = 'application/x-www-form-urlencoded; charset=UTF-8'
91 | print('device={}&acctNum='.format(self.mobile))
92 | resp = self.session.post(url, data='device={}&acctNum='.format(self.mobile), headers=self.headers,
93 | verify=False)
94 | file_path = os.path.join(os.path.dirname(__file__) + '/' + '10000_bill_info.json')
95 | with open(file_path, 'w') as f:
96 | f.write(resp.content.decode())
97 | except Exception:
98 | # An exception here means the session was logged in via SMS code, not the service password
99 | pass
100 |
101 |
102 | if __name__ == '__main__':
103 | pass
104 | # Example usage; each constructor needs the cookie string from a logged-in session.
105 | # y = YiDong(cookie)  # YiDong is defined in Spiders/yidong/main.py
106 | # y.get_user_info()
107 | # y.get_bill_info()
108 | # l = LianTong(cookie)
109 | # l.get_user_info()
110 | # l.get_bill_info()
111 |
112 | # d = DianXin(cookie)
113 | # d.get_user_info()
114 | # d.get_bill_info()
115 |
116 | # http://www.189.cn/dqmh/ssoLink.do?method=skip&platNo=93507&toStUrl=http://service.sh.189.cn/service/self_index
117 | # http://ah.189.cn/service/
118 | # http://www.189.cn/dqmh/frontLinkSkip.do?method=skip&shopId=10011&toStUrl=http://js.189.cn/nservice/login/toIndex
119 |
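120 | # Illustrative helper (not in the original): the cookie-splitting loop above is
121 | # duplicated in LianTong, DianXin and several other spiders; a shared version
122 | # could look like this:
123 | def cookie_str_to_dict(cookie):
124 | return dict(item.strip().split('=', 1) for item in cookie.split(';') if '=' in item)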
--------------------------------------------------------------------------------
/Spiders/yidong/main.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import re
4 | import xlsxwriter
5 | import sys
6 | import requests
7 | from requests.packages.urllib3.exceptions import InsecureRequestWarning
8 | from tkinter.filedialog import askdirectory
9 | requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
10 |
11 |
12 | class YiDong(object):
13 | def __init__(self, cookie):
14 | self.path = askdirectory(title='Choose a folder to save the data')
15 | if str(self.path) == "":
16 | sys.exit(1)
17 | self.session = requests.session()
18 | self.headers = {
19 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
20 | }
21 | cookie_dict = {}
22 | cookie_items = cookie.split(';')
23 | for i in cookie_items:
24 | try:
25 | cookie_dict[i.split('=', 1)[0]] = i.split('=', 1)[1]
26 | except IndexError:
27 | cookie_dict[''] = i
28 | requests.utils.add_dict_to_cookiejar(self.session.cookies, cookie_dict)
29 | self.mobile = None
30 |
31 | def get_user_info(self):
32 | # print('Running ----> get_user_info')
33 | url = 'https://shop.10086.cn/i/v1/auth/loginfo'
34 | resp = self.session.get(url, headers=self.headers, verify=False)
35 | self.mobile = json.loads(resp.content.decode())['data']['loginValue']
36 |
37 | def get_bill_info(self):
38 | # print('Running ----> get_bill_info')
39 | # Get the mobile number from the website
40 | self.get_user_info()
41 | # Download the bill string
42 | bill_json_str = self.get_bill_json()
43 | # transfer and save the bill
44 | self.transfer_and_save_bill(bill_json_str)
45 |
46 | def get_bill_json(self):
47 | # print('Running ----> get_bill_json')
48 | # construct the request url
49 | begin_month = '202001'
50 | # end_month = '202004'
51 | import datetime
52 | end_month = str(datetime.date.today().strftime('%Y%m'))
53 | url = 'https://touch.10086.cn/i/v1/fee/touchbillinfo/'+self.mobile+'?bgnMonth='+begin_month+'&endMonth='+end_month+'&time=202062215373895&channel=02'
54 | self.headers['Referer'] = 'https://touch.10086.cn/i/mobile/billqry.html'
55 |
56 | # get the bill json from website
57 | resp = self.session.get(url, headers=self.headers, verify=False)
58 | return resp.content.decode()
59 |
60 | def transfer_and_save_bill(self, bill_json_str):
61 | # print('Running ----> transfer_and_save_bill')
62 | bill_json = json.loads(bill_json_str)
63 | bill_json_month_lists = bill_json['data']
64 |
65 | bill_details = {}
66 | for i in range(len(bill_json_month_lists)):
67 | bill_json_month = bill_json_month_lists[i]
68 | month = bill_json_month['billMonth']
69 | month_item_lists = bill_json_month['billMaterials']
70 | item_month = []
71 | for j in range(len(month_item_lists)):
72 | bill_item = month_item_lists[j]['billMaterialInfos']
73 | if len(bill_item) != 0:
74 | for k in bill_item:
75 | item_month.append(k)
76 | bill_details[month] = item_month
77 | with open(self.path + os.sep + 'yidong_bill.json', 'w', encoding='utf-8') as f:
78 | f.write(json.dumps(bill_details))
79 | # print(bill_details)
80 | print('Done.')
81 |
82 |
83 |
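84 | # Hedged usage sketch (not in the original): the cookie string must be copied
85 | # from a logged-in shop.10086.cn browser session; the value below is a placeholder.
86 | if __name__ == '__main__':
87 | y = YiDong('name1=value1; name2=value2')
88 | y.get_bill_info()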
--------------------------------------------------------------------------------
/Spiders/zhihu/main.py:
--------------------------------------------------------------------------------
1 | # import zhihuapi as zhihu
2 | import requests
3 | from tkinter.filedialog import askdirectory
4 |
5 | class Zhihu(object):
6 | def __init__(self, userToken):
7 | self.path = askdirectory(title='Choose a folder to save the data')
8 | self.userToken = userToken
9 | self.session = requests.session()
10 | self.headers = {
11 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
12 | }
13 |
14 | # Write the response text to a JSON file
15 | def info_write_to_json(self, filename, response):
16 | json_path = self.path + '/' + filename + '.json'
17 | with open(json_path, 'w') as f:
18 | f.write(response)
19 | return json_path
20 |
21 | # Get the user's basic profile
22 | def get_user_profile(self):
23 | url = 'https://www.zhihu.com/api/v4/members/' + self.userToken
24 | resp = self.session.get(url, headers=self.headers).content.decode()
25 | print(resp)
26 | self.info_write_to_json('user_profile', resp)
27 |
28 | # Get the users this user follows
29 | def get_user_followees(self):
30 | url = 'https://www.zhihu.com/api/v4/members/' + self.userToken + '/followees'
31 | resp = self.session.get(url, headers=self.headers).content.decode()
32 | print(resp)
33 | self.info_write_to_json('user_followees', resp)
34 |
35 | # Get the user's followers
36 | def get_user_followers(self):
37 | url = 'https://www.zhihu.com/api/v4/members/' + self.userToken + '/followers'
38 | resp = self.session.get(url, headers=self.headers).content.decode()
39 | print(resp)
40 | self.info_write_to_json('user_followers', resp)
41 |
42 | # Get the articles the user has published
43 | def get_user_articles(self):
44 | url = 'https://www.zhihu.com/api/v4/members/' + self.userToken + '/articles'
45 | resp = self.session.get(url, headers=self.headers).content.decode()
46 | print(resp)
47 | self.info_write_to_json('user_articles', resp)
48 |
49 | # Get the user's collections
50 | def get_user_collections(self):
51 | url = 'https://www.zhihu.com/api/v4/members/' + self.userToken + '/collections'
52 | resp = self.session.get(url, headers=self.headers).content.decode()
53 | print(resp)
54 | self.info_write_to_json('user_collections', resp)
55 |
56 | # Get the videos the user has published
57 | def get_user_zvideos(self):
58 | url = 'https://www.zhihu.com/api/v4/members/' + self.userToken + '/zvideos'
59 | resp = self.session.get(url, headers=self.headers).content.decode()
60 | print(resp)
61 | self.info_write_to_json('user_zvideos', resp)
62 |
63 | # Get the user's activity feed
64 | def get_user_activities(self):
65 | url = 'https://www.zhihu.com/api/v4/members/' + self.userToken + '/activities'
66 | resp = self.session.get(url, headers=self.headers).content.decode()
67 | print(resp)
68 | self.info_write_to_json('user_activities', resp)
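69 |
70 | # Hedged usage sketch (not in the original): userToken is the url_token segment
71 | # of a profile URL such as https://www.zhihu.com/people/<url_token>.
72 | if __name__ == '__main__':
73 | zhihu = Zhihu('your-url-token')  # placeholder token
74 | zhihu.get_user_profile()
75 | zhihu.get_user_followees()
76 | zhihu.get_user_followers()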
--------------------------------------------------------------------------------
/docs/.nojekyll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/.nojekyll
--------------------------------------------------------------------------------
/docs/QuickStart.md:
--------------------------------------------------------------------------------
1 |
2 | ## Prerequisites
3 |
4 | * Ubuntu 16.04
5 | * Python3 & pip3
6 | * Chrome browser and a matching version of [Chrome Driver](http://chromedriver.storage.googleapis.com/index.html)
7 |
8 | ## Installation
9 | ```
10 | $ ./install_deps.sh
11 | ```
12 |
--------------------------------------------------------------------------------
/docs/_coverpage.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | 
5 |
6 | # **INFO-SPIDER** **1.0**
7 |
8 | > **A magic toolbox that takes back your personal data.**
9 |
10 | - Simple, easy to use, secure, open source
11 | - Supports many data sources
12 | - Modular design
13 |
14 | [GitHub](https://github.com/kangvcar/InfoSpider)
15 | [Video Demo](https://www.bilibili.com/video/BV14f4y1R7oF/)
16 | [Developer Support](https://mianbaoduo.com/o/bread/aZiTlJo=)
17 | [Get Started](#INFO-SPIDER)
--------------------------------------------------------------------------------
/docs/_media/JetBrains.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/JetBrains.png
--------------------------------------------------------------------------------
/docs/_media/acnblog1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/acnblog1.png
--------------------------------------------------------------------------------
/docs/_media/acnblog2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/acnblog2.png
--------------------------------------------------------------------------------
/docs/_media/alimail1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/alimail1.png
--------------------------------------------------------------------------------
/docs/_media/alimail2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/alimail2.png
--------------------------------------------------------------------------------
/docs/_media/alimail3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/alimail3.png
--------------------------------------------------------------------------------
/docs/_media/alimail4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/alimail4.png
--------------------------------------------------------------------------------
/docs/_media/alipay1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/alipay1.png
--------------------------------------------------------------------------------
/docs/_media/alipay2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/alipay2.png
--------------------------------------------------------------------------------
/docs/_media/alipay3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/alipay3.png
--------------------------------------------------------------------------------
/docs/_media/alipay4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/alipay4.png
--------------------------------------------------------------------------------
/docs/_media/bilibili1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/bilibili1.png
--------------------------------------------------------------------------------
/docs/_media/bilibili2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/bilibili2.png
--------------------------------------------------------------------------------
/docs/_media/bilibili3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/bilibili3.png
--------------------------------------------------------------------------------
/docs/_media/bilibili4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/bilibili4.png
--------------------------------------------------------------------------------
/docs/_media/chrome1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/chrome1.png
--------------------------------------------------------------------------------
/docs/_media/chrome2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/chrome2.png
--------------------------------------------------------------------------------
/docs/_media/chrome3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/chrome3.png
--------------------------------------------------------------------------------
/docs/_media/cloudmusic1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/cloudmusic1.png
--------------------------------------------------------------------------------
/docs/_media/cloudmusic2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/cloudmusic2.png
--------------------------------------------------------------------------------
/docs/_media/cloudmusic3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/cloudmusic3.png
--------------------------------------------------------------------------------
/docs/_media/cloudmusic4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/cloudmusic4.png
--------------------------------------------------------------------------------
/docs/_media/cnblog1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/cnblog1.png
--------------------------------------------------------------------------------
/docs/_media/cnblog2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/cnblog2.png
--------------------------------------------------------------------------------
/docs/_media/cnblog3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/cnblog3.png
--------------------------------------------------------------------------------
/docs/_media/cnblog4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/cnblog4.png
--------------------------------------------------------------------------------
/docs/_media/csdn1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/csdn1.png
--------------------------------------------------------------------------------
/docs/_media/csdn2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/csdn2.png
--------------------------------------------------------------------------------
/docs/_media/csdn3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/csdn3.png
--------------------------------------------------------------------------------
/docs/_media/csdn4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/csdn4.png
--------------------------------------------------------------------------------
/docs/_media/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/favicon.ico
--------------------------------------------------------------------------------
/docs/_media/favicon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/favicon.png
--------------------------------------------------------------------------------
/docs/_media/github1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/github1.png
--------------------------------------------------------------------------------
/docs/_media/github2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/github2.png
--------------------------------------------------------------------------------
/docs/_media/github3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/github3.png
--------------------------------------------------------------------------------
/docs/_media/github4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/github4.png
--------------------------------------------------------------------------------
/docs/_media/hotmail1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/hotmail1.png
--------------------------------------------------------------------------------
/docs/_media/hotmail2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/hotmail2.png
--------------------------------------------------------------------------------
/docs/_media/hotmail3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/hotmail3.png
--------------------------------------------------------------------------------
/docs/_media/hotmail4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/hotmail4.png
--------------------------------------------------------------------------------
/docs/_media/infospider-16x16-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/infospider-16x16-icon.png
--------------------------------------------------------------------------------
/docs/_media/infospider-logo-mini.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/infospider-logo-mini.png
--------------------------------------------------------------------------------
/docs/_media/infospider-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/infospider-logo.png
--------------------------------------------------------------------------------
/docs/_media/infospider.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/infospider.png
--------------------------------------------------------------------------------
/docs/_media/infospider2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/infospider2.png
--------------------------------------------------------------------------------
/docs/_media/jd1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/jd1.png
--------------------------------------------------------------------------------
/docs/_media/jd2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/jd2.png
--------------------------------------------------------------------------------
/docs/_media/jd3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/jd3.png
--------------------------------------------------------------------------------
/docs/_media/jd4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/jd4.png
--------------------------------------------------------------------------------
/docs/_media/jianshu1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/jianshu1.png
--------------------------------------------------------------------------------
/docs/_media/jianshu2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/jianshu2.png
--------------------------------------------------------------------------------
/docs/_media/jianshu3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/jianshu3.png
--------------------------------------------------------------------------------
/docs/_media/jianshu4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/jianshu4.png
--------------------------------------------------------------------------------
/docs/_media/liantong1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/liantong1.png
--------------------------------------------------------------------------------
/docs/_media/liantong2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/liantong2.png
--------------------------------------------------------------------------------
/docs/_media/liantong3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/liantong3.png
--------------------------------------------------------------------------------
/docs/_media/liantong4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/liantong4.png
--------------------------------------------------------------------------------
/docs/_media/logo-100px.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/logo-100px.png
--------------------------------------------------------------------------------
/docs/_media/logo-50px.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/logo-50px.png
--------------------------------------------------------------------------------
/docs/_media/logo-transparent-100px.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/logo-transparent-100px.png
--------------------------------------------------------------------------------
/docs/_media/logo-transparent-50px.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/logo-transparent-50px.png
--------------------------------------------------------------------------------
/docs/_media/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/logo.png
--------------------------------------------------------------------------------
/docs/_media/logo_tr.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/logo_tr.png
--------------------------------------------------------------------------------
/docs/_media/momentsalbum1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/momentsalbum1.png
--------------------------------------------------------------------------------
/docs/_media/momentsalbum2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/momentsalbum2.png
--------------------------------------------------------------------------------
/docs/_media/momentsalbum3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/momentsalbum3.png
--------------------------------------------------------------------------------
/docs/_media/momentsalbum4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/momentsalbum4.png
--------------------------------------------------------------------------------
/docs/_media/oschina1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/oschina1.png
--------------------------------------------------------------------------------
/docs/_media/oschina2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/oschina2.png
--------------------------------------------------------------------------------
/docs/_media/oschina3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/oschina3.png
--------------------------------------------------------------------------------
/docs/_media/oschina4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/oschina4.png
--------------------------------------------------------------------------------
/docs/_media/qqfriend1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/qqfriend1.png
--------------------------------------------------------------------------------
/docs/_media/qqfriend2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/qqfriend2.png
--------------------------------------------------------------------------------
/docs/_media/qqfriend3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/qqfriend3.png
--------------------------------------------------------------------------------
/docs/_media/qqfriend4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/qqfriend4.png
--------------------------------------------------------------------------------
/docs/_media/qqfriend5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/qqfriend5.png
--------------------------------------------------------------------------------
/docs/_media/qqfriend6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/qqfriend6.png
--------------------------------------------------------------------------------
/docs/_media/qqfriend7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/qqfriend7.png
--------------------------------------------------------------------------------
/docs/_media/qqmail1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/qqmail1.png
--------------------------------------------------------------------------------
/docs/_media/qqmail2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/qqmail2.png
--------------------------------------------------------------------------------
/docs/_media/qqmail3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/qqmail3.png
--------------------------------------------------------------------------------
/docs/_media/qqmail4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/qqmail4.png
--------------------------------------------------------------------------------
/docs/_media/qqqun1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/qqqun1.png
--------------------------------------------------------------------------------
/docs/_media/qqqun2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/qqqun2.png
--------------------------------------------------------------------------------
/docs/_media/qqqun3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/qqqun3.png
--------------------------------------------------------------------------------
/docs/_media/qqqun4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/qqqun4.png
--------------------------------------------------------------------------------
/docs/_media/qqqun5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/qqqun5.png
--------------------------------------------------------------------------------
/docs/_media/qqqun6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/qqqun6.png
--------------------------------------------------------------------------------
/docs/_media/qqqun7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/qqqun7.png
--------------------------------------------------------------------------------
/docs/_media/screenshot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/screenshot.png
--------------------------------------------------------------------------------
/docs/_media/sina1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/sina1.png
--------------------------------------------------------------------------------
/docs/_media/sina2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/sina2.png
--------------------------------------------------------------------------------
/docs/_media/sina3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/sina3.png
--------------------------------------------------------------------------------
/docs/_media/sina4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/sina4.png
--------------------------------------------------------------------------------
/docs/_media/taobao1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/taobao1.png
--------------------------------------------------------------------------------
/docs/_media/taobao2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/taobao2.png
--------------------------------------------------------------------------------
/docs/_media/taobao3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/taobao3.png
--------------------------------------------------------------------------------
/docs/_media/taobao4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/taobao4.png
--------------------------------------------------------------------------------
/docs/_media/tielu1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/tielu1.png
--------------------------------------------------------------------------------
/docs/_media/tielu2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/tielu2.png
--------------------------------------------------------------------------------
/docs/_media/tielu3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/tielu3.png
--------------------------------------------------------------------------------
/docs/_media/tielu4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/tielu4.png
--------------------------------------------------------------------------------
/docs/_media/wangyiemail1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/wangyiemail1.png
--------------------------------------------------------------------------------
/docs/_media/wangyiemail2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/wangyiemail2.png
--------------------------------------------------------------------------------
/docs/_media/wangyiemail3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/wangyiemail3.png
--------------------------------------------------------------------------------
/docs/_media/wangyiemail4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/wangyiemail4.png
--------------------------------------------------------------------------------
/docs/_media/yidong1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/yidong1.png
--------------------------------------------------------------------------------
/docs/_media/yidong2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/yidong2.png
--------------------------------------------------------------------------------
/docs/_media/yidong3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/yidong3.png
--------------------------------------------------------------------------------
/docs/_media/yidong4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/yidong4.png
--------------------------------------------------------------------------------
/docs/_media/zhihu1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/zhihu1.png
--------------------------------------------------------------------------------
/docs/_media/zhihu2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/zhihu2.png
--------------------------------------------------------------------------------
/docs/_media/zhihu3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/zhihu3.png
--------------------------------------------------------------------------------
/docs/_media/zhihu4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/_media/zhihu4.png
--------------------------------------------------------------------------------
/docs/ads.txt:
--------------------------------------------------------------------------------
1 | google.com, pub-3091494829711028, DIRECT, f08c47fec0942fa0
--------------------------------------------------------------------------------
/docs/gif/12306.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/gif/12306.gif
--------------------------------------------------------------------------------
/docs/gif/alimail.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/gif/alimail.gif
--------------------------------------------------------------------------------
/docs/gif/alipay.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/gif/alipay.gif
--------------------------------------------------------------------------------
/docs/gif/bilibili.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/gif/bilibili.gif
--------------------------------------------------------------------------------
/docs/gif/chrome.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/gif/chrome.gif
--------------------------------------------------------------------------------
/docs/gif/cloudmusic.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/gif/cloudmusic.gif
--------------------------------------------------------------------------------
/docs/gif/cnblog.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/gif/cnblog.gif
--------------------------------------------------------------------------------
/docs/gif/csdn.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/gif/csdn.gif
--------------------------------------------------------------------------------
/docs/gif/document.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/gif/document.gif
--------------------------------------------------------------------------------
/docs/gif/github.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/gif/github.gif
--------------------------------------------------------------------------------
/docs/gif/hotmail.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/gif/hotmail.gif
--------------------------------------------------------------------------------
/docs/gif/jd.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/gif/jd.gif
--------------------------------------------------------------------------------
/docs/gif/jianshu.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/gif/jianshu.gif
--------------------------------------------------------------------------------
/docs/gif/oschina.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/gif/oschina.gif
--------------------------------------------------------------------------------
/docs/gif/qqmail.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/gif/qqmail.gif
--------------------------------------------------------------------------------
/docs/gif/qqqun.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/gif/qqqun.gif
--------------------------------------------------------------------------------
/docs/gif/sinamail.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/gif/sinamail.gif
--------------------------------------------------------------------------------
/docs/gif/taobao.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/gif/taobao.gif
--------------------------------------------------------------------------------
/docs/gif/wangyimail.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/gif/wangyimail.gif
--------------------------------------------------------------------------------
/docs/gif/wechatalbum.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/gif/wechatalbum.gif
--------------------------------------------------------------------------------
/docs/gif/zhihu.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/docs/gif/zhihu.gif
--------------------------------------------------------------------------------
/docs/index.html:
--------------------------------------------------------------------------------
[HTML markup was stripped from this file during extraction; only the page
title survives: "INFO-SPIDER - 拿回你的个人信息" ("take back your personal
information"). The missing ~90 lines held the docsify page skeleton and its
script/style references.]
--------------------------------------------------------------------------------
/extension/img/chrome-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/extension/img/chrome-logo.png
--------------------------------------------------------------------------------
/extension/img/cnblog.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/extension/img/cnblog.png
--------------------------------------------------------------------------------
/extension/img/github.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/extension/img/github.png
--------------------------------------------------------------------------------
/extension/img/jianshu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/extension/img/jianshu.png
--------------------------------------------------------------------------------
/extension/img/logo-50px.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/extension/img/logo-50px.png
--------------------------------------------------------------------------------
/extension/img/oschina.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/extension/img/oschina.png
--------------------------------------------------------------------------------
/extension/index.css:
--------------------------------------------------------------------------------
1 | .cnblog{
2 |     width: 40px;
3 |     height: 40px;
4 |     border: 0;
5 |     background: url(img/cnblog.png) no-repeat;
6 |     background-size: 100% 100%;
7 | }
8 | .github{
9 |     width: 40px;
10 |     height: 40px;
11 |     border: 0;
12 |     background: url(img/github.png) no-repeat;
13 |     background-size: 100% 100%;
14 | }
15 | .jianshu{
16 |     width: 40px;
17 |     height: 40px;
18 |     border: 0;
19 |     background: url(img/jianshu.png) no-repeat;
20 |     background-size: 100% 100%;
21 | }
22 | .oschina{
23 |     width: 40px;
24 |     height: 40px;
25 |     border: 0;
26 |     background: url(img/oschina.png) no-repeat;
27 |     background-size: 100% 100%;
28 | }
29 | .kuangjia{ /* "kuangjia" (框架) = frame/container */
30 |     width: 100px;
31 |     height: 100px;
32 | }
--------------------------------------------------------------------------------
/extension/index.html:
--------------------------------------------------------------------------------
[HTML markup was stripped from this file during extraction; only the page
title "Document" survives. manifest.json loads this page as the extension's
default_popup.]
--------------------------------------------------------------------------------
/extension/js/FileSaver.js:
--------------------------------------------------------------------------------
1 | (function (global, factory) {
2 | if (typeof define === "function" && define.amd) {
3 | define([], factory);
4 | } else if (typeof exports !== "undefined") {
5 | factory();
6 | } else {
7 | var mod = {
8 | exports: {}
9 | };
10 | factory();
11 | global.FileSaver = mod.exports;
12 | }
13 | })(this, function () {
14 | "use strict";
15 |
16 | /*
17 | * FileSaver.js
18 | * A saveAs() FileSaver implementation.
19 | *
20 | * By Eli Grey, http://eligrey.com
21 | *
22 | * License : https://github.com/eligrey/FileSaver.js/blob/master/LICENSE.md (MIT)
23 | * source : http://purl.eligrey.com/github/FileSaver.js
24 | */
25 | // The one and only way of getting global scope in all environments
26 | // https://stackoverflow.com/q/3277182/1008999
27 | var _global = typeof window === 'object' && window.window === window ? window : typeof self === 'object' && self.self === self ? self : typeof global === 'object' && global.global === global ? global : void 0;
28 |
29 | function bom(blob, opts) {
30 | if (typeof opts === 'undefined') opts = {
31 | autoBom: false
32 | };else if (typeof opts !== 'object') {
33 | console.warn('Deprecated: Expected third argument to be a object');
34 | opts = {
35 | autoBom: !opts
36 | };
37 | } // prepend BOM for UTF-8 XML and text/* types (including HTML)
38 | // note: your browser will automatically convert UTF-16 U+FEFF to EF BB BF
39 |
40 | if (opts.autoBom && /^\s*(?:text\/\S*|application\/xml|\S*\/\S*\+xml)\s*;.*charset\s*=\s*utf-8/i.test(blob.type)) {
41 | return new Blob([String.fromCharCode(0xFEFF), blob], {
42 | type: blob.type
43 | });
44 | }
45 |
46 | return blob;
47 | }
48 |
49 | function download(url, name, opts) {
50 | var xhr = new XMLHttpRequest();
51 | xhr.open('GET', url);
52 | xhr.responseType = 'blob';
53 |
54 | xhr.onload = function () {
55 | saveAs(xhr.response, name, opts);
56 | };
57 |
58 | xhr.onerror = function () {
59 | console.error('could not download file');
60 | };
61 |
62 | xhr.send();
63 | }
64 |
65 | function corsEnabled(url) {
66 | var xhr = new XMLHttpRequest(); // use sync to avoid popup blocker
67 |
68 | xhr.open('HEAD', url, false);
69 |
70 | try {
71 | xhr.send();
72 | } catch (e) {}
73 |
74 | return xhr.status >= 200 && xhr.status <= 299;
75 | } // `a.click()` doesn't work for all browsers (#465)
76 |
77 |
78 | function click(node) {
79 | try {
80 | node.dispatchEvent(new MouseEvent('click'));
81 | } catch (e) {
82 | var evt = document.createEvent('MouseEvents');
83 | evt.initMouseEvent('click', true, true, window, 0, 0, 0, 80, 20, false, false, false, false, 0, null);
84 | node.dispatchEvent(evt);
85 | }
86 | } // Detect WebView inside a native macOS app by ruling out all browsers
87 | // We just need to check for 'Safari' because all other browsers (besides Firefox) include that too
88 | // https://www.whatismybrowser.com/guides/the-latest-user-agent/macos
89 |
90 |
91 | var isMacOSWebView = _global.navigator && /Macintosh/.test(navigator.userAgent) && /AppleWebKit/.test(navigator.userAgent) && !/Safari/.test(navigator.userAgent);
92 | var saveAs = _global.saveAs || ( // probably in some web worker
93 | typeof window !== 'object' || window !== _global ? function saveAs() {}
94 | /* noop */
95 | // Use download attribute first if possible (#193 Lumia mobile) unless this is a macOS WebView
96 | : 'download' in HTMLAnchorElement.prototype && !isMacOSWebView ? function saveAs(blob, name, opts) {
97 | var URL = _global.URL || _global.webkitURL;
98 | var a = document.createElement('a');
99 | name = name || blob.name || 'download';
100 | a.download = name;
101 | a.rel = 'noopener'; // tabnabbing
102 | // TODO: detect chrome extensions & packaged apps
103 | // a.target = '_blank'
104 |
105 | if (typeof blob === 'string') {
106 | // Support regular links
107 | a.href = blob;
108 |
109 | if (a.origin !== location.origin) {
110 | corsEnabled(a.href) ? download(blob, name, opts) : click(a, a.target = '_blank');
111 | } else {
112 | click(a);
113 | }
114 | } else {
115 | // Support blobs
116 | a.href = URL.createObjectURL(blob);
117 | setTimeout(function () {
118 | URL.revokeObjectURL(a.href);
119 | }, 4E4); // 40s
120 |
121 | setTimeout(function () {
122 | click(a);
123 | }, 0);
124 | }
125 | } // Use msSaveOrOpenBlob as a second approach
126 | : 'msSaveOrOpenBlob' in navigator ? function saveAs(blob, name, opts) {
127 | name = name || blob.name || 'download';
128 |
129 | if (typeof blob === 'string') {
130 | if (corsEnabled(blob)) {
131 | download(blob, name, opts);
132 | } else {
133 | var a = document.createElement('a');
134 | a.href = blob;
135 | a.target = '_blank';
136 | setTimeout(function () {
137 | click(a);
138 | });
139 | }
140 | } else {
141 | navigator.msSaveOrOpenBlob(bom(blob, opts), name);
142 | }
143 | } // Fallback to using FileReader and a popup
144 | : function saveAs(blob, name, opts, popup) {
145 | // Open a popup immediately do go around popup blocker
146 | // Mostly only available on user interaction and the fileReader is async so...
147 | popup = popup || open('', '_blank');
148 |
149 | if (popup) {
150 | popup.document.title = popup.document.body.innerText = 'downloading...';
151 | }
152 |
153 | if (typeof blob === 'string') return download(blob, name, opts);
154 | var force = blob.type === 'application/octet-stream';
155 |
156 | var isSafari = /constructor/i.test(_global.HTMLElement) || _global.safari;
157 |
158 | var isChromeIOS = /CriOS\/[\d]+/.test(navigator.userAgent);
159 |
160 | if ((isChromeIOS || force && isSafari || isMacOSWebView) && typeof FileReader !== 'undefined') {
161 | // Safari doesn't allow downloading of blob URLs
162 | var reader = new FileReader();
163 |
164 | reader.onloadend = function () {
165 | var url = reader.result;
166 | url = isChromeIOS ? url : url.replace(/^data:[^;]*;/, 'data:attachment/file;');
167 | if (popup) popup.location.href = url;else location = url;
168 | popup = null; // reverse-tabnabbing #460
169 | };
170 |
171 | reader.readAsDataURL(blob);
172 | } else {
173 | var URL = _global.URL || _global.webkitURL;
174 | var url = URL.createObjectURL(blob);
175 | if (popup) popup.location = url;else location.href = url;
176 | popup = null; // reverse-tabnabbing #460
177 |
178 | setTimeout(function () {
179 | URL.revokeObjectURL(url);
180 | }, 4E4); // 40s
181 | }
182 | });
183 | _global.saveAs = saveAs.saveAs = saveAs;
184 |
185 | if (typeof module !== 'undefined') {
186 | module.exports = saveAs;
187 | }
188 | });
--------------------------------------------------------------------------------
/extension/js/cnblog/cnblogrun0.js:
--------------------------------------------------------------------------------
1 | window.onload = function () {
2 | chrome.storage.sync.get('cnblogname',function(budget){
3 | var cnblog1="https://home.cnblogs.com/u/"+budget.cnblogname+"/";
4 | window.open(cnblog1)
5 | })
6 | }
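7 |
8 | // Registered in manifest.json for https://www.cnblogs.com/: reads the stored
9 | // blog name and opens the user's profile home page, where cnblogrun1.js
10 | // continues the scrape.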
--------------------------------------------------------------------------------
/extension/js/cnblog/cnblogrun1.js:
--------------------------------------------------------------------------------
1 | window.onload = function () {
2 | // for(var i=0;i<10;i++)
3 | // {
4 | // console.log(document.getElementsByTagName('a')[i].outerHTML)
5 | // console.log(js = document.getElementsByTagName('a')[i].getAttribute('href'));
6 | // }
7 | var data= Array();
8 | for(var i=12;i<=14;i++)
9 | {
10 | console.log(document.getElementsByClassName('text_gray')[i-11].innerHTML)
11 | data.push(document.getElementsByClassName('text_gray')[i-11].innerHTML)
12 | var s=document.getElementsByTagName('li')[i].innerHTML;
13 | var st="";
14 | for(var j=0;j<s.length;j++)
15 | {
16 |     // (reconstructed: this loop body was mangled by HTML stripping in the dump)
17 |     st+=s[j];              // accumulate characters of the <li> innerHTML...
18 |     if(s[j]=='>') st="";   // ...but reset at every '>', keeping the text after the last tag
19 | }
20 | console.log(st)
21 | data.push(st);
22 | }
23 | for(var i=6;i<=11;i++)
24 | {
25 | console.log(document.getElementsByTagName('span')[i].innerHTML)
26 | data.push(document.getElementsByTagName('span')[i].innerHTML)
27 | }
28 |
29 | data.push("关注数")
30 | data.push(document.getElementById('following_count').innerHTML)
31 | data.push("粉丝数")
32 | data.push(document.getElementById('follower_count').innerHTML)
33 | console.log("关注数")
34 | console.log(document.getElementById('following_count').innerHTML)
35 | console.log("粉丝数")
36 | console.log(document.getElementById('follower_count').innerHTML)
37 |
38 | // var Divs = new Array();
39 | // Divs= Array.from(document.getElementsByClassName("avatar_name"))
40 | // console.log(Divs);
41 | // console.log(document.getElementsByClassName("avatar_name"));
42 | chrome.storage.sync.set({'user':data})
43 | chrome.storage.sync.get('cnblogname',function(budget){
44 | var cnblogurl2="https://www.cnblogs.com/"+budget.cnblogname+"/";
45 | window.open(cnblogurl2)
46 | })
47 |
48 | }
--------------------------------------------------------------------------------
/extension/js/github/githubrun1.js:
--------------------------------------------------------------------------------
1 | window.onload = function () {
2 |
3 | // console.log(document.getElementsByTagName('pre')[0].innerHTML)
4 | var data = document.getElementsByTagName('pre')[0].innerHTML;
5 | chrome.storage.sync.set({'user':data});
6 | // var content = JSON.stringify(data);
7 | // var blob = new Blob([data]);
8 | // saveAs(blob, "users.json");
9 | chrome.storage.sync.get('flag',function(budget){
10 | if(budget.flag==1)
11 | {
12 | chrome.storage.sync.get('githubname',function(budget){
13 | var githuburl2="https://api.github.com/users/"+budget.githubname+"/following";
14 | window.open(githuburl2)
15 | window.close();
16 |
17 | })
18 | chrome.storage.sync.set({'flag':0});
19 |
20 | }
21 | })
22 |
23 | }
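24 |
25 | // The github content scripts form a chain (see manifest.json): each stores
26 | // the JSON rendered in the API page's <pre> element, then opens the next
27 | // endpoint (user -> following -> followers -> repos -> received_events).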
--------------------------------------------------------------------------------
/extension/js/github/githubrun2.js:
--------------------------------------------------------------------------------
1 |
2 | window.onload = function () {
3 | console.log(document.getElementsByTagName('pre')[0].innerHTML)
4 | let data = document.getElementsByTagName('pre')[0].innerHTML;
5 | chrome.storage.sync.set({'following':data});
6 | // var blob = new Blob([data], {type: "text/plain;charset=utf-8"});
7 | // saveAs(blob, "following.json");
8 | chrome.storage.sync.get('githubname',function(budget){
9 | var githuburl3="https://api.github.com/users/"+budget.githubname+"/followers";
10 | window.open(githuburl3)
11 | window.close();
12 |
13 | })
14 |
15 | }
--------------------------------------------------------------------------------
/extension/js/github/githubrun3.js:
--------------------------------------------------------------------------------
1 |
2 | window.onload = function () {
3 | console.log(document.getElementsByTagName('pre')[0].innerHTML)
4 | let data = document.getElementsByTagName('pre')[0].innerHTML;
5 | chrome.storage.sync.set({'followers':data});
6 | // var blob = new Blob([data], {type: "text/plain;charset=utf-8"});
7 | // saveAs(blob, "followers.json");
8 | chrome.storage.sync.get('githubname',function(budget){
9 | var githuburl4="https://api.github.com/users/"+budget.githubname+"/repos";
10 | window.open(githuburl4)
11 | window.close();
12 |
13 | })
14 |
15 | }
--------------------------------------------------------------------------------
/extension/js/github/githubrun4.js:
--------------------------------------------------------------------------------
1 |
2 | window.onload = function () {
3 | console.log(document.getElementsByTagName('pre')[0].innerHTML)
4 | let data = document.getElementsByTagName('pre')[0].innerHTML;
5 | chrome.storage.sync.set({'repos':data});
6 | // var blob = new Blob([data], {type: "text/plain;charset=utf-8"});
7 | // saveAs(blob, "repos.json");
8 | chrome.storage.sync.get('githubname',function(budget){
9 | var githuburl5="https://api.github.com/users/"+budget.githubname+"/received_events";
10 | window.open(githuburl5)
11 | window.close();
12 | })
13 |
14 | }
--------------------------------------------------------------------------------
/extension/js/index.js:
--------------------------------------------------------------------------------
1 | window.onload = function () {
2 | $('#github').click(function(){
3 | var githubname=prompt("请输入用户名");   // "Please enter the username"
4 | chrome.storage.sync.set({'githubname':githubname});
5 | chrome.storage.sync.set({'flag':1});
6 | var githuburl1="https://api.github.com/users/"+githubname;
7 | // var githuburl2="https://api.github.com/users/"+githubname+"/following";
8 | // var githuburl3="https://api.github.com/users/"+githubname+"/followers";
9 | // var githuburl4="https://api.github.com/users/"+githubname+"/repos";
10 | // var githuburl5="https://api.github.com/users/"+githubname+"/received_events";
11 | window.open(githuburl1)
12 |
13 | // chrome.storage.sync.set({ 'githubname': githubname })
14 | })
15 |
16 |
17 | $('#jianshu').click(function(){
18 | var jianshu=prompt("请输入用户的主页链接");   // "Please enter the user's homepage link"
19 | var jianshuname="";
20 | var j=0;
21 | for(var i=0;i<jianshu.length;i++)
    [… lines 22-74 were lost to HTML stripping in this dump; the loop evidently
    scans the pasted URL to extract the jianshu user slug. The entry resumes
    inside a verbatim copy of FileSaver.js; lines 75-188, identical to
    /extension/js/FileSaver.js above, are omitted here …]
189 | window.onload = function () {
190 | console.log(document.getElementsByTagName('pre')[0].innerHTML)
191 | chrome.storage.sync.get('user',function(budget){
192 |
193 | var blob = new Blob([budget.user], {type: "text/plain;charset=utf-8"});
194 | saveAs(blob, "user.json");
195 | })
196 | var data=document.getElementsByTagName('pre')[0].innerHTML;
197 | var blob = new Blob([data], {type: "text/plain;charset=utf-8"});
198 | saveAs(blob, "public_notes.json");
199 | }
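200 |
201 | // This trailing handler saves the JSON stored under 'user' as user.json and
202 | // the current page's <pre> payload as public_notes.json; it appears to belong
203 | // to the jianshu public_notes flow registered in manifest.json.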
--------------------------------------------------------------------------------
/extension/manifest.json:
--------------------------------------------------------------------------------
1 | {
2 | "manifest_version":3,
3 | "name":"helloWorld",
4 | "version":"1.0",
5 | "description":"hello world 插件",
6 | "icons":{
7 | "128":"img/logo-50px.png",
8 | "48":"img/logo-50px.png",
9 | "16":"img/logo-50px.png"
10 | },
11 | "content_scripts": [
12 | {
13 | "matches": ["https://api.github.com/users/*/following"],
14 | "js": [
15 | "js/jquery.js",
16 | "js/github/githubrun2.js"
17 | ],
18 | "run_at": "document_idle"
19 | },
20 | {
21 | "matches": ["https://api.github.com/users/*/followers"],
22 | "js": [
23 | "js/jquery.js",
24 | "js/github/githubrun3.js"
25 | ],
26 | "run_at": "document_idle"
27 | },
28 | {
29 | "matches": ["https://api.github.com/users/*/repos"],
30 | "js": [
31 | "js/jquery.js",
32 | "js/github/githubrun4.js"
33 | ],
34 | "run_at": "document_idle"
35 | },
36 | {
37 | "matches": ["https://api.github.com/users/*/received_events"],
38 | "js": [
39 | "js/jquery.js",
40 | "js/github/githubrun5.js"
41 | ],
42 | "run_at": "document_idle"
43 | },
44 | {
45 | "matches": ["https://api.github.com/users/*"],
46 | "exclude_globs":["https://api.github.com/users/*/following","https://api.github.com/users/*/followers"
47 | ,"https://api.github.com/users/*/repos","https://api.github.com/users/*/received_events"],
48 | "js": [
49 | "js/jquery.js",
50 | "js/github/githubrun1.js"
51 | ],
52 | "run_at": "document_idle"
53 | },
54 |
55 |
56 |
57 |
58 | {
59 | "matches": ["https://www.cnblogs.com/"],
60 | "js": [
61 | "js/jquery.js",
62 | "js/cnblog/cnblogrun0.js"
63 | ],
64 | "run_at": "document_end"
65 | },
66 | {
67 | "matches": ["https://home.cnblogs.com/u/*/"],
68 | "exclude_globs":["https://home.cnblogs.com/u/*/followers/",
69 | "https://home.cnblogs.com/u/*/followees/"],
70 | "js": [
71 | "js/jquery.js",
72 | "js/cnblog/cnblogrun1.js"
73 | ],
74 | "run_at": "document_end"
75 | },
76 | {
77 | "matches": ["https://www.cnblogs.com/*/"],
78 | "exclude_globs":["https://home.cnblogs.com/u/*/followers/"],
79 | "js": [
80 | "js/jquery.js",
81 | "js/cnblog/cnblogrun2.js"
82 | ],
83 | "run_at": "document_end"
84 | },
85 |
86 |
87 |
88 |
89 |
90 | {
91 | "matches": ["https://www.jianshu.com/asimov/users/slug/*/public_notes"],
92 | "js": [
93 | "js/jquery.js",
94 | "js/jianshu/jianshurun2.js"
95 | ],
96 | "run_at": "document_start"
97 | },
98 | {
99 | "matches": ["https://www.jianshu.com/asimov/users/slug/*"],
100 | "exclude_globs":["https://www.jianshu.com/asimov/users/slug/*/public_notes"],
101 | "js": [
102 | "js/jquery.js",
103 | "js/jianshu/jianshurun1.js"
104 | ],
105 | "run_at": "document_start"
106 | },
107 |
108 |
109 |
110 |
111 | {
112 | "matches": ["https://my.oschina.net/u/*/"],
113 | "exclude_globs":["https://my.oschina.net/u/*/followers/","https://my.oschina.net/u/*/following"],
114 | "js": [
115 | "js/jquery.js",
116 | "js/oschina/oschinarun0.js"
117 | ],
118 | "run_at": "document_start"
119 | },
120 |
121 |
122 | {
123 | "matches": ["https://my.oschina.net/"],
124 | "js": [
125 | "js/jquery.js",
126 | "js/oschina/oschinarun0.js"
127 | ],
128 | "run_at": "document_start"
129 | }
130 | ],
131 | "action":{
132 | "default_icon":"img/logo-50px.png",
133 | "default_popup":"index.html"
134 | },
135 | "permissions":[
136 | "storage",
137 | "tabs"
138 | ]
139 | }
--------------------------------------------------------------------------------
/install_deps.sh:
--------------------------------------------------------------------------------
1 | sudo apt-get install build-essential libgtk-3-dev libgstreamer-plugins-base1.0-dev libwebkitgtk-3.0-dev libxslt-dev freeglut3-dev
2 | pip3 install -r requirements.txt
3 |
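4 | # The dev packages above supply the GTK/WebKit/GLUT headers that pip needs
5 | # when it has to build wxPython from source on Linux.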
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | matplotlib==3.2.0
2 | pyecharts==1.7.1
3 | selenium==3.141.0
4 | XlsxWriter==1.2.9
5 | openpyxl==3.0.4
6 | nltk==3.6.6
7 | pyquery==1.4.0
8 | lxml==4.9.1
9 | requests==2.32.0
10 | numpy==1.22.0
11 | tqdm==4.66.3
12 | wxPython==4.0.7
13 | pandas==1.0.1
14 | wxpy==0.3.9.8
15 | beautifulsoup4==4.9.1
16 | Pillow==10.3.0
17 | python_dateutil==2.8.1
--------------------------------------------------------------------------------
/tests/DeepAnalysis/dataprocess.py:
--------------------------------------------------------------------------------
1 | ### Build the dataset and dataloader with PyTorch
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 | import torch.optim as optim
6 | import numpy as np
7 | import pandas as pd
8 | import matplotlib.pyplot as plt
9 | from torch.utils.data import Dataset, DataLoader
10 | from sklearn.preprocessing import MinMaxScaler
11 | from sklearn.metrics import mean_squared_error
12 | import math
13 |
14 |
15 | df = pd.read_csv('data.csv')
16 | df.head()  # notebook-style preview; this and the bare expressions below have no effect when run as a script
17 |
18 |
19 | data = df.filter(['Close'])
20 | dataset = data.values
21 | training_data_len = math.ceil(len(dataset) * .8)
22 | training_data_len
23 | scaler = MinMaxScaler(feature_range=(0, 1))
24 | scaled_data = scaler.fit_transform(dataset)
25 | scaled_data
26 |
27 |
28 | train_data = scaled_data[0:training_data_len, :]
29 | x_train = []
30 | y_train = []
31 | for i in range(60, len(train_data)):
32 | x_train.append(train_data[i-60:i, 0])
33 | y_train.append(train_data[i, 0])
34 | if i <= 60:
35 | print(x_train)
36 | print(y_train)
37 | print()
38 |
39 | x_train, y_train = np.array(x_train), np.array(y_train)
40 | x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
41 | x_train.shape
42 | test_data = scaled_data[training_data_len - 60:, :]
43 | x_test = []
44 | y_test = dataset[training_data_len:, :]
45 | for i in range(60, len(test_data)):
46 | x_test.append(test_data[i-60:i, 0])
47 | x_test = np.array(x_test)
48 | x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))
49 |
50 |
51 | class StockDataset(Dataset):
52 | def __init__(self, x, y):
53 | self.x = x
54 | self.y = y
55 | def __getitem__(self, index):
56 | return self.x[index], self.y[index]
57 | def __len__(self):
58 | return len(self.x)
59 |
60 |
61 | train_dataset = StockDataset(x_train, y_train)
62 | train_loader = DataLoader(train_dataset, batch_size=1, shuffle=False)
63 | test_dataset = StockDataset(x_test, y_test)
64 | test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)
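65 |
66 | # Each sample is a 60-step sliding window over the scaled closing price, with
67 | # the next value as its label. The first 80% of rows form the training split;
68 | # the test slice starts 60 rows before the boundary so the first test label is
69 | # the first out-of-sample row.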
--------------------------------------------------------------------------------
/tests/DeepAnalysis/model.py:
--------------------------------------------------------------------------------
1 | ### Build an LSTM model with PyTorch
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 | import torch.optim as optim
6 | import numpy as np
7 | import pandas as pd
8 | import matplotlib.pyplot as plt
9 |
10 | class LSTM(nn.Module):
11 | def __init__(self, input_size=1, hidden_size=100, output_size=1):
12 | super(LSTM, self).__init__()
13 | self.hidden_size = hidden_size
14 | self.lstm = nn.LSTM(input_size, hidden_size)
15 | self.linear = nn.Linear(hidden_size, output_size)
16 | self.hidden = self.init_hidden()
17 |
18 | def init_hidden(self):
19 | return (torch.zeros(1, 1, self.hidden_size),
20 | torch.zeros(1, 1, self.hidden_size))
21 |
22 | def forward(self, input):
23 |     # input: (batch, seq_len[, 1]) -> reshape to (seq_len, batch, input_size=1)
24 |     x = input.view(len(input), -1, 1).transpose(0, 1)
25 |     lstm_out, _ = self.lstm(x)          # fresh zero state each call, so any batch size works
26 |     y_pred = self.linear(lstm_out[-1])  # predict from the last timestep
27 |     return y_pred.squeeze(-1)           # shape (batch,)
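28 |
29 | # With input of shape (N, 60, 1) the whole batch goes through the LSTM in one
30 | # pass and forward() returns one prediction per 60-step window.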
--------------------------------------------------------------------------------
/tests/DeepAnalysis/trainer.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | import torch.optim as optim
5 | import numpy as np
6 | import pandas as pd
7 | import matplotlib.pyplot as plt
8 | import logging.handlers
9 | import logging.config
10 | from sklearn.preprocessing import MinMaxScaler
11 | from model import LSTM
12 |
13 | def read_data():
14 | data = pd.read_csv('data.csv')
15 | data = data.dropna()
16 | data = data.reset_index(drop=True)
17 | data = data.drop(['date'], axis=1)
18 | data = data.astype('float32')
19 | return data
20 |
21 |
22 | def normalize_data(data):
23 | scaler = MinMaxScaler(feature_range=(-1, 1))
24 | data['close'] = scaler.fit_transform(data['close'].values.reshape(-1, 1))
25 | return data, scaler
26 |
27 |
28 | def create_train_data(data, seq_len):
29 | x_train = []
30 | y_train = []
31 |
32 | for i in range(seq_len, len(data)):
33 | x_train.append(data[i-seq_len:i, 0])
34 | y_train.append(data[i, 0])
35 |
36 | x_train, y_train = np.array(x_train), np.array(y_train)
37 | x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
38 | return x_train, y_train
39 |
40 |
41 | def create_test_data(data, seq_len):
42 | x_test = []
43 | y_test = data[seq_len:, :]
44 | for i in range(seq_len, len(data)):
45 | x_test.append(data[i-seq_len:i, 0])
46 | x_test = np.array(x_test)
47 | x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))
48 | return x_test, y_test
49 |
50 |
51 | def train_model(x_train, y_train, x_test, y_test):
52 | model = LSTM()
53 | criterion = torch.nn.MSELoss()
54 | optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
55 | epochs = 10
56 | for epoch in range(epochs):
57 | inputs = torch.from_numpy(x_train).float()
58 | labels = torch.from_numpy(y_train).float()
59 |         optimizer.zero_grad()
60 |         # forward returns (batch, 1); squeeze so the loss compares against (batch,) labels
61 |         y_pred = model(inputs).squeeze(-1)
62 |         loss = criterion(y_pred, labels)
63 | loss.backward()
64 | optimizer.step()
65 | print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, epochs, loss.item()))
66 | return model
67 |
68 |
69 | def predict(model, x_test, y_test, scaler):
70 | inputs = torch.from_numpy(x_test).float()
71 |     with torch.no_grad():  # inference only; skip building the autograd graph
72 |         y_pred = model(inputs).numpy()
73 | y_test = y_test.reshape(-1, 1)
74 | y_pred = scaler.inverse_transform(y_pred)
75 | y_test = scaler.inverse_transform(y_test)
76 | return y_pred, y_test
77 |
78 |
79 | def evaluate_model(y_pred, y_test):
80 | rmse = np.sqrt(np.mean(((y_pred - y_test) ** 2)))
81 | print('RMSE: ', rmse)
82 | plt.plot(y_test, color='red', label='Real Stock Price')
83 | plt.plot(y_pred, color='blue', label='Predicted Stock Price')
84 | plt.title('Stock Price Prediction')
85 | plt.xlabel('Time')
86 | plt.ylabel('Stock Price')
87 | plt.legend()
88 | plt.show()
89 |
90 | if __name__ == '__main__':
91 |     data = read_data()
92 |     data, scaler = normalize_data(data)
93 |     seq_len = 60
94 |     values = data.values  # 2-D numpy array; the helpers index it as values[i, 0]
95 |     x_train, y_train = create_train_data(values, seq_len)
96 |     x_test, y_test = create_test_data(values, seq_len)
97 |     model = train_model(x_train, y_train, x_test, y_test)
98 |     y_pred, y_test = predict(model, x_test, y_test, scaler)
99 |     evaluate_model(y_pred, y_test)
--------------------------------------------------------------------------------
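trainer.py expects a data.csv with at least 'date' and 'close' columns (read_data drops the former, normalize_data scales the latter); note also that create_train_data and create_test_data both walk the full series, so this pipeline evaluates in-sample. A hedged sketch for generating a synthetic data.csv so the whole script runs end to end:

    import numpy as np
    import pandas as pd

    # A random walk stands in for real closing prices.
    n = 500
    dates = pd.date_range('2020-01-01', periods=n, freq='D')
    close = 100 + np.cumsum(np.random.randn(n))
    pd.DataFrame({'date': dates, 'close': close}).to_csv('data.csv', index=False)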
/tests/blog_analyse/postdate_line.html:
--------------------------------------------------------------------------------
(truncated pyecharts output page: of the original 273 lines only the <title> "Awesome-pyecharts" survives; the inlined ECharts markup and JavaScript were stripped during extraction)
--------------------------------------------------------------------------------
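"Awesome-pyecharts" is pyecharts' default page title, and the file name suggests a line chart of posting dates, so the page was almost certainly rendered by a pyecharts Line chart. A minimal sketch of how such a file is produced (the axis data here is hypothetical; only the output format is known):

    from pyecharts.charts import Line

    line = Line()
    line.add_xaxis(['2019-01', '2019-02', '2019-03'])
    line.add_yaxis('posts', [4, 7, 2])
    line.render('postdate_line.html')   # writes a self-contained HTML page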
/tests/blog_analyse/stop_word.txt:
--------------------------------------------------------------------------------
1 |
2 | $
3 | 0
4 | 1
5 | 2
6 | 3
7 | 4
8 | 5
9 | 6
10 | 7
11 | 8
12 | 9
13 | ?
14 | _
15 | “
16 | ”
17 | 、
18 | 。
19 | 《
20 | 》
21 | 一
22 | 一些
23 | 一何
24 | 一切
25 | 一则
26 | 一方面
27 | 一旦
28 | 一来
29 | 一样
30 | 一般
31 | 一转眼
32 | 一瞬间
33 | 万一
34 | 上
35 | 上下
36 | 下
37 | 不
38 | 不仅
39 | 不但
40 | 不光
41 | 不单
42 | 不只
43 | 不外乎
44 | 不如
45 | 不妨
46 | 不尽
47 | 不尽然
48 | 不得
49 | 不怕
50 | 不惟
51 | 不成
52 | 不拘
53 | 不料
54 | 不是
55 | 不比
56 | 不然
57 | 不特
58 | 不独
59 | 不管
60 | 不至于
61 | 不若
62 | 不论
63 | 不过
64 | 不问
65 | 与
66 | 与其
67 | 与其说
68 | 与否
69 | 与此同时
70 | 且
71 | 且不说
72 | 且说
73 | 两者
74 | 个
75 | 个别
76 | 临
77 | 为
78 | 为了
79 | 为什么
80 | 为何
81 | 为止
82 | 为此
83 | 为着
84 | 乃
85 | 乃至
86 | 乃至于
87 | 么
88 | 之
89 | 之一
90 | 之所以
91 | 之类
92 | 乌乎
93 | 乎
94 | 乘
95 | 也
96 | 也好
97 | 也罢
98 | 了
99 | 二来
100 | 于
101 | 于是
102 | 于是乎
103 | 云云
104 | 云尔
105 | 些
106 | 亦
107 | 人
108 | 人们
109 | 人家
110 | 什么
111 | 什么样
112 | 今
113 | 介于
114 | 仍
115 | 仍旧
116 | 从
117 | 从此
118 | 从而
119 | 他
120 | 他人
121 | 他们
122 | 以
123 | 以上
124 | 以为
125 | 以便
126 | 以免
127 | 以及
128 | 以故
129 | 以期
130 | 以来
131 | 以至
132 | 以至于
133 | 以致
134 | 们
135 | 任
136 | 任何
137 | 任凭
138 | 似的
139 | 但
140 | 但凡
141 | 但是
142 | 何
143 | 何以
144 | 何况
145 | 何处
146 | 何时
147 | 余外
148 | 作为
149 | 你
150 | 你们
151 | 使
152 | 使得
153 | 例如
154 | 依
155 | 依据
156 | 依照
157 | 便于
158 | 俺
159 | 俺们
160 | 倘
161 | 倘使
162 | 倘或
163 | 倘然
164 | 倘若
165 | 借
166 | 假使
167 | 假如
168 | 假若
169 | 傥然
170 | 像
171 | 儿
172 | 先不先
173 | 光是
174 | 全体
175 | 全部
176 | 兮
177 | 关于
178 | 其
179 | 其一
180 | 其中
181 | 其二
182 | 其他
183 | 其余
184 | 其它
185 | 其次
186 | 具体地说
187 | 具体说来
188 | 兼之
189 | 内
190 | 再
191 | 再其次
192 | 再则
193 | 再有
194 | 再者
195 | 再者说
196 | 再说
197 | 冒
198 | 冲
199 | 况且
200 | 几
201 | 几时
202 | 凡
203 | 凡是
204 | 凭
205 | 凭借
206 | 出于
207 | 出来
208 | 分别
209 | 则
210 | 则甚
211 | 别
212 | 别人
213 | 别处
214 | 别是
215 | 别的
216 | 别管
217 | 别说
218 | 到
219 | 前后
220 | 前此
221 | 前者
222 | 加之
223 | 加以
224 | 即
225 | 即令
226 | 即使
227 | 即便
228 | 即如
229 | 即或
230 | 即若
231 | 却
232 | 去
233 | 又
234 | 又及
235 | 及
236 | 及其
237 | 及至
238 | 反之
239 | 反而
240 | 反过来
241 | 反过来说
242 | 受到
243 | 另
244 | 另一方面
245 | 另外
246 | 另悉
247 | 只
248 | 只当
249 | 只怕
250 | 只是
251 | 只有
252 | 只消
253 | 只要
254 | 只限
255 | 叫
256 | 叮咚
257 | 可
258 | 可以
259 | 可是
260 | 可见
261 | 各
262 | 各个
263 | 各位
264 | 各种
265 | 各自
266 | 同
267 | 同时
268 | 后
269 | 后者
270 | 向
271 | 向使
272 | 向着
273 | 吓
274 | 吗
275 | 否则
276 | 吧
277 | 吧哒
278 | 吱
279 | 呀
280 | 呃
281 | 呕
282 | 呗
283 | 呜
284 | 呜呼
285 | 呢
286 | 呵
287 | 呵呵
288 | 呸
289 | 呼哧
290 | 咋
291 | 和
292 | 咚
293 | 咦
294 | 咧
295 | 咱
296 | 咱们
297 | 咳
298 | 哇
299 | 哈
300 | 哈哈
301 | 哉
302 | 哎
303 | 哎呀
304 | 哎哟
305 | 哗
306 | 哟
307 | 哦
308 | 哩
309 | 哪
310 | 哪个
311 | 哪些
312 | 哪儿
313 | 哪天
314 | 哪年
315 | 哪怕
316 | 哪样
317 | 哪边
318 | 哪里
319 | 哼
320 | 哼唷
321 | 唉
322 | 唯有
323 | 啊
324 | 啐
325 | 啥
326 | 啦
327 | 啪达
328 | 啷当
329 | 喂
330 | 喏
331 | 喔唷
332 | 喽
333 | 嗡
334 | 嗡嗡
335 | 嗬
336 | 嗯
337 | 嗳
338 | 嘎
339 | 嘎登
340 | 嘘
341 | 嘛
342 | 嘻
343 | 嘿
344 | 嘿嘿
345 | 因
346 | 因为
347 | 因了
348 | 因此
349 | 因着
350 | 因而
351 | 固然
352 | 在
353 | 在下
354 | 在于
355 | 地
356 | 基于
357 | 处在
358 | 多
359 | 多么
360 | 多少
361 | 大
362 | 大家
363 | 她
364 | 她们
365 | 好
366 | 如
367 | 如上
368 | 如上所述
369 | 如下
370 | 如何
371 | 如其
372 | 如同
373 | 如是
374 | 如果
375 | 如此
376 | 如若
377 | 始而
378 | 孰料
379 | 孰知
380 | 宁
381 | 宁可
382 | 宁愿
383 | 宁肯
384 | 它
385 | 它们
386 | 对
387 | 对于
388 | 对待
389 | 对方
390 | 对比
391 | 将
392 | 小
393 | 尔
394 | 尔后
395 | 尔尔
396 | 尚且
397 | 就
398 | 就是
399 | 就是了
400 | 就是说
401 | 就算
402 | 就要
403 | 尽
404 | 尽管
405 | 尽管如此
406 | 岂但
407 | 己
408 | 已
409 | 已矣
410 | 巴
411 | 巴巴
412 | 并
413 | 并且
414 | 并非
415 | 庶乎
416 | 庶几
417 | 开外
418 | 开始
419 | 归
420 | 归齐
421 | 当
422 | 当地
423 | 当然
424 | 当着
425 | 彼
426 | 彼时
427 | 彼此
428 | 往
429 | 待
430 | 很
431 | 得
432 | 得了
433 | 怎
434 | 怎么
435 | 怎么办
436 | 怎么样
437 | 怎奈
438 | 怎样
439 | 总之
440 | 总的来看
441 | 总的来说
442 | 总的说来
443 | 总而言之
444 | 恰恰相反
445 | 您
446 | 惟其
447 | 慢说
448 | 我
449 | 我们
450 | 或
451 | 或则
452 | 或是
453 | 或曰
454 | 或者
455 | 截至
456 | 所
457 | 所以
458 | 所在
459 | 所幸
460 | 所有
461 | 才
462 | 才能
463 | 打
464 | 打从
465 | 把
466 | 抑或
467 | 拿
468 | 按
469 | 按照
470 | 换句话说
471 | 换言之
472 | 据
473 | 据此
474 | 接着
475 | 故
476 | 故此
477 | 故而
478 | 旁人
479 | 无
480 | 无宁
481 | 无论
482 | 既
483 | 既往
484 | 既是
485 | 既然
486 | 时候
487 | 是
488 | 是以
489 | 是的
490 | 曾
491 | 替
492 | 替代
493 | 最
494 | 有
495 | 有些
496 | 有关
497 | 有及
498 | 有时
499 | 有的
500 | 望
501 | 朝
502 | 朝着
503 | 本
504 | 本人
505 | 本地
506 | 本着
507 | 本身
508 | 来
509 | 来着
510 | 来自
511 | 来说
512 | 极了
513 | 果然
514 | 果真
515 | 某
516 | 某个
517 | 某些
518 | 某某
519 | 根据
520 | 欤
521 | 正值
522 | 正如
523 | 正巧
524 | 正是
525 | 此
526 | 此地
527 | 此处
528 | 此外
529 | 此时
530 | 此次
531 | 此间
532 | 毋宁
533 | 每
534 | 每当
535 | 比
536 | 比及
537 | 比如
538 | 比方
539 | 没奈何
540 | 沿
541 | 沿着
542 | 漫说
543 | 焉
544 | 然则
545 | 然后
546 | 然而
547 | 照
548 | 照着
549 | 犹且
550 | 犹自
551 | 甚且
552 | 甚么
553 | 甚或
554 | 甚而
555 | 甚至
556 | 甚至于
557 | 用
558 | 用来
559 | 由
560 | 由于
561 | 由是
562 | 由此
563 | 由此可见
564 | 的
565 | 的确
566 | 的话
567 | 直到
568 | 相对而言
569 | 省得
570 | 看
571 | 眨眼
572 | 着
573 | 着呢
574 | 矣
575 | 矣乎
576 | 矣哉
577 | 离
578 | 竟而
579 | 第
580 | 等
581 | 等到
582 | 等等
583 | 简言之
584 | 管
585 | 类如
586 | 紧接着
587 | 纵
588 | 纵令
589 | 纵使
590 | 纵然
591 | 经
592 | 经过
593 | 结果
594 | 给
595 | 继之
596 | 继后
597 | 继而
598 | 综上所述
599 | 罢了
600 | 者
601 | 而
602 | 而且
603 | 而况
604 | 而后
605 | 而外
606 | 而已
607 | 而是
608 | 而言
609 | 能
610 | 能否
611 | 腾
612 | 自
613 | 自个儿
614 | 自从
615 | 自各儿
616 | 自后
617 | 自家
618 | 自己
619 | 自打
620 | 自身
621 | 至
622 | 至于
623 | 至今
624 | 至若
625 | 致
626 | 般的
627 | 若
628 | 若夫
629 | 若是
630 | 若果
631 | 若非
632 | 莫不然
633 | 莫如
634 | 莫若
635 | 虽
636 | 虽则
637 | 虽然
638 | 虽说
639 | 被
640 | 要
641 | 要不
642 | 要不是
643 | 要不然
644 | 要么
645 | 要是
646 | 譬喻
647 | 譬如
648 | 让
649 | 许多
650 | 论
651 | 设使
652 | 设或
653 | 设若
654 | 诚如
655 | 诚然
656 | 该
657 | 说来
658 | 诸
659 | 诸位
660 | 诸如
661 | 谁
662 | 谁人
663 | 谁料
664 | 谁知
665 | 贼死
666 | 赖以
667 | 赶
668 | 起
669 | 起见
670 | 趁
671 | 趁着
672 | 越是
673 | 距
674 | 跟
675 | 较
676 | 较之
677 | 边
678 | 过
679 | 还
680 | 还是
681 | 还有
682 | 还要
683 | 这
684 | 这一来
685 | 这个
686 | 这么
687 | 这么些
688 | 这么样
689 | 这么点儿
690 | 这些
691 | 这会儿
692 | 这儿
693 | 这就是说
694 | 这时
695 | 这样
696 | 这次
697 | 这般
698 | 这边
699 | 这里
700 | 进而
701 | 连
702 | 连同
703 | 逐步
704 | 通过
705 | 遵循
706 | 遵照
707 | 那
708 | 那个
709 | 那么
710 | 那么些
711 | 那么样
712 | 那些
713 | 那会儿
714 | 那儿
715 | 那时
716 | 那样
717 | 那般
718 | 那边
719 | 那里
720 | 都
721 | 鄙人
722 | 鉴于
723 | 针对
724 | 阿
725 | 除
726 | 除了
727 | 除外
728 | 除开
729 | 除此之外
730 | 除非
731 | 随
732 | 随后
733 | 随时
734 | 随着
735 | 难道说
736 | 非但
737 | 非徒
738 | 非特
739 | 非独
740 | 靠
741 | 顺
742 | 顺着
743 | 首先
744 | !
745 | ,
746 | :
747 | ;
748 | ?
749 |
--------------------------------------------------------------------------------
/tests/ctrip/main.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import re
4 | import threading
5 |
6 | import wx
7 | import time
8 | from selenium import webdriver
9 | from selenium.webdriver import ChromeOptions
10 |
11 | import sys
12 | sys.path.append("../../Spiders/")
13 | import shgjj
14 | from ctrip.main import Ctrip
15 |
16 |
17 | class SpiderHelper:
18 | def __init__(self):
19 | return
20 |
21 | def Automation(self, url):
22 | option = ChromeOptions()
23 | option.add_experimental_option('excludeSwitches', ['enable-automation'])
24 | self.driver = webdriver.Chrome(options=option)
25 | url = str(url)
26 | self.driver.get(url)
27 |
28 | def getCookie3(self, login_url, quit):
29 | self.Automation(login_url)
30 | cookie_str = ''
31 |         while True:
32 | time.sleep(0.2)
33 | if self.driver.current_url != login_url:
34 | get_cookies = self.driver.get_cookies()
35 | cookie_str = ''
36 | for s in get_cookies:
37 | cookie_str = cookie_str + s['name'] + '=' + s['value'] + ';'
38 | if quit == 1:
39 | self.driver.quit()
40 | break
41 | return cookie_str
42 |
43 | def getCookie2(self, login_url, curr_url, extra_url, quit):
44 | self.Automation(login_url)
45 | cookie_str = ''
46 |         while True:
47 | time.sleep(0.2)
48 | if self.driver.current_url == curr_url:
49 |                 if extra_url != '':  # only hop when an extra page is needed to set cookies
50 |                     self.driver.get(extra_url)
51 | get_cookies = self.driver.get_cookies()
52 | cookie_str = ''
53 | for s in get_cookies:
54 | cookie_str = cookie_str + s['name'] + '=' + s['value'] + ';'
55 | if quit == 1:
56 | self.driver.quit()
57 | break
58 | return cookie_str
59 |
60 | def getCookie(self, login):
61 | while True:
62 | try:
63 | if self.driver.get_log('driver')[0]['level'] == "WARNING":
64 | return 0
65 |             except Exception:  # driver log may be empty or unavailable
66 | pass
67 |
68 | time.sleep(1)
69 |
70 | try:
71 | # if not login -> exception
72 | self.driver.find_element_by_css_selector(login)
73 | except Exception as e:
74 | #print(e)
75 | pass
76 | else:
77 | cookie_list = self.driver.get_cookies()
78 | self.driver.close()
79 |
80 | res = ''
81 | for cookie in cookie_list:
82 | res += cookie.get('name') + '=' + cookie.get('value').replace('\"', '') + ';'
83 | return res
84 |
85 |
86 | if __name__ == '__main__':
87 |
88 | # Get cookies from ctrip
89 | helper = SpiderHelper()
90 | login_url = 'https://passport.ctrip.com/user/login'
91 | cookie_str = helper.getCookie3(login_url, 1)
92 | if cookie_str == '':
93 | print("Get Cookie Error")
94 | exit()
95 |
96 | # Download the orders from the ctrip and save them in an excel
97 | y = Ctrip(cookie_str)
98 | y.get_order()
99 | print("Get orders from ctrip sucessfully")
100 | print("Orders have been stored in ctrip_orders.xslx")
101 |
--------------------------------------------------------------------------------
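The helpers above build cookies as a flat 'name=value;' string. A hedged sketch of turning that string into a dict usable with requests (the helper name and URL are illustrative, not part of the repository):

    import requests

    def cookie_str_to_dict(cookie_str):
        # split 'name=value;name=value;' pairs, ignoring empty trailing segments
        pairs = [p for p in cookie_str.split(';') if '=' in p]
        return dict(p.split('=', 1) for p in pairs)

    cookies = cookie_str_to_dict('sessionid=abc123;uid=42;')
    resp = requests.get('https://my.ctrip.com/', cookies=cookies)
    print(resp.status_code)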
/tools/resource/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/tools/resource/.DS_Store
--------------------------------------------------------------------------------
/tools/resource/icon/12306.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/tools/resource/icon/12306.png
--------------------------------------------------------------------------------
/tools/resource/icon/alimail.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/tools/resource/icon/alimail.png
--------------------------------------------------------------------------------
/tools/resource/icon/alipay-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/tools/resource/icon/alipay-logo.png
--------------------------------------------------------------------------------
/tools/resource/icon/bilibili.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/tools/resource/icon/bilibili.png
--------------------------------------------------------------------------------
/tools/resource/icon/chrome-logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/tools/resource/icon/chrome-logo.png
--------------------------------------------------------------------------------
/tools/resource/icon/cnblog.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/tools/resource/icon/cnblog.png
--------------------------------------------------------------------------------
/tools/resource/icon/csdn.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/tools/resource/icon/csdn.png
--------------------------------------------------------------------------------
/tools/resource/icon/ctrip.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/tools/resource/icon/ctrip.png
--------------------------------------------------------------------------------
/tools/resource/icon/dianxin.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/tools/resource/icon/dianxin.png
--------------------------------------------------------------------------------
/tools/resource/icon/github.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/tools/resource/icon/github.png
--------------------------------------------------------------------------------
/tools/resource/icon/gjj.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/tools/resource/icon/gjj.png
--------------------------------------------------------------------------------
/tools/resource/icon/hotmail.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/tools/resource/icon/hotmail.png
--------------------------------------------------------------------------------
/tools/resource/icon/jd.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/tools/resource/icon/jd.png
--------------------------------------------------------------------------------
/tools/resource/icon/jianshu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/tools/resource/icon/jianshu.png
--------------------------------------------------------------------------------
/tools/resource/icon/liantong.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/tools/resource/icon/liantong.png
--------------------------------------------------------------------------------
/tools/resource/icon/netease_cloudmusic.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/tools/resource/icon/netease_cloudmusic.png
--------------------------------------------------------------------------------
/tools/resource/icon/oschina.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/tools/resource/icon/oschina.png
--------------------------------------------------------------------------------
/tools/resource/icon/qmail.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/tools/resource/icon/qmail.png
--------------------------------------------------------------------------------
/tools/resource/icon/qq.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/tools/resource/icon/qq.png
--------------------------------------------------------------------------------
/tools/resource/icon/qqqun.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/tools/resource/icon/qqqun.png
--------------------------------------------------------------------------------
/tools/resource/icon/sina.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/tools/resource/icon/sina.png
--------------------------------------------------------------------------------
/tools/resource/icon/taobao.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/tools/resource/icon/taobao.png
--------------------------------------------------------------------------------
/tools/resource/icon/wangyi.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/tools/resource/icon/wangyi.png
--------------------------------------------------------------------------------
/tools/resource/icon/wechat-moments-album.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/tools/resource/icon/wechat-moments-album.png
--------------------------------------------------------------------------------
/tools/resource/icon/wechat-moments.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/tools/resource/icon/wechat-moments.png
--------------------------------------------------------------------------------
/tools/resource/icon/wechat.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/tools/resource/icon/wechat.png
--------------------------------------------------------------------------------
/tools/resource/icon/xiecheng.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/tools/resource/icon/xiecheng.png
--------------------------------------------------------------------------------
/tools/resource/icon/xuexin.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/tools/resource/icon/xuexin.png
--------------------------------------------------------------------------------
/tools/resource/icon/yidong.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/tools/resource/icon/yidong.png
--------------------------------------------------------------------------------
/tools/resource/icon/zhihu-logo.svg:
--------------------------------------------------------------------------------
(single-line SVG markup stripped during extraction)
--------------------------------------------------------------------------------
/tools/resource/icon/zhihu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kangvcar/InfoSpider/aa5f730396db33f36cff17148ae9a7a1e25cea77/tools/resource/icon/zhihu.png
--------------------------------------------------------------------------------
/tools/stop_word.txt:
--------------------------------------------------------------------------------
1 | $
2 | 0
3 | 1
4 | 2
5 | 3
6 | 4
7 | 5
8 | 6
9 | 7
10 | 8
11 | 9
12 | ?
13 | _
14 | “
15 | ”
16 | 、
17 | 。
18 | 《
19 | 》
20 | 一
21 | 一些
22 | 一何
23 | 一切
24 | 一则
25 | 一方面
26 | 一旦
27 | 一来
28 | 一样
29 | 一般
30 | 一转眼
31 | 万一
32 | 上
33 | 上下
34 | 下
35 | 不
36 | 不仅
37 | 不但
38 | 不光
39 | 不单
40 | 不只
41 | 不外乎
42 | 不如
43 | 不妨
44 | 不尽
45 | 不尽然
46 | 不得
47 | 不怕
48 | 不惟
49 | 不成
50 | 不拘
51 | 不料
52 | 不是
53 | 不比
54 | 不然
55 | 不特
56 | 不独
57 | 不管
58 | 不至于
59 | 不若
60 | 不论
61 | 不过
62 | 不问
63 | 与
64 | 与其
65 | 与其说
66 | 与否
67 | 与此同时
68 | 且
69 | 且不说
70 | 且说
71 | 两者
72 | 个
73 | 个别
74 | 临
75 | 为
76 | 为了
77 | 为什么
78 | 为何
79 | 为止
80 | 为此
81 | 为着
82 | 乃
83 | 乃至
84 | 乃至于
85 | 么
86 | 之
87 | 之一
88 | 之所以
89 | 之类
90 | 乌乎
91 | 乎
92 | 乘
93 | 也
94 | 也好
95 | 也罢
96 | 了
97 | 二来
98 | 于
99 | 于是
100 | 于是乎
101 | 云云
102 | 云尔
103 | 些
104 | 亦
105 | 人
106 | 人们
107 | 人家
108 | 什么
109 | 什么样
110 | 今
111 | 介于
112 | 仍
113 | 仍旧
114 | 从
115 | 从此
116 | 从而
117 | 他
118 | 他人
119 | 他们
120 | 以
121 | 以上
122 | 以为
123 | 以便
124 | 以免
125 | 以及
126 | 以故
127 | 以期
128 | 以来
129 | 以至
130 | 以至于
131 | 以致
132 | 们
133 | 任
134 | 任何
135 | 任凭
136 | 似的
137 | 但
138 | 但凡
139 | 但是
140 | 何
141 | 何以
142 | 何况
143 | 何处
144 | 何时
145 | 余外
146 | 作为
147 | 你
148 | 你们
149 | 使
150 | 使得
151 | 例如
152 | 依
153 | 依据
154 | 依照
155 | 便于
156 | 俺
157 | 俺们
158 | 倘
159 | 倘使
160 | 倘或
161 | 倘然
162 | 倘若
163 | 借
164 | 假使
165 | 假如
166 | 假若
167 | 傥然
168 | 像
169 | 儿
170 | 先不先
171 | 光是
172 | 全体
173 | 全部
174 | 兮
175 | 关于
176 | 其
177 | 其一
178 | 其中
179 | 其二
180 | 其他
181 | 其余
182 | 其它
183 | 其次
184 | 具体地说
185 | 具体说来
186 | 兼之
187 | 内
188 | 再
189 | 再其次
190 | 再则
191 | 再有
192 | 再者
193 | 再者说
194 | 再说
195 | 冒
196 | 冲
197 | 况且
198 | 几
199 | 几时
200 | 凡
201 | 凡是
202 | 凭
203 | 凭借
204 | 出于
205 | 出来
206 | 分别
207 | 则
208 | 则甚
209 | 别
210 | 别人
211 | 别处
212 | 别是
213 | 别的
214 | 别管
215 | 别说
216 | 到
217 | 前后
218 | 前此
219 | 前者
220 | 加之
221 | 加以
222 | 即
223 | 即令
224 | 即使
225 | 即便
226 | 即如
227 | 即或
228 | 即若
229 | 却
230 | 去
231 | 又
232 | 又及
233 | 及
234 | 及其
235 | 及至
236 | 反之
237 | 反而
238 | 反过来
239 | 反过来说
240 | 受到
241 | 另
242 | 另一方面
243 | 另外
244 | 另悉
245 | 只
246 | 只当
247 | 只怕
248 | 只是
249 | 只有
250 | 只消
251 | 只要
252 | 只限
253 | 叫
254 | 叮咚
255 | 可
256 | 可以
257 | 可是
258 | 可见
259 | 各
260 | 各个
261 | 各位
262 | 各种
263 | 各自
264 | 同
265 | 同时
266 | 后
267 | 后者
268 | 向
269 | 向使
270 | 向着
271 | 吓
272 | 吗
273 | 否则
274 | 吧
275 | 吧哒
276 | 吱
277 | 呀
278 | 呃
279 | 呕
280 | 呗
281 | 呜
282 | 呜呼
283 | 呢
284 | 呵
285 | 呵呵
286 | 呸
287 | 呼哧
288 | 咋
289 | 和
290 | 咚
291 | 咦
292 | 咧
293 | 咱
294 | 咱们
295 | 咳
296 | 哇
297 | 哈
298 | 哈哈
299 | 哉
300 | 哎
301 | 哎呀
302 | 哎哟
303 | 哗
304 | 哟
305 | 哦
306 | 哩
307 | 哪
308 | 哪个
309 | 哪些
310 | 哪儿
311 | 哪天
312 | 哪年
313 | 哪怕
314 | 哪样
315 | 哪边
316 | 哪里
317 | 哼
318 | 哼唷
319 | 唉
320 | 唯有
321 | 啊
322 | 啐
323 | 啥
324 | 啦
325 | 啪达
326 | 啷当
327 | 喂
328 | 喏
329 | 喔唷
330 | 喽
331 | 嗡
332 | 嗡嗡
333 | 嗬
334 | 嗯
335 | 嗳
336 | 嘎
337 | 嘎登
338 | 嘘
339 | 嘛
340 | 嘻
341 | 嘿
342 | 嘿嘿
343 | 因
344 | 因为
345 | 因了
346 | 因此
347 | 因着
348 | 因而
349 | 固然
350 | 在
351 | 在下
352 | 在于
353 | 地
354 | 基于
355 | 处在
356 | 多
357 | 多么
358 | 多少
359 | 大
360 | 大家
361 | 她
362 | 她们
363 | 好
364 | 如
365 | 如上
366 | 如上所述
367 | 如下
368 | 如何
369 | 如其
370 | 如同
371 | 如是
372 | 如果
373 | 如此
374 | 如若
375 | 始而
376 | 孰料
377 | 孰知
378 | 宁
379 | 宁可
380 | 宁愿
381 | 宁肯
382 | 它
383 | 它们
384 | 对
385 | 对于
386 | 对待
387 | 对方
388 | 对比
389 | 将
390 | 小
391 | 尔
392 | 尔后
393 | 尔尔
394 | 尚且
395 | 就
396 | 就是
397 | 就是了
398 | 就是说
399 | 就算
400 | 就要
401 | 尽
402 | 尽管
403 | 尽管如此
404 | 岂但
405 | 己
406 | 已
407 | 已矣
408 | 巴
409 | 巴巴
410 | 并
411 | 并且
412 | 并非
413 | 庶乎
414 | 庶几
415 | 开外
416 | 开始
417 | 归
418 | 归齐
419 | 当
420 | 当地
421 | 当然
422 | 当着
423 | 彼
424 | 彼时
425 | 彼此
426 | 往
427 | 待
428 | 很
429 | 得
430 | 得了
431 | 怎
432 | 怎么
433 | 怎么办
434 | 怎么样
435 | 怎奈
436 | 怎样
437 | 总之
438 | 总的来看
439 | 总的来说
440 | 总的说来
441 | 总而言之
442 | 恰恰相反
443 | 您
444 | 惟其
445 | 慢说
446 | 我
447 | 我们
448 | 或
449 | 或则
450 | 或是
451 | 或曰
452 | 或者
453 | 截至
454 | 所
455 | 所以
456 | 所在
457 | 所幸
458 | 所有
459 | 才
460 | 才能
461 | 打
462 | 打从
463 | 把
464 | 抑或
465 | 拿
466 | 按
467 | 按照
468 | 换句话说
469 | 换言之
470 | 据
471 | 据此
472 | 接着
473 | 故
474 | 故此
475 | 故而
476 | 旁人
477 | 无
478 | 无宁
479 | 无论
480 | 既
481 | 既往
482 | 既是
483 | 既然
484 | 时候
485 | 是
486 | 是以
487 | 是的
488 | 曾
489 | 替
490 | 替代
491 | 最
492 | 有
493 | 有些
494 | 有关
495 | 有及
496 | 有时
497 | 有的
498 | 望
499 | 朝
500 | 朝着
501 | 本
502 | 本人
503 | 本地
504 | 本着
505 | 本身
506 | 来
507 | 来着
508 | 来自
509 | 来说
510 | 极了
511 | 果然
512 | 果真
513 | 某
514 | 某个
515 | 某些
516 | 某某
517 | 根据
518 | 欤
519 | 正值
520 | 正如
521 | 正巧
522 | 正是
523 | 此
524 | 此地
525 | 此处
526 | 此外
527 | 此时
528 | 此次
529 | 此间
530 | 毋宁
531 | 每
532 | 每当
533 | 比
534 | 比及
535 | 比如
536 | 比方
537 | 没奈何
538 | 沿
539 | 沿着
540 | 漫说
541 | 焉
542 | 然则
543 | 然后
544 | 然而
545 | 照
546 | 照着
547 | 犹且
548 | 犹自
549 | 甚且
550 | 甚么
551 | 甚或
552 | 甚而
553 | 甚至
554 | 甚至于
555 | 用
556 | 用来
557 | 由
558 | 由于
559 | 由是
560 | 由此
561 | 由此可见
562 | 的
563 | 的确
564 | 的话
565 | 直到
566 | 相对而言
567 | 省得
568 | 看
569 | 眨眼
570 | 着
571 | 着呢
572 | 矣
573 | 矣乎
574 | 矣哉
575 | 离
576 | 竟而
577 | 第
578 | 等
579 | 等到
580 | 等等
581 | 简言之
582 | 管
583 | 类如
584 | 紧接着
585 | 纵
586 | 纵令
587 | 纵使
588 | 纵然
589 | 经
590 | 经过
591 | 结果
592 | 给
593 | 继之
594 | 继后
595 | 继而
596 | 综上所述
597 | 罢了
598 | 者
599 | 而
600 | 而且
601 | 而况
602 | 而后
603 | 而外
604 | 而已
605 | 而是
606 | 而言
607 | 能
608 | 能否
609 | 腾
610 | 自
611 | 自个儿
612 | 自从
613 | 自各儿
614 | 自后
615 | 自家
616 | 自己
617 | 自打
618 | 自身
619 | 至
620 | 至于
621 | 至今
622 | 至若
623 | 致
624 | 般的
625 | 若
626 | 若夫
627 | 若是
628 | 若果
629 | 若非
630 | 莫不然
631 | 莫如
632 | 莫若
633 | 虽
634 | 虽则
635 | 虽然
636 | 虽说
637 | 被
638 | 要
639 | 要不
640 | 要不是
641 | 要不然
642 | 要么
643 | 要是
644 | 譬喻
645 | 譬如
646 | 让
647 | 许多
648 | 论
649 | 设使
650 | 设或
651 | 设若
652 | 诚如
653 | 诚然
654 | 该
655 | 说来
656 | 诸
657 | 诸位
658 | 诸如
659 | 谁
660 | 谁人
661 | 谁料
662 | 谁知
663 | 贼死
664 | 赖以
665 | 赶
666 | 起
667 | 起见
668 | 趁
669 | 趁着
670 | 越是
671 | 距
672 | 跟
673 | 较
674 | 较之
675 | 边
676 | 过
677 | 还
678 | 还是
679 | 还有
680 | 还要
681 | 这
682 | 这一来
683 | 这个
684 | 这么
685 | 这么些
686 | 这么样
687 | 这么点儿
688 | 这些
689 | 这会儿
690 | 这儿
691 | 这就是说
692 | 这时
693 | 这样
694 | 这次
695 | 这般
696 | 这边
697 | 这里
698 | 进而
699 | 连
700 | 连同
701 | 逐步
702 | 通过
703 | 遵循
704 | 遵照
705 | 那
706 | 那个
707 | 那么
708 | 那么些
709 | 那么样
710 | 那些
711 | 那会儿
712 | 那儿
713 | 那时
714 | 那样
715 | 那般
716 | 那边
717 | 那里
718 | 都
719 | 鄙人
720 | 鉴于
721 | 针对
722 | 阿
723 | 除
724 | 除了
725 | 除外
726 | 除开
727 | 除此之外
728 | 除非
729 | 随
730 | 随后
731 | 随时
732 | 随着
733 | 难道说
734 | 非但
735 | 非徒
736 | 非特
737 | 非独
738 | 靠
739 | 顺
740 | 顺着
741 | 首先
742 | !
743 | ,
744 | :
745 | ;
746 | ?
--------------------------------------------------------------------------------
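A typical consumption pattern for this list: load it into a set and drop matching tokens after segmentation. The sketch below uses jieba as the tokenizer, which is an assumption; any tokenizer that yields a word list works the same way.

    import jieba

    with open('tools/stop_word.txt', encoding='utf-8') as f:
        stop_words = {line.strip() for line in f if line.strip()}

    tokens = jieba.lcut('这是一段用来演示停用词过滤的文本')
    kept = [t for t in tokens if t not in stop_words]
    print(kept)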
/uitest/main.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 |
4 | import wx
5 |
6 |
--------------------------------------------------------------------------------
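The uitest stub stops after importing wx. A minimal sketch of the event loop such a test would bootstrap (the frame title and size are illustrative, not from the repository):

    import wx

    app = wx.App()
    frame = wx.Frame(None, title='InfoSpider UI test', size=(400, 300))
    frame.Show()
    app.MainLoop()   # blocks until the window is closed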