├── .github
│   └── workflows
│       └── documentation.yml
├── .gitignore
├── LICENSE
├── README.md
├── babel.config.js
├── docs
│   ├── basics
│   │   ├── _category_.json
│   │   ├── base-recommend.md
│   │   ├── base.md
│   │   └── images
│   │       ├── QuickReference.png
│   │       ├── QuickReference_Pytorch.png
│   │       ├── clumsylegalpython.png
│   │       ├── logo.png
│   │       ├── 函数.png
│   │       ├── 基础知识.png
│   │       ├── 文件.png
│   │       ├── 模块.png
│   │       ├── 爬虫.png
│   │       ├── 类和对象.png
│   │       └── 魔法方法.png
│   ├── data-analysis
│   │   ├── _category_.json
│   │   ├── files
│   │   │   ├── 01-choice_data.ipynb
│   │   │   ├── 02-setting-value.ipynb
│   │   │   ├── 03-process-dropout-data.ipynb
│   │   │   ├── 04-import-and-export.ipynb
│   │   │   ├── 05-concat.ipynb
│   │   │   ├── 06-merge.ipynb
│   │   │   ├── 07-plot.ipynb
│   │   │   └── student.csv
│   │   ├── img
│   │   │   ├── 3-1.png
│   │   │   ├── 3-10.png
│   │   │   ├── 3-11.png
│   │   │   ├── 3-12.png
│   │   │   ├── 3-2.png
│   │   │   ├── 3-3.png
│   │   │   ├── 3-4.png
│   │   │   ├── 3-5.png
│   │   │   ├── 3-6.png
│   │   │   ├── 3-7.png
│   │   │   ├── 3-8.png
│   │   │   └── 3-9.png
│   │   ├── numpy.md
│   │   └── pandas.md
│   ├── deeplearning
│   │   ├── _category_.json
│   │   ├── deeplearning.md
│   │   ├── graph.mdx
│   │   └── images
│   │       └── pyg.png
│   ├── intro.md
│   └── scripts
│       ├── 1-extra_letpub.py
│       ├── 2-extra_syntaogf.py
│       ├── _category_.json
│       ├── auto_ocr_framework.md
│       ├── feige_export.md
│       ├── images
│       │   ├── ref-1-1.png
│       │   ├── ref-1-2.png
│       │   └── ref-3-1.png
│       ├── letpub.md
│       └── syntaogf.md
├── docusaurus.config.ts
├── package-lock.json
├── package.json
├── sidebars.ts
├── src
│   ├── components
│   │   └── HomepageFeatures
│   │       ├── index.tsx
│   │       └── styles.module.css
│   ├── css
│   │   └── custom.css
│   └── pages
│       ├── index.module.css
│       ├── index.tsx
│       └── markdown-page.md
├── static
│   ├── .nojekyll
│   └── img
│       ├── docusaurus-social-card.jpg
│       ├── docusaurus.png
│       ├── favicon.ico
│       ├── logo.png
│       ├── logo.svg
│       ├── undraw_docusaurus_mountain.svg
│       ├── undraw_docusaurus_react.svg
│       └── undraw_docusaurus_tree.svg
└── tsconfig.json
/.github/workflows/documentation.yml:
--------------------------------------------------------------------------------
1 | name: 🚀Deploy Github pages
2 | on:
3 |   push:
4 |     branches:
5 |       - master
6 | jobs:
7 |   build-and-deploy:
8 |     runs-on: ubuntu-latest
9 |     steps:
10 |       - name: Checkout 🛎️
11 |         uses: actions/checkout@v3
12 | 
13 |       - name: Install and Build 🔧
14 |         run: |
15 |           npm install
16 |           npm run build
17 | 
18 |       - name: Deploy 🚀
19 |         uses: JamesIves/github-pages-deploy-action@v4.3.3
20 |         with:
21 |           branch: gh-pages # The branch the action should deploy to.
22 |           folder: build # The folder the action should deploy.
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Dependencies
2 | /node_modules
3 |
4 | # Production
5 | /build
6 |
7 | # Generated files
8 | .docusaurus
9 | .cache-loader
10 | .idea
11 |
12 | # Misc
13 | .DS_Store
14 | .env.local
15 | .env.development.local
16 | .env.test.local
17 | .env.production.local
18 |
19 | npm-debug.log*
20 | yarn-debug.log*
21 | yarn-error.log*
22 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 Zhiyu
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Python-Basis-Notes
2 | 
3 | Python-Basis-Notes: a helping hand for getting started with Python
4 | 
5 | [](https://github.com/zhiyu1998/Python-Basis-Notes)
6 | 
7 | 
8 | 
9 | * 🐍 Python basics
10 | * 📊 NumPy basics
11 | * 🐼 Pandas basics
12 | * 🍥 Deep-learning basics + roadmap
13 | * 📚 Scripts: a script library
14 | 
15 | ### 📑 Read online
16 | https://zhiyu1998.github.io/Python-Basis-Notes/
17 | 
18 | ### 🐍 Script library
19 | 
20 | - [Extract journal/conference info for paper references](./docs/scripts/letpub.md)
21 | - [Collect ESG ratings of Chinese companies](./docs/scripts/syntaogf.md)
22 | - [Export the Feige knowledge base](./docs/scripts/feige_export.md)
23 | - [Automated OCR framework](./docs/scripts/auto_ocr_framework.md)
24 | 
25 | ### 📈 Trend chart
26 | 
27 | 
28 | 
--------------------------------------------------------------------------------
/babel.config.js:
--------------------------------------------------------------------------------
1 | module.exports = {
2 |   presets: [require.resolve('@docusaurus/core/lib/babel/preset')],
3 | };
4 |
--------------------------------------------------------------------------------
/docs/basics/_category_.json:
--------------------------------------------------------------------------------
1 | {
2 |   "label": "基础知识",
3 |   "position": 2,
4 |   "link": {
5 |     "type": "generated-index",
6 |     "description": "Mind maps and quick references for getting started with Python."
7 |   }
8 | }
9 |
--------------------------------------------------------------------------------
/docs/basics/base-recommend.md:
--------------------------------------------------------------------------------
1 | ---
2 | sidebar_position: 2
3 | ---
4 | 
5 | # Step 2: Cheat sheets
6 | 
7 | ## Practice Makes Perfect - Learn Python the Hard Way
8 | This book builds your Python skills gradually through practice and memorization, then has you apply them to increasingly difficult problems. By the end of the book you will have the tools you need to take on more complex programs. I like to tell people that my book gives you a "programming black belt": it means you know the basics well enough to start learning programming for real.
9 | 
10 | > Try it: https://wizardforcel.gitbooks.io/lpthw/content/
11 | 
12 | 
13 | 
14 | ## Quick Reference
15 | This site offers quick lookups for commonly used Python functions, so you can check their usage fast.
16 | 
17 | > Try it: https://quickref.cn/docs/python.html
18 | 
19 | 
20 | 
21 | ### Deep learning: the PyTorch sheet
22 | 
23 | > Try it: https://quickref.cn/docs/pytorch.html
24 | 
25 | 
--------------------------------------------------------------------------------
/docs/basics/base.md:
--------------------------------------------------------------------------------
1 | ---
2 | sidebar_position: 1
3 | ---
4 | # Step 1: Mind maps
5 | 
6 | ### 🍭 Preface
7 | 
8 | These mind maps were drawn while I was learning Python for the first time. I hope they help you; if you pick up some knowledge from them, please leave a star~
9 | 
10 | ### 🐌 Updates
11 | 
12 | November 23, 2022: added the automation script library, scripts written for day-to-day personal needs
13 | 
14 | June 17, 2022: added deep-learning material; removed the mind-map source files.
15 | 
16 | October 23, 2021: renamed the project to Python-Basis-Notes; added data-science material: a Jupyter notebook for *Pandas* [note: pandas course at https://www.bilibili.com/video/BV1Ex411L7oT]
17 | 
18 | July 15, 2021: renamed to Python-XMind-Note; added data-science material: Markdown notes for *NumPy*
19 | 
20 | ### 🍬 Basics
21 | 
22 | 
23 | 
24 | ### 📲 Functions
25 | 
26 | 
27 | 
28 | ### 📁 Files
29 | 
30 | 
31 | 
32 | ### 🗝️ Classes and objects
33 | 
34 | 
35 | 
36 | ### 🧙 Magic methods
37 | 
38 | 
39 | 
40 | ### 🃏 Modules
41 | 
42 | 
43 | 
44 | ### 🐞 Web scraping
45 | 
46 | 
47 | 
--------------------------------------------------------------------------------
/docs/basics/images/QuickReference.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/basics/images/QuickReference.png
--------------------------------------------------------------------------------
/docs/basics/images/QuickReference_Pytorch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/basics/images/QuickReference_Pytorch.png
--------------------------------------------------------------------------------
/docs/basics/images/clumsylegalpython.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/basics/images/clumsylegalpython.png
--------------------------------------------------------------------------------
/docs/basics/images/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/basics/images/logo.png
--------------------------------------------------------------------------------
/docs/basics/images/函数.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/basics/images/函数.png
--------------------------------------------------------------------------------
/docs/basics/images/基础知识.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/basics/images/基础知识.png
--------------------------------------------------------------------------------
/docs/basics/images/文件.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/basics/images/文件.png
--------------------------------------------------------------------------------
/docs/basics/images/模块.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/basics/images/模块.png
--------------------------------------------------------------------------------
/docs/basics/images/爬虫.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/basics/images/爬虫.png
--------------------------------------------------------------------------------
/docs/basics/images/类和对象.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/basics/images/类和对象.png
--------------------------------------------------------------------------------
/docs/basics/images/魔法方法.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/basics/images/魔法方法.png
--------------------------------------------------------------------------------
/docs/data-analysis/_category_.json:
--------------------------------------------------------------------------------
1 | {
2 |   "label": "数据分析",
3 |   "position": 3,
4 |   "link": {
5 |     "type": "generated-index"
6 |   }
7 | }
8 |
--------------------------------------------------------------------------------
/docs/data-analysis/files/01-choice_data.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "collapsed": true,
7 | "pycharm": {
8 | "name": "#%% md\n"
9 | }
10 | },
11 | "source": [
12 | "## 选择数据"
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": 1,
18 | "outputs": [],
19 | "source": [
20 | "import pandas as pd\n",
21 | "import numpy as np"
22 | ],
23 | "metadata": {
24 | "collapsed": false,
25 | "pycharm": {
26 | "name": "#%%\n"
27 | }
28 | }
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": 4,
33 | "outputs": [],
34 | "source": [
35 | "dates = pd.date_range('20130101', periods=6)\n",
36 | "df = pd.DataFrame(np.arange(24).reshape((6,4)), index=dates, columns=['A', 'B', 'C', 'D'])"
37 | ],
38 | "metadata": {
39 | "collapsed": false,
40 | "pycharm": {
41 | "name": "#%%\n"
42 | }
43 | }
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": 5,
48 | "outputs": [
49 | {
50 | "name": "stdout",
51 | "output_type": "stream",
52 | "text": [
53 | " A B C D\n",
54 | "2013-01-01 0 1 2 3\n",
55 | "2013-01-02 4 5 6 7\n",
56 | "2013-01-03 8 9 10 11\n",
57 | "2013-01-04 12 13 14 15\n",
58 | "2013-01-05 16 17 18 19\n",
59 | "2013-01-06 20 21 22 23\n"
60 | ]
61 | }
62 | ],
63 | "source": [
64 | "print(df)"
65 | ],
66 | "metadata": {
67 | "collapsed": false,
68 | "pycharm": {
69 | "name": "#%%\n"
70 | }
71 | }
72 | },
73 | {
74 | "cell_type": "code",
75 | "execution_count": 6,
76 | "outputs": [
77 | {
78 | "name": "stdout",
79 | "output_type": "stream",
80 | "text": [
81 | "2013-01-01 0\n",
82 | "2013-01-02 4\n",
83 | "2013-01-03 8\n",
84 | "2013-01-04 12\n",
85 | "2013-01-05 16\n",
86 | "2013-01-06 20\n",
87 | "Freq: D, Name: A, dtype: int32\n"
88 | ]
89 | }
90 | ],
91 | "source": [
92 | "print(df['A'])"
93 | ],
94 | "metadata": {
95 | "collapsed": false,
96 | "pycharm": {
97 | "name": "#%%\n"
98 | }
99 | }
100 | },
101 | {
102 | "cell_type": "code",
103 | "execution_count": 7,
104 | "outputs": [
105 | {
106 | "name": "stdout",
107 | "output_type": "stream",
108 | "text": [
109 | "2013-01-01 0\n",
110 | "2013-01-02 4\n",
111 | "2013-01-03 8\n",
112 | "2013-01-04 12\n",
113 | "2013-01-05 16\n",
114 | "2013-01-06 20\n",
115 | "Freq: D, Name: A, dtype: int32\n"
116 | ]
117 | }
118 | ],
119 | "source": [
120 | "print(df.A)"
121 | ],
122 | "metadata": {
123 | "collapsed": false,
124 | "pycharm": {
125 | "name": "#%%\n"
126 | }
127 | }
128 | },
129 | {
130 | "cell_type": "code",
131 | "execution_count": 8,
132 | "outputs": [
133 | {
134 | "data": {
135 | "text/plain": " A B C D\n2013-01-01 0 1 2 3\n2013-01-02 4 5 6 7\n2013-01-03 8 9 10 11",
136 | "text/html": "\n\n
\n \n \n | \n A | \n B | \n C | \n D | \n
\n \n \n \n 2013-01-01 | \n 0 | \n 1 | \n 2 | \n 3 | \n
\n \n 2013-01-02 | \n 4 | \n 5 | \n 6 | \n 7 | \n
\n \n 2013-01-03 | \n 8 | \n 9 | \n 10 | \n 11 | \n
\n \n
\n
"
137 | },
138 | "execution_count": 8,
139 | "metadata": {},
140 | "output_type": "execute_result"
141 | }
142 | ],
143 | "source": [
144 | "df[0:3]"
145 | ],
146 | "metadata": {
147 | "collapsed": false,
148 | "pycharm": {
149 | "name": "#%%\n"
150 | }
151 | }
152 | },
153 | {
154 | "cell_type": "code",
155 | "execution_count": 10,
156 | "outputs": [
157 | {
158 | "data": {
159 | "text/plain": " A B C D\n2013-01-02 4 5 6 7\n2013-01-03 8 9 10 11\n2013-01-04 12 13 14 15",
160 | "text/html": "\n\n
\n \n \n | \n A | \n B | \n C | \n D | \n
\n \n \n \n 2013-01-02 | \n 4 | \n 5 | \n 6 | \n 7 | \n
\n \n 2013-01-03 | \n 8 | \n 9 | \n 10 | \n 11 | \n
\n \n 2013-01-04 | \n 12 | \n 13 | \n 14 | \n 15 | \n
\n \n
\n
"
161 | },
162 | "execution_count": 10,
163 | "metadata": {},
164 | "output_type": "execute_result"
165 | }
166 | ],
167 | "source": [
168 | "df['20130102':'20130104']"
169 | ],
170 | "metadata": {
171 | "collapsed": false,
172 | "pycharm": {
173 | "name": "#%%\n"
174 | }
175 | }
176 | },
177 | {
178 | "cell_type": "markdown",
179 | "source": [
180 | "### loc"
181 | ],
182 | "metadata": {
183 | "collapsed": false,
184 | "pycharm": {
185 | "name": "#%% md\n"
186 | }
187 | }
188 | },
189 | {
190 | "cell_type": "code",
191 | "execution_count": 11,
192 | "outputs": [
193 | {
194 | "data": {
195 | "text/plain": "A 4\nB 5\nC 6\nD 7\nName: 2013-01-02 00:00:00, dtype: int32"
196 | },
197 | "execution_count": 11,
198 | "metadata": {},
199 | "output_type": "execute_result"
200 | }
201 | ],
202 | "source": [
203 | "df.loc['20130102']"
204 | ],
205 | "metadata": {
206 | "collapsed": false,
207 | "pycharm": {
208 | "name": "#%%\n"
209 | }
210 | }
211 | },
212 | {
213 | "cell_type": "code",
214 | "execution_count": 15,
215 | "outputs": [
216 | {
217 | "data": {
218 | "text/plain": "A 4\nB 5\nName: 2013-01-02 00:00:00, dtype: int32"
219 | },
220 | "execution_count": 15,
221 | "metadata": {},
222 | "output_type": "execute_result"
223 | }
224 | ],
225 | "source": [
226 | "# 纵向\n",
227 | "df.loc['20130102', ['A', 'B']]"
228 | ],
229 | "metadata": {
230 | "collapsed": false,
231 | "pycharm": {
232 | "name": "#%%\n"
233 | }
234 | }
235 | },
236 | {
237 | "cell_type": "markdown",
238 | "source": [
239 | "### iloc"
240 | ],
241 | "metadata": {
242 | "collapsed": false,
243 | "pycharm": {
244 | "name": "#%% md\n"
245 | }
246 | }
247 | },
248 | {
249 | "cell_type": "code",
250 | "execution_count": 18,
251 | "outputs": [
252 | {
253 | "data": {
254 | "text/plain": " A B C D\n2013-01-01 0 1 2 3\n2013-01-02 4 5 6 7\n2013-01-03 8 9 10 11\n2013-01-04 12 13 14 15\n2013-01-05 16 17 18 19\n2013-01-06 20 21 22 23",
255 | "text/html": "\n\n
\n \n \n | \n A | \n B | \n C | \n D | \n
\n \n \n \n 2013-01-01 | \n 0 | \n 1 | \n 2 | \n 3 | \n
\n \n 2013-01-02 | \n 4 | \n 5 | \n 6 | \n 7 | \n
\n \n 2013-01-03 | \n 8 | \n 9 | \n 10 | \n 11 | \n
\n \n 2013-01-04 | \n 12 | \n 13 | \n 14 | \n 15 | \n
\n \n 2013-01-05 | \n 16 | \n 17 | \n 18 | \n 19 | \n
\n \n 2013-01-06 | \n 20 | \n 21 | \n 22 | \n 23 | \n
\n \n
\n
"
256 | },
257 | "execution_count": 18,
258 | "metadata": {},
259 | "output_type": "execute_result"
260 | }
261 | ],
262 | "source": [
263 | "df"
264 | ],
265 | "metadata": {
266 | "collapsed": false,
267 | "pycharm": {
268 | "name": "#%%\n"
269 | }
270 | }
271 | },
272 | {
273 | "cell_type": "code",
274 | "execution_count": 16,
275 | "outputs": [
276 | {
277 | "data": {
278 | "text/plain": "A 12\nB 13\nC 14\nD 15\nName: 2013-01-04 00:00:00, dtype: int32"
279 | },
280 | "execution_count": 16,
281 | "metadata": {},
282 | "output_type": "execute_result"
283 | }
284 | ],
285 | "source": [
286 | "# 选择第三行(0开始)\n",
287 | "df.iloc[3]"
288 | ],
289 | "metadata": {
290 | "collapsed": false,
291 | "pycharm": {
292 | "name": "#%%\n"
293 | }
294 | }
295 | },
296 | {
297 | "cell_type": "code",
298 | "execution_count": 17,
299 | "outputs": [
300 | {
301 | "data": {
302 | "text/plain": "13"
303 | },
304 | "execution_count": 17,
305 | "metadata": {},
306 | "output_type": "execute_result"
307 | }
308 | ],
309 | "source": [
310 | "# 选择第三行,第二列(0开始)\n",
311 | "df.iloc[3, 1]"
312 | ],
313 | "metadata": {
314 | "collapsed": false,
315 | "pycharm": {
316 | "name": "#%%\n"
317 | }
318 | }
319 | },
320 | {
321 | "cell_type": "code",
322 | "execution_count": 19,
323 | "outputs": [
324 | {
325 | "data": {
326 | "text/plain": " B C\n2013-01-04 13 14\n2013-01-05 17 18",
327 | "text/html": "\n\n
\n \n \n | \n B | \n C | \n
\n \n \n \n 2013-01-04 | \n 13 | \n 14 | \n
\n \n 2013-01-05 | \n 17 | \n 18 | \n
\n \n
\n
"
328 | },
329 | "execution_count": 19,
330 | "metadata": {},
331 | "output_type": "execute_result"
332 | }
333 | ],
334 | "source": [
335 | "df.iloc[3:5, 1:3]"
336 | ],
337 | "metadata": {
338 | "collapsed": false,
339 | "pycharm": {
340 | "name": "#%%\n"
341 | }
342 | }
343 | },
344 | {
345 | "cell_type": "code",
346 | "execution_count": 20,
347 | "outputs": [
348 | {
349 | "data": {
350 | "text/plain": " B C\n2013-01-02 5 6\n2013-01-04 13 14\n2013-01-06 21 22",
351 | "text/html": "\n\n
\n \n \n | \n B | \n C | \n
\n \n \n \n 2013-01-02 | \n 5 | \n 6 | \n
\n \n 2013-01-04 | \n 13 | \n 14 | \n
\n \n 2013-01-06 | \n 21 | \n 22 | \n
\n \n
\n
"
352 | },
353 | "execution_count": 20,
354 | "metadata": {},
355 | "output_type": "execute_result"
356 | }
357 | ],
358 | "source": [
359 | "df.iloc[[1,3,5], 1:3]"
360 | ],
361 | "metadata": {
362 | "collapsed": false,
363 | "pycharm": {
364 | "name": "#%%\n"
365 | }
366 | }
367 | },
368 | {
369 | "cell_type": "markdown",
370 | "source": [
371 | "### ix [版本过期]"
372 | ],
373 | "metadata": {
374 | "collapsed": false,
375 | "pycharm": {
376 | "name": "#%% md\n"
377 | }
378 | }
379 | },
380 | {
381 | "cell_type": "code",
382 | "execution_count": 21,
383 | "outputs": [
384 | {
385 | "ename": "AttributeError",
386 | "evalue": "'DataFrame' object has no attribute 'ix'",
387 | "output_type": "error",
388 | "traceback": [
389 | "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m",
390 | "\u001B[1;31mAttributeError\u001B[0m Traceback (most recent call last)",
391 | "\u001B[1;32m\u001B[0m in \u001B[0;36m\u001B[1;34m\u001B[0m\n\u001B[1;32m----> 1\u001B[1;33m \u001B[0mdf\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mix\u001B[0m\u001B[1;33m[\u001B[0m\u001B[1;33m:\u001B[0m\u001B[1;36m3\u001B[0m\u001B[1;33m,\u001B[0m \u001B[1;33m[\u001B[0m\u001B[1;34m'A'\u001B[0m\u001B[1;33m,\u001B[0m \u001B[1;34m'C'\u001B[0m\u001B[1;33m]\u001B[0m\u001B[1;33m]\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m",
392 | "\u001B[1;32mD:\\anaconda3\\lib\\site-packages\\pandas\\core\\generic.py\u001B[0m in \u001B[0;36m__getattr__\u001B[1;34m(self, name)\u001B[0m\n\u001B[0;32m 5463\u001B[0m \u001B[1;32mif\u001B[0m \u001B[0mself\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0m_info_axis\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0m_can_hold_identifiers_and_holds_name\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mname\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m:\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 5464\u001B[0m \u001B[1;32mreturn\u001B[0m \u001B[0mself\u001B[0m\u001B[1;33m[\u001B[0m\u001B[0mname\u001B[0m\u001B[1;33m]\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m-> 5465\u001B[1;33m \u001B[1;32mreturn\u001B[0m \u001B[0mobject\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0m__getattribute__\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mself\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mname\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m 5466\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 5467\u001B[0m \u001B[1;32mdef\u001B[0m \u001B[0m__setattr__\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mself\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mname\u001B[0m\u001B[1;33m:\u001B[0m \u001B[0mstr\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mvalue\u001B[0m\u001B[1;33m)\u001B[0m \u001B[1;33m->\u001B[0m \u001B[1;32mNone\u001B[0m\u001B[1;33m:\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n",
393 | "\u001B[1;31mAttributeError\u001B[0m: 'DataFrame' object has no attribute 'ix'"
394 | ]
395 | }
396 | ],
397 | "source": [
398 | "df.ix[:3, ['A', 'C']]"
399 | ],
400 | "metadata": {
401 | "collapsed": false,
402 | "pycharm": {
403 | "name": "#%%\n"
404 | }
405 | }
406 | },
407 | {
408 | "cell_type": "markdown",
409 | "source": [
410 | "### 筛选"
411 | ],
412 | "metadata": {
413 | "collapsed": false,
414 | "pycharm": {
415 | "name": "#%% md\n"
416 | }
417 | }
418 | },
419 | {
420 | "cell_type": "code",
421 | "execution_count": 22,
422 | "outputs": [
423 | {
424 | "data": {
425 | "text/plain": " A B C D\n2013-01-04 12 13 14 15\n2013-01-05 16 17 18 19\n2013-01-06 20 21 22 23",
426 | "text/html": "\n\n
\n \n \n | \n A | \n B | \n C | \n D | \n
\n \n \n \n 2013-01-04 | \n 12 | \n 13 | \n 14 | \n 15 | \n
\n \n 2013-01-05 | \n 16 | \n 17 | \n 18 | \n 19 | \n
\n \n 2013-01-06 | \n 20 | \n 21 | \n 22 | \n 23 | \n
\n \n
\n
"
427 | },
428 | "execution_count": 22,
429 | "metadata": {},
430 | "output_type": "execute_result"
431 | }
432 | ],
433 | "source": [
434 | "df[df.A > 8]"
435 | ],
436 | "metadata": {
437 | "collapsed": false,
438 | "pycharm": {
439 | "name": "#%%\n"
440 | }
441 | }
442 | },
443 | {
444 | "cell_type": "code",
445 | "execution_count": null,
446 | "outputs": [],
447 | "source": [],
448 | "metadata": {
449 | "collapsed": false,
450 | "pycharm": {
451 | "name": "#%%\n"
452 | }
453 | }
454 | }
455 | ],
456 | "metadata": {
457 | "kernelspec": {
458 | "display_name": "Python 3",
459 | "language": "python",
460 | "name": "python3"
461 | },
462 | "language_info": {
463 | "codemirror_mode": {
464 | "name": "ipython",
465 | "version": 2
466 | },
467 | "file_extension": ".py",
468 | "mimetype": "text/x-python",
469 | "name": "python",
470 | "nbconvert_exporter": "python",
471 | "pygments_lexer": "ipython2",
472 | "version": "2.7.6"
473 | }
474 | },
475 | "nbformat": 4,
476 | "nbformat_minor": 0
477 | }
--------------------------------------------------------------------------------
/docs/data-analysis/files/02-setting-value.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "collapsed": true,
7 | "pycharm": {
8 | "name": "#%% md\n"
9 | }
10 | },
11 | "source": [
12 | "## 设置值"
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": 18,
18 | "outputs": [],
19 | "source": [
20 | "import pandas as pd\n",
21 | "import numpy as np"
22 | ],
23 | "metadata": {
24 | "collapsed": false,
25 | "pycharm": {
26 | "name": "#%%\n"
27 | }
28 | }
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": 19,
33 | "outputs": [],
34 | "source": [
35 | "dates = pd.date_range('20130101', periods=6)\n",
36 | "df = pd.DataFrame(np.arange(24).reshape((6,4)), index=dates, columns=['A', 'B', 'C', 'D'])"
37 | ],
38 | "metadata": {
39 | "collapsed": false,
40 | "pycharm": {
41 | "name": "#%%\n"
42 | }
43 | }
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": 20,
48 | "outputs": [
49 | {
50 | "data": {
51 | "text/plain": " A B C D\n2013-01-01 0 2222 2 3\n2013-01-02 4 5 6 7\n2013-01-03 8 9 1111 11\n2013-01-04 12 13 14 15\n2013-01-05 16 17 18 19\n2013-01-06 20 21 22 23",
52 | "text/html": "\n\n
\n \n \n | \n A | \n B | \n C | \n D | \n
\n \n \n \n 2013-01-01 | \n 0 | \n 2222 | \n 2 | \n 3 | \n
\n \n 2013-01-02 | \n 4 | \n 5 | \n 6 | \n 7 | \n
\n \n 2013-01-03 | \n 8 | \n 9 | \n 1111 | \n 11 | \n
\n \n 2013-01-04 | \n 12 | \n 13 | \n 14 | \n 15 | \n
\n \n 2013-01-05 | \n 16 | \n 17 | \n 18 | \n 19 | \n
\n \n 2013-01-06 | \n 20 | \n 21 | \n 22 | \n 23 | \n
\n \n
\n
"
53 | },
54 | "execution_count": 20,
55 | "metadata": {},
56 | "output_type": "execute_result"
57 | }
58 | ],
59 | "source": [
60 | "df.iloc[2, 2] = 1111\n",
61 | "df.loc['20130101', 'B'] = 2222\n",
62 | "df"
63 | ],
64 | "metadata": {
65 | "collapsed": false,
66 | "pycharm": {
67 | "name": "#%%\n"
68 | }
69 | }
70 | },
71 | {
72 | "cell_type": "code",
73 | "execution_count": 21,
74 | "outputs": [
75 | {
76 | "data": {
77 | "text/plain": " A B C D\n2013-01-01 0 2222 2 3\n2013-01-02 4 5 6 7\n2013-01-03 8 0 1111 11\n2013-01-04 12 0 14 15\n2013-01-05 16 0 18 19\n2013-01-06 20 0 22 23",
78 | "text/html": "\n\n
\n \n \n | \n A | \n B | \n C | \n D | \n
\n \n \n \n 2013-01-01 | \n 0 | \n 2222 | \n 2 | \n 3 | \n
\n \n 2013-01-02 | \n 4 | \n 5 | \n 6 | \n 7 | \n
\n \n 2013-01-03 | \n 8 | \n 0 | \n 1111 | \n 11 | \n
\n \n 2013-01-04 | \n 12 | \n 0 | \n 14 | \n 15 | \n
\n \n 2013-01-05 | \n 16 | \n 0 | \n 18 | \n 19 | \n
\n \n 2013-01-06 | \n 20 | \n 0 | \n 22 | \n 23 | \n
\n \n
\n
"
79 | },
80 | "execution_count": 21,
81 | "metadata": {},
82 | "output_type": "execute_result"
83 | }
84 | ],
85 | "source": [
86 | "df.B[df.A>4] = 0\n",
87 | "df"
88 | ],
89 | "metadata": {
90 | "collapsed": false,
91 | "pycharm": {
92 | "name": "#%%\n"
93 | }
94 | }
95 | },
96 | {
97 | "cell_type": "code",
98 | "execution_count": 22,
99 | "outputs": [
100 | {
101 | "data": {
102 | "text/plain": " A B C D E\n2013-01-01 0 2222 2 3 NaN\n2013-01-02 4 5 6 7 NaN\n2013-01-03 8 0 1111 11 NaN\n2013-01-04 12 0 14 15 NaN\n2013-01-05 16 0 18 19 NaN\n2013-01-06 20 0 22 23 NaN",
103 | "text/html": "\n\n
\n \n \n | \n A | \n B | \n C | \n D | \n E | \n
\n \n \n \n 2013-01-01 | \n 0 | \n 2222 | \n 2 | \n 3 | \n NaN | \n
\n \n 2013-01-02 | \n 4 | \n 5 | \n 6 | \n 7 | \n NaN | \n
\n \n 2013-01-03 | \n 8 | \n 0 | \n 1111 | \n 11 | \n NaN | \n
\n \n 2013-01-04 | \n 12 | \n 0 | \n 14 | \n 15 | \n NaN | \n
\n \n 2013-01-05 | \n 16 | \n 0 | \n 18 | \n 19 | \n NaN | \n
\n \n 2013-01-06 | \n 20 | \n 0 | \n 22 | \n 23 | \n NaN | \n
\n \n
\n
"
104 | },
105 | "execution_count": 22,
106 | "metadata": {},
107 | "output_type": "execute_result"
108 | }
109 | ],
110 | "source": [
111 | "df['E'] = np.nan\n",
112 | "df"
113 | ],
114 | "metadata": {
115 | "collapsed": false,
116 | "pycharm": {
117 | "name": "#%%\n"
118 | }
119 | }
120 | },
121 | {
122 | "cell_type": "code",
123 | "execution_count": 25,
124 | "outputs": [
125 | {
126 | "data": {
127 | "text/plain": " A B C D E F\n2013-01-01 0 2222 2 3 NaN NaN\n2013-01-02 4 5 6 7 NaN NaN\n2013-01-03 8 0 1111 11 NaN NaN\n2013-01-04 12 0 14 15 NaN NaN\n2013-01-05 16 0 18 19 NaN NaN\n2013-01-06 20 0 22 23 NaN NaN",
128 | "text/html": "\n\n
\n \n \n | \n A | \n B | \n C | \n D | \n E | \n F | \n
\n \n \n \n 2013-01-01 | \n 0 | \n 2222 | \n 2 | \n 3 | \n NaN | \n NaN | \n
\n \n 2013-01-02 | \n 4 | \n 5 | \n 6 | \n 7 | \n NaN | \n NaN | \n
\n \n 2013-01-03 | \n 8 | \n 0 | \n 1111 | \n 11 | \n NaN | \n NaN | \n
\n \n 2013-01-04 | \n 12 | \n 0 | \n 14 | \n 15 | \n NaN | \n NaN | \n
\n \n 2013-01-05 | \n 16 | \n 0 | \n 18 | \n 19 | \n NaN | \n NaN | \n
\n \n 2013-01-06 | \n 20 | \n 0 | \n 22 | \n 23 | \n NaN | \n NaN | \n
\n \n
\n
"
129 | },
130 | "execution_count": 25,
131 | "metadata": {},
132 | "output_type": "execute_result"
133 | }
134 | ],
135 | "source": [
136 | "# 如果没有index=pd.date_range('20130101', periods=6) 就会是NAN\n",
137 | "df['F'] = pd.Series([1,2,3,4,5,6], index=pd.date_range('20130101', periods=6))\n",
138 | "df"
139 | ],
140 | "metadata": {
141 | "collapsed": false,
142 | "pycharm": {
143 | "name": "#%%\n"
144 | }
145 | }
146 | },
147 | {
148 | "cell_type": "code",
149 | "execution_count": null,
150 | "outputs": [],
151 | "source": [],
152 | "metadata": {
153 | "collapsed": false,
154 | "pycharm": {
155 | "name": "#%%\n"
156 | }
157 | }
158 | }
159 | ],
160 | "metadata": {
161 | "kernelspec": {
162 | "display_name": "Python 3",
163 | "language": "python",
164 | "name": "python3"
165 | },
166 | "language_info": {
167 | "codemirror_mode": {
168 | "name": "ipython",
169 | "version": 2
170 | },
171 | "file_extension": ".py",
172 | "mimetype": "text/x-python",
173 | "name": "python",
174 | "nbconvert_exporter": "python",
175 | "pygments_lexer": "ipython2",
176 | "version": "2.7.6"
177 | }
178 | },
179 | "nbformat": 4,
180 | "nbformat_minor": 0
181 | }
--------------------------------------------------------------------------------
/docs/data-analysis/files/03-process-dropout-data.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "collapsed": true,
7 | "pycharm": {
8 | "name": "#%% md\n"
9 | }
10 | },
11 | "source": [
12 | "## 处理丢失数据"
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": 2,
18 | "outputs": [],
19 | "source": [
20 | "import pandas as pd\n",
21 | "import numpy as np"
22 | ],
23 | "metadata": {
24 | "collapsed": false,
25 | "pycharm": {
26 | "name": "#%%\n"
27 | }
28 | }
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": 3,
33 | "outputs": [],
34 | "source": [
35 | "dates = pd.date_range('20130101', periods=6)\n",
36 | "df = pd.DataFrame(np.arange(24).reshape((6,4)), index=dates, columns=['A', 'B', 'C', 'D'])"
37 | ],
38 | "metadata": {
39 | "collapsed": false,
40 | "pycharm": {
41 | "name": "#%%\n"
42 | }
43 | }
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": 10,
48 | "outputs": [],
49 | "source": [
50 | "df.iloc[0, 1] = np.nan\n",
51 | "df.iloc[1, 2] = np.nan"
52 | ],
53 | "metadata": {
54 | "collapsed": false,
55 | "pycharm": {
56 | "name": "#%%\n"
57 | }
58 | }
59 | },
60 | {
61 | "cell_type": "code",
62 | "execution_count": 11,
63 | "outputs": [
64 | {
65 | "data": {
66 | "text/plain": " A B C D\n2013-01-01 0 NaN 2.0 3\n2013-01-02 4 5.0 NaN 7\n2013-01-03 8 9.0 10.0 11\n2013-01-04 12 13.0 14.0 15\n2013-01-05 16 17.0 18.0 19\n2013-01-06 20 21.0 22.0 23",
67 | "text/html": "\n\n
\n \n \n | \n A | \n B | \n C | \n D | \n
\n \n \n \n 2013-01-01 | \n 0 | \n NaN | \n 2.0 | \n 3 | \n
\n \n 2013-01-02 | \n 4 | \n 5.0 | \n NaN | \n 7 | \n
\n \n 2013-01-03 | \n 8 | \n 9.0 | \n 10.0 | \n 11 | \n
\n \n 2013-01-04 | \n 12 | \n 13.0 | \n 14.0 | \n 15 | \n
\n \n 2013-01-05 | \n 16 | \n 17.0 | \n 18.0 | \n 19 | \n
\n \n 2013-01-06 | \n 20 | \n 21.0 | \n 22.0 | \n 23 | \n
\n \n
\n
"
68 | },
69 | "execution_count": 11,
70 | "metadata": {},
71 | "output_type": "execute_result"
72 | }
73 | ],
74 | "source": [
75 | "df"
76 | ],
77 | "metadata": {
78 | "collapsed": false,
79 | "pycharm": {
80 | "name": "#%%\n"
81 | }
82 | }
83 | },
84 | {
85 | "cell_type": "code",
86 | "execution_count": 12,
87 | "outputs": [
88 | {
89 | "name": "stdout",
90 | "output_type": "stream",
91 | "text": [
92 | " A B C D\n",
93 | "2013-01-03 8 9.0 10.0 11\n",
94 | "2013-01-04 12 13.0 14.0 15\n",
95 | "2013-01-05 16 17.0 18.0 19\n",
96 | "2013-01-06 20 21.0 22.0 23\n"
97 | ]
98 | }
99 | ],
100 | "source": [
101 | "# 直接删除整行 / axis = 1 删除整列\n",
102 | "print(df.dropna(axis=0, how='any')) # how = {'any', 'all'}"
103 | ],
104 | "metadata": {
105 | "collapsed": false,
106 | "pycharm": {
107 | "name": "#%%\n"
108 | }
109 | }
110 | },
111 | {
112 | "cell_type": "code",
113 | "execution_count": 13,
114 | "outputs": [
115 | {
116 | "data": {
117 | "text/plain": " A B C D\n2013-01-01 0 0.0 2.0 3\n2013-01-02 4 5.0 0.0 7\n2013-01-03 8 9.0 10.0 11\n2013-01-04 12 13.0 14.0 15\n2013-01-05 16 17.0 18.0 19\n2013-01-06 20 21.0 22.0 23",
118 | "text/html": "\n\n
\n \n \n | \n A | \n B | \n C | \n D | \n
\n \n \n \n 2013-01-01 | \n 0 | \n 0.0 | \n 2.0 | \n 3 | \n
\n \n 2013-01-02 | \n 4 | \n 5.0 | \n 0.0 | \n 7 | \n
\n \n 2013-01-03 | \n 8 | \n 9.0 | \n 10.0 | \n 11 | \n
\n \n 2013-01-04 | \n 12 | \n 13.0 | \n 14.0 | \n 15 | \n
\n \n 2013-01-05 | \n 16 | \n 17.0 | \n 18.0 | \n 19 | \n
\n \n 2013-01-06 | \n 20 | \n 21.0 | \n 22.0 | \n 23 | \n
\n \n
\n
"
119 | },
120 | "execution_count": 13,
121 | "metadata": {},
122 | "output_type": "execute_result"
123 | }
124 | ],
125 | "source": [
126 | "df = df.fillna(value=0)\n",
127 | "df"
128 | ],
129 | "metadata": {
130 | "collapsed": false,
131 | "pycharm": {
132 | "name": "#%%\n"
133 | }
134 | }
135 | },
136 | {
137 | "cell_type": "code",
138 | "execution_count": 18,
139 | "outputs": [
140 | {
141 | "name": "stdout",
142 | "output_type": "stream",
143 | "text": [
144 | "False\n"
145 | ]
146 | }
147 | ],
148 | "source": [
149 | "# 是否丢失数据\n",
150 | "print(np.any(df.isnull()) == True)"
151 | ],
152 | "metadata": {
153 | "collapsed": false,
154 | "pycharm": {
155 | "name": "#%%\n"
156 | }
157 | }
158 | },
159 | {
160 | "cell_type": "code",
161 | "execution_count": null,
162 | "outputs": [],
163 | "source": [],
164 | "metadata": {
165 | "collapsed": false,
166 | "pycharm": {
167 | "name": "#%%\n"
168 | }
169 | }
170 | }
171 | ],
172 | "metadata": {
173 | "kernelspec": {
174 | "display_name": "Python 3",
175 | "language": "python",
176 | "name": "python3"
177 | },
178 | "language_info": {
179 | "codemirror_mode": {
180 | "name": "ipython",
181 | "version": 2
182 | },
183 | "file_extension": ".py",
184 | "mimetype": "text/x-python",
185 | "name": "python",
186 | "nbconvert_exporter": "python",
187 | "pygments_lexer": "ipython2",
188 | "version": "2.7.6"
189 | }
190 | },
191 | "nbformat": 4,
192 | "nbformat_minor": 0
193 | }
--------------------------------------------------------------------------------
/docs/data-analysis/files/04-import-and-export.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "import pandas as pd"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 2,
17 | "outputs": [],
18 | "source": [
19 | "students = pd.read_csv('./student.csv')"
20 | ],
21 | "metadata": {
22 | "collapsed": false,
23 | "pycharm": {
24 | "name": "#%%\n"
25 | }
26 | }
27 | },
28 | {
29 | "cell_type": "code",
30 | "execution_count": 3,
31 | "outputs": [
32 | {
33 | "data": {
34 | "text/plain": " Student ID name age gender\n0 1100 Kelly 22 Female\n1 1101 Clo 21 Female\n2 1102 Tilly 22 Female\n3 1103 Tony 24 Male\n4 1104 David 20 Male\n5 1105 Catty 22 Female\n6 1106 M 3 Female\n7 1107 N 43 Male\n8 1108 A 13 Male\n9 1109 S 12 Male\n10 1110 David 33 Male\n11 1111 Dw 3 Female\n12 1112 Q 23 Male\n13 1113 W 21 Female",
35 | "text/html": "\n\n
\n \n \n | \n Student ID | \n name | \n age | \n gender | \n
\n \n \n \n 0 | \n 1100 | \n Kelly | \n 22 | \n Female | \n
\n \n 1 | \n 1101 | \n Clo | \n 21 | \n Female | \n
\n \n 2 | \n 1102 | \n Tilly | \n 22 | \n Female | \n
\n \n 3 | \n 1103 | \n Tony | \n 24 | \n Male | \n
\n \n 4 | \n 1104 | \n David | \n 20 | \n Male | \n
\n \n 5 | \n 1105 | \n Catty | \n 22 | \n Female | \n
\n \n 6 | \n 1106 | \n M | \n 3 | \n Female | \n
\n \n 7 | \n 1107 | \n N | \n 43 | \n Male | \n
\n \n 8 | \n 1108 | \n A | \n 13 | \n Male | \n
\n \n 9 | \n 1109 | \n S | \n 12 | \n Male | \n
\n \n 10 | \n 1110 | \n David | \n 33 | \n Male | \n
\n \n 11 | \n 1111 | \n Dw | \n 3 | \n Female | \n
\n \n 12 | \n 1112 | \n Q | \n 23 | \n Male | \n
\n \n 13 | \n 1113 | \n W | \n 21 | \n Female | \n
\n \n
\n
"
36 | },
37 | "execution_count": 3,
38 | "metadata": {},
39 | "output_type": "execute_result"
40 | }
41 | ],
42 | "source": [
43 | "students"
44 | ],
45 | "metadata": {
46 | "collapsed": false,
47 | "pycharm": {
48 | "name": "#%%\n"
49 | }
50 | }
51 | },
52 | {
53 | "cell_type": "code",
54 | "execution_count": null,
55 | "outputs": [],
56 | "source": [
57 | "# 保存\n",
58 | "students.to_pickle()"
59 | ],
60 | "metadata": {
61 | "collapsed": false,
62 | "pycharm": {
63 | "name": "#%%\n"
64 | }
65 | }
66 | }
67 | ],
68 | "metadata": {
69 | "kernelspec": {
70 | "display_name": "Python 3",
71 | "language": "python",
72 | "name": "python3"
73 | },
74 | "language_info": {
75 | "codemirror_mode": {
76 | "name": "ipython",
77 | "version": 2
78 | },
79 | "file_extension": ".py",
80 | "mimetype": "text/x-python",
81 | "name": "python",
82 | "nbconvert_exporter": "python",
83 | "pygments_lexer": "ipython2",
84 | "version": "2.7.6"
85 | }
86 | },
87 | "nbformat": 4,
88 | "nbformat_minor": 0
89 | }
--------------------------------------------------------------------------------
/docs/data-analysis/files/05-concat.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "collapsed": true
8 | },
9 | "outputs": [],
10 | "source": [
11 | "import pandas as pd\n",
12 | "import numpy as np"
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": 2,
18 | "outputs": [],
19 | "source": [
20 | "df1 = pd.DataFrame(np.ones((3,4))*0, columns=['a','b','c','d'])\n",
21 | "df2 = pd.DataFrame(np.ones((3,4))*1, columns=['a','b','c','d'])\n",
22 | "df3 = pd.DataFrame(np.ones((3,4))*2, columns=['a','b','c','d'])"
23 | ],
24 | "metadata": {
25 | "collapsed": false,
26 | "pycharm": {
27 | "name": "#%%\n"
28 | }
29 | }
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": 3,
34 | "outputs": [
35 | {
36 | "data": {
37 | "text/plain": " a b c d\n0 0.0 0.0 0.0 0.0\n1 0.0 0.0 0.0 0.0\n2 0.0 0.0 0.0 0.0",
38 | "text/html": "\n\n
\n \n \n | \n a | \n b | \n c | \n d | \n
\n \n \n \n 0 | \n 0.0 | \n 0.0 | \n 0.0 | \n 0.0 | \n
\n \n 1 | \n 0.0 | \n 0.0 | \n 0.0 | \n 0.0 | \n
\n \n 2 | \n 0.0 | \n 0.0 | \n 0.0 | \n 0.0 | \n
\n \n
\n
"
39 | },
40 | "execution_count": 3,
41 | "metadata": {},
42 | "output_type": "execute_result"
43 | }
44 | ],
45 | "source": [
46 | "df1"
47 | ],
48 | "metadata": {
49 | "collapsed": false,
50 | "pycharm": {
51 | "name": "#%%\n"
52 | }
53 | }
54 | },
55 | {
56 | "cell_type": "code",
57 | "execution_count": 4,
58 | "outputs": [
59 | {
60 | "data": {
61 | "text/plain": " a b c d\n0 1.0 1.0 1.0 1.0\n1 1.0 1.0 1.0 1.0\n2 1.0 1.0 1.0 1.0",
62 | "text/html": "\n\n
\n \n \n | \n a | \n b | \n c | \n d | \n
\n \n \n \n 0 | \n 1.0 | \n 1.0 | \n 1.0 | \n 1.0 | \n
\n \n 1 | \n 1.0 | \n 1.0 | \n 1.0 | \n 1.0 | \n
\n \n 2 | \n 1.0 | \n 1.0 | \n 1.0 | \n 1.0 | \n
\n \n
\n
"
63 | },
64 | "execution_count": 4,
65 | "metadata": {},
66 | "output_type": "execute_result"
67 | }
68 | ],
69 | "source": [
70 | "df2"
71 | ],
72 | "metadata": {
73 | "collapsed": false,
74 | "pycharm": {
75 | "name": "#%%\n"
76 | }
77 | }
78 | },
79 | {
80 | "cell_type": "code",
81 | "execution_count": 5,
82 | "outputs": [
83 | {
84 | "data": {
85 | "text/plain": " a b c d\n0 2.0 2.0 2.0 2.0\n1 2.0 2.0 2.0 2.0\n2 2.0 2.0 2.0 2.0",
86 | "text/html": "\n\n
\n \n \n | \n a | \n b | \n c | \n d | \n
\n \n \n \n 0 | \n 2.0 | \n 2.0 | \n 2.0 | \n 2.0 | \n
\n \n 1 | \n 2.0 | \n 2.0 | \n 2.0 | \n 2.0 | \n
\n \n 2 | \n 2.0 | \n 2.0 | \n 2.0 | \n 2.0 | \n
\n \n
\n
"
87 | },
88 | "execution_count": 5,
89 | "metadata": {},
90 | "output_type": "execute_result"
91 | }
92 | ],
93 | "source": [
94 | "df3"
95 | ],
96 | "metadata": {
97 | "collapsed": false,
98 | "pycharm": {
99 | "name": "#%%\n"
100 | }
101 | }
102 | },
103 | {
104 | "cell_type": "code",
105 | "execution_count": 6,
106 | "outputs": [
107 | {
108 | "data": {
109 | "text/plain": " a b c d\n0 0.0 0.0 0.0 0.0\n1 0.0 0.0 0.0 0.0\n2 0.0 0.0 0.0 0.0\n0 1.0 1.0 1.0 1.0\n1 1.0 1.0 1.0 1.0\n2 1.0 1.0 1.0 1.0\n0 2.0 2.0 2.0 2.0\n1 2.0 2.0 2.0 2.0\n2 2.0 2.0 2.0 2.0",
110 | "text/html": "\n\n
\n \n \n | \n a | \n b | \n c | \n d | \n
\n \n \n \n 0 | \n 0.0 | \n 0.0 | \n 0.0 | \n 0.0 | \n
\n \n 1 | \n 0.0 | \n 0.0 | \n 0.0 | \n 0.0 | \n
\n \n 2 | \n 0.0 | \n 0.0 | \n 0.0 | \n 0.0 | \n
\n \n 0 | \n 1.0 | \n 1.0 | \n 1.0 | \n 1.0 | \n
\n \n 1 | \n 1.0 | \n 1.0 | \n 1.0 | \n 1.0 | \n
\n \n 2 | \n 1.0 | \n 1.0 | \n 1.0 | \n 1.0 | \n
\n \n 0 | \n 2.0 | \n 2.0 | \n 2.0 | \n 2.0 | \n
\n \n 1 | \n 2.0 | \n 2.0 | \n 2.0 | \n 2.0 | \n
\n \n 2 | \n 2.0 | \n 2.0 | \n 2.0 | \n 2.0 | \n
\n \n
\n
"
111 | },
112 | "execution_count": 6,
113 | "metadata": {},
114 | "output_type": "execute_result"
115 | }
116 | ],
117 | "source": [
118 | "res = pd.concat([df1,df2,df3], axis=0)\n",
119 | "res"
120 | ],
121 | "metadata": {
122 | "collapsed": false,
123 | "pycharm": {
124 | "name": "#%%\n"
125 | }
126 | }
127 | },
128 | {
129 | "cell_type": "code",
130 | "execution_count": 7,
131 | "outputs": [
132 | {
133 | "data": {
134 | "text/plain": " a b c d\n0 0.0 0.0 0.0 0.0\n1 0.0 0.0 0.0 0.0\n2 0.0 0.0 0.0 0.0\n3 1.0 1.0 1.0 1.0\n4 1.0 1.0 1.0 1.0\n5 1.0 1.0 1.0 1.0\n6 2.0 2.0 2.0 2.0\n7 2.0 2.0 2.0 2.0\n8 2.0 2.0 2.0 2.0",
135 | "text/html": "\n\n
\n \n \n | \n a | \n b | \n c | \n d | \n
\n \n \n \n 0 | \n 0.0 | \n 0.0 | \n 0.0 | \n 0.0 | \n
\n \n 1 | \n 0.0 | \n 0.0 | \n 0.0 | \n 0.0 | \n
\n \n 2 | \n 0.0 | \n 0.0 | \n 0.0 | \n 0.0 | \n
\n \n 3 | \n 1.0 | \n 1.0 | \n 1.0 | \n 1.0 | \n
\n \n 4 | \n 1.0 | \n 1.0 | \n 1.0 | \n 1.0 | \n
\n \n 5 | \n 1.0 | \n 1.0 | \n 1.0 | \n 1.0 | \n
\n \n 6 | \n 2.0 | \n 2.0 | \n 2.0 | \n 2.0 | \n
\n \n 7 | \n 2.0 | \n 2.0 | \n 2.0 | \n 2.0 | \n
\n \n 8 | \n 2.0 | \n 2.0 | \n 2.0 | \n 2.0 | \n
\n \n
\n
"
136 | },
137 | "execution_count": 7,
138 | "metadata": {},
139 | "output_type": "execute_result"
140 | }
141 | ],
142 | "source": [
143 | "res = pd.concat([df1,df2,df3], axis=0, ignore_index=True)\n",
144 | "res"
145 | ],
146 | "metadata": {
147 | "collapsed": false,
148 | "pycharm": {
149 | "name": "#%%\n"
150 | }
151 | }
152 | },
153 | {
154 | "cell_type": "markdown",
155 | "source": [
156 | "### join\n",
157 | "* inner\n",
158 | "* outer"
159 | ],
160 | "metadata": {
161 | "collapsed": false,
162 | "pycharm": {
163 | "name": "#%% md\n"
164 | }
165 | }
166 | },
167 | {
168 | "cell_type": "code",
169 | "execution_count": 8,
170 | "outputs": [],
171 | "source": [
172 | "df1 = pd.DataFrame(np.ones((3,4))*0, columns=['a','b','c','d'], index=[1,2,3])\n",
173 | "df2 = pd.DataFrame(np.ones((3,4))*1, columns=['b','c','d', 'e'], index=[2,3,4])"
174 | ],
175 | "metadata": {
176 | "collapsed": false,
177 | "pycharm": {
178 | "name": "#%%\n"
179 | }
180 | }
181 | },
182 | {
183 | "cell_type": "code",
184 | "execution_count": 9,
185 | "outputs": [
186 | {
187 | "data": {
188 | "text/plain": " a b c d\n1 0.0 0.0 0.0 0.0\n2 0.0 0.0 0.0 0.0\n3 0.0 0.0 0.0 0.0",
189 | "text/html": "\n\n
\n \n \n | \n a | \n b | \n c | \n d | \n
\n \n \n \n 1 | \n 0.0 | \n 0.0 | \n 0.0 | \n 0.0 | \n
\n \n 2 | \n 0.0 | \n 0.0 | \n 0.0 | \n 0.0 | \n
\n \n 3 | \n 0.0 | \n 0.0 | \n 0.0 | \n 0.0 | \n
\n \n
\n
"
190 | },
191 | "execution_count": 9,
192 | "metadata": {},
193 | "output_type": "execute_result"
194 | }
195 | ],
196 | "source": [
197 | "df1"
198 | ],
199 | "metadata": {
200 | "collapsed": false,
201 | "pycharm": {
202 | "name": "#%%\n"
203 | }
204 | }
205 | },
206 | {
207 | "cell_type": "code",
208 | "execution_count": 10,
209 | "outputs": [
210 | {
211 | "data": {
212 | "text/plain": " b c d e\n2 1.0 1.0 1.0 1.0\n3 1.0 1.0 1.0 1.0\n4 1.0 1.0 1.0 1.0",
213 | "text/html": "\n\n
\n \n \n | \n b | \n c | \n d | \n e | \n
\n \n \n \n 2 | \n 1.0 | \n 1.0 | \n 1.0 | \n 1.0 | \n
\n \n 3 | \n 1.0 | \n 1.0 | \n 1.0 | \n 1.0 | \n
\n \n 4 | \n 1.0 | \n 1.0 | \n 1.0 | \n 1.0 | \n
\n \n
\n
"
214 | },
215 | "execution_count": 10,
216 | "metadata": {},
217 | "output_type": "execute_result"
218 | }
219 | ],
220 | "source": [
221 | "df2"
222 | ],
223 | "metadata": {
224 | "collapsed": false,
225 | "pycharm": {
226 | "name": "#%%\n"
227 | }
228 | }
229 | },
230 | {
231 | "cell_type": "code",
232 | "execution_count": 11,
233 | "outputs": [
234 | {
235 | "data": {
236 | "text/plain": " a b c d e\n1 0.0 0.0 0.0 0.0 NaN\n2 0.0 0.0 0.0 0.0 NaN\n3 0.0 0.0 0.0 0.0 NaN\n2 NaN 1.0 1.0 1.0 1.0\n3 NaN 1.0 1.0 1.0 1.0\n4 NaN 1.0 1.0 1.0 1.0",
237 | "text/html": "\n\n
\n \n \n | \n a | \n b | \n c | \n d | \n e | \n
\n \n \n \n 1 | \n 0.0 | \n 0.0 | \n 0.0 | \n 0.0 | \n NaN | \n
\n \n 2 | \n 0.0 | \n 0.0 | \n 0.0 | \n 0.0 | \n NaN | \n
\n \n 3 | \n 0.0 | \n 0.0 | \n 0.0 | \n 0.0 | \n NaN | \n
\n \n 2 | \n NaN | \n 1.0 | \n 1.0 | \n 1.0 | \n 1.0 | \n
\n \n 3 | \n NaN | \n 1.0 | \n 1.0 | \n 1.0 | \n 1.0 | \n
\n \n 4 | \n NaN | \n 1.0 | \n 1.0 | \n 1.0 | \n 1.0 | \n
\n \n
\n
"
238 | },
239 | "execution_count": 11,
240 | "metadata": {},
241 | "output_type": "execute_result"
242 | }
243 | ],
244 | "source": [
245 | "res = pd.concat([df1, df2], axis=0)\n",
246 | "res"
247 | ],
248 | "metadata": {
249 | "collapsed": false,
250 | "pycharm": {
251 | "name": "#%%\n"
252 | }
253 | }
254 | },
255 | {
256 | "cell_type": "code",
257 | "execution_count": 13,
258 | "outputs": [
259 | {
260 | "data": {
261 | "text/plain": " a b c d e\n1 0.0 0.0 0.0 0.0 NaN\n2 0.0 0.0 0.0 0.0 NaN\n3 0.0 0.0 0.0 0.0 NaN\n2 NaN 1.0 1.0 1.0 1.0\n3 NaN 1.0 1.0 1.0 1.0\n4 NaN 1.0 1.0 1.0 1.0",
262 | "text/html": "\n\n
\n \n \n | \n a | \n b | \n c | \n d | \n e | \n
\n \n \n \n 1 | \n 0.0 | \n 0.0 | \n 0.0 | \n 0.0 | \n NaN | \n
\n \n 2 | \n 0.0 | \n 0.0 | \n 0.0 | \n 0.0 | \n NaN | \n
\n \n 3 | \n 0.0 | \n 0.0 | \n 0.0 | \n 0.0 | \n NaN | \n
\n \n 2 | \n NaN | \n 1.0 | \n 1.0 | \n 1.0 | \n 1.0 | \n
\n \n 3 | \n NaN | \n 1.0 | \n 1.0 | \n 1.0 | \n 1.0 | \n
\n \n 4 | \n NaN | \n 1.0 | \n 1.0 | \n 1.0 | \n 1.0 | \n
\n \n
\n
"
263 | },
264 | "execution_count": 13,
265 | "metadata": {},
266 | "output_type": "execute_result"
267 | }
268 | ],
269 | "source": [
270 | "res = pd.concat([df1, df2], axis=0, join='outer')\n",
271 | "res"
272 | ],
273 | "metadata": {
274 | "collapsed": false,
275 | "pycharm": {
276 | "name": "#%%\n"
277 | }
278 | }
279 | },
280 | {
281 | "cell_type": "code",
282 | "execution_count": 14,
283 | "outputs": [
284 | {
285 | "data": {
286 | "text/plain": " b c d\n1 0.0 0.0 0.0\n2 0.0 0.0 0.0\n3 0.0 0.0 0.0\n2 1.0 1.0 1.0\n3 1.0 1.0 1.0\n4 1.0 1.0 1.0",
287 | "text/html": "\n\n
\n \n \n | \n b | \n c | \n d | \n
\n \n \n \n 1 | \n 0.0 | \n 0.0 | \n 0.0 | \n
\n \n 2 | \n 0.0 | \n 0.0 | \n 0.0 | \n
\n \n 3 | \n 0.0 | \n 0.0 | \n 0.0 | \n
\n \n 2 | \n 1.0 | \n 1.0 | \n 1.0 | \n
\n \n 3 | \n 1.0 | \n 1.0 | \n 1.0 | \n
\n \n 4 | \n 1.0 | \n 1.0 | \n 1.0 | \n
\n \n
\n
"
288 | },
289 | "execution_count": 14,
290 | "metadata": {},
291 | "output_type": "execute_result"
292 | }
293 | ],
294 | "source": [
295 | "res = pd.concat([df1, df2], axis=0, join='inner')\n",
296 | "res"
297 | ],
298 | "metadata": {
299 | "collapsed": false,
300 | "pycharm": {
301 | "name": "#%%\n"
302 | }
303 | }
304 | },
305 | {
306 | "cell_type": "markdown",
307 | "source": [
308 | "### append"
309 | ],
310 | "metadata": {
311 | "collapsed": false,
312 | "pycharm": {
313 | "name": "#%% md\n"
314 | }
315 | }
316 | },
317 | {
318 | "cell_type": "code",
319 | "execution_count": 15,
320 | "outputs": [],
321 | "source": [
322 | "df1 = pd.DataFrame(np.ones((3,4))*0, columns=['a','b','c','d'])\n",
323 | "df2 = pd.DataFrame(np.ones((3,4))*1, columns=['a','b','c','d'])\n",
324 | "# df2 = pd.DataFrame(np.ones((3,4))*1, columns=['b','c','d', 'e'], index=[2,3,4])"
325 | ],
326 | "metadata": {
327 | "collapsed": false,
328 | "pycharm": {
329 | "name": "#%%\n"
330 | }
331 | }
332 | },
333 | {
334 | "cell_type": "code",
335 | "execution_count": 18,
336 | "outputs": [
337 | {
338 | "data": {
339 | "text/plain": " a b c d e\n0 0.0 0.0 0.0 0.0 NaN\n1 0.0 0.0 0.0 0.0 NaN\n2 0.0 0.0 0.0 0.0 NaN\n3 NaN 1.0 1.0 1.0 1.0\n4 NaN 1.0 1.0 1.0 1.0\n5 NaN 1.0 1.0 1.0 1.0",
340 | "text/html": "\n\n
\n \n \n | \n a | \n b | \n c | \n d | \n e | \n
\n \n \n \n 0 | \n 0.0 | \n 0.0 | \n 0.0 | \n 0.0 | \n NaN | \n
\n \n 1 | \n 0.0 | \n 0.0 | \n 0.0 | \n 0.0 | \n NaN | \n
\n \n 2 | \n 0.0 | \n 0.0 | \n 0.0 | \n 0.0 | \n NaN | \n
\n \n 3 | \n NaN | \n 1.0 | \n 1.0 | \n 1.0 | \n 1.0 | \n
\n \n 4 | \n NaN | \n 1.0 | \n 1.0 | \n 1.0 | \n 1.0 | \n
\n \n 5 | \n NaN | \n 1.0 | \n 1.0 | \n 1.0 | \n 1.0 | \n
\n \n
\n
"
341 | },
342 | "execution_count": 18,
343 | "metadata": {},
344 | "output_type": "execute_result"
345 | }
346 | ],
347 | "source": [
348 | "res = df1.append(df2, ignore_index=True)\n",
349 | "res"
350 | ],
351 | "metadata": {
352 | "collapsed": false,
353 | "pycharm": {
354 | "name": "#%%\n"
355 | }
356 | }
357 | },
358 | {
359 | "cell_type": "code",
360 | "execution_count": 20,
361 | "outputs": [
362 | {
363 | "data": {
364 | "text/plain": " a b c d\n0 0.0 0.0 0.0 0.0\n1 0.0 0.0 0.0 0.0\n2 0.0 0.0 0.0 0.0\n3 1.0 2.0 3.0 4.0",
365 | "text/html": "\n\n
\n \n \n | \n a | \n b | \n c | \n d | \n
\n \n \n \n 0 | \n 0.0 | \n 0.0 | \n 0.0 | \n 0.0 | \n
\n \n 1 | \n 0.0 | \n 0.0 | \n 0.0 | \n 0.0 | \n
\n \n 2 | \n 0.0 | \n 0.0 | \n 0.0 | \n 0.0 | \n
\n \n 3 | \n 1.0 | \n 2.0 | \n 3.0 | \n 4.0 | \n
\n \n
\n
"
366 | },
367 | "execution_count": 20,
368 | "metadata": {},
369 | "output_type": "execute_result"
370 | }
371 | ],
372 | "source": [
373 | "s1 = pd.Series([1,2,3,4], index=['a', 'b', 'c', 'd'])\n",
374 | "res = df1.append(s1, ignore_index=True)\n",
375 | "res"
376 | ],
377 | "metadata": {
378 | "collapsed": false,
379 | "pycharm": {
380 | "name": "#%%\n"
381 | }
382 | }
383 | },
384 | {
385 | "cell_type": "code",
386 | "execution_count": null,
387 | "outputs": [],
388 | "source": [],
389 | "metadata": {
390 | "collapsed": false,
391 | "pycharm": {
392 | "name": "#%%\n"
393 | }
394 | }
395 | }
396 | ],
397 | "metadata": {
398 | "kernelspec": {
399 | "display_name": "Python 3",
400 | "language": "python",
401 | "name": "python3"
402 | },
403 | "language_info": {
404 | "codemirror_mode": {
405 | "name": "ipython",
406 | "version": 2
407 | },
408 | "file_extension": ".py",
409 | "mimetype": "text/x-python",
410 | "name": "python",
411 | "nbconvert_exporter": "python",
412 | "pygments_lexer": "ipython2",
413 | "version": "2.7.6"
414 | }
415 | },
416 | "nbformat": 4,
417 | "nbformat_minor": 0
418 | }
--------------------------------------------------------------------------------
/docs/data-analysis/files/06-merge.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {
6 | "collapsed": true,
7 | "pycharm": {
8 | "name": "#%% md\n"
9 | }
10 | },
11 | "source": [
12 | "## merge"
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": 1,
18 | "outputs": [],
19 | "source": [
20 | "import pandas as pd"
21 | ],
22 | "metadata": {
23 | "collapsed": false,
24 | "pycharm": {
25 | "name": "#%%\n"
26 | }
27 | }
28 | },
29 | {
30 | "cell_type": "code",
31 | "execution_count": 2,
32 | "outputs": [],
33 | "source": [
34 | "left = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'],\n",
35 | " 'A': ['A0', 'A1', 'A2', 'A3'],\n",
36 | " 'B': ['B0', 'B1', 'B2', 'B3']})\n",
37 | "right = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'],\n",
38 | " 'C': ['C0', 'C1', 'C2', 'C3'],\n",
39 | " 'D': ['D0', 'D1', 'D2', 'D3']})"
40 | ],
41 | "metadata": {
42 | "collapsed": false,
43 | "pycharm": {
44 | "name": "#%%\n"
45 | }
46 | }
47 | },
48 | {
49 | "cell_type": "code",
50 | "execution_count": 3,
51 | "outputs": [
52 | {
53 | "data": {
54 | "text/plain": " key A B\n0 K0 A0 B0\n1 K1 A1 B1\n2 K2 A2 B2\n3 K3 A3 B3",
55 | "text/html": "\n\n
\n \n \n | \n key | \n A | \n B | \n
\n \n \n \n 0 | \n K0 | \n A0 | \n B0 | \n
\n \n 1 | \n K1 | \n A1 | \n B1 | \n
\n \n 2 | \n K2 | \n A2 | \n B2 | \n
\n \n 3 | \n K3 | \n A3 | \n B3 | \n
\n \n
\n
"
56 | },
57 | "execution_count": 3,
58 | "metadata": {},
59 | "output_type": "execute_result"
60 | }
61 | ],
62 | "source": [
63 | "left"
64 | ],
65 | "metadata": {
66 | "collapsed": false,
67 | "pycharm": {
68 | "name": "#%%\n"
69 | }
70 | }
71 | },
72 | {
73 | "cell_type": "code",
74 | "execution_count": 4,
75 | "outputs": [
76 | {
77 | "data": {
78 | "text/plain": " key C D\n0 K0 C0 D0\n1 K1 C1 D1\n2 K2 C2 D2\n3 K3 C3 D3",
79 | "text/html": "\n\n
\n \n \n | \n key | \n C | \n D | \n
\n \n \n \n 0 | \n K0 | \n C0 | \n D0 | \n
\n \n 1 | \n K1 | \n C1 | \n D1 | \n
\n \n 2 | \n K2 | \n C2 | \n D2 | \n
\n \n 3 | \n K3 | \n C3 | \n D3 | \n
\n \n
\n
"
80 | },
81 | "execution_count": 4,
82 | "metadata": {},
83 | "output_type": "execute_result"
84 | }
85 | ],
86 | "source": [
87 | "right"
88 | ],
89 | "metadata": {
90 | "collapsed": false,
91 | "pycharm": {
92 | "name": "#%%\n"
93 | }
94 | }
95 | },
96 | {
97 | "cell_type": "code",
98 | "execution_count": 7,
99 | "outputs": [
100 | {
101 | "data": {
102 | "text/plain": " key A B C D\n0 K0 A0 B0 C0 D0\n1 K1 A1 B1 C1 D1\n2 K2 A2 B2 C2 D2\n3 K3 A3 B3 C3 D3",
103 | "text/html": "\n\n
\n \n \n | \n key | \n A | \n B | \n C | \n D | \n
\n \n \n \n 0 | \n K0 | \n A0 | \n B0 | \n C0 | \n D0 | \n
\n \n 1 | \n K1 | \n A1 | \n B1 | \n C1 | \n D1 | \n
\n \n 2 | \n K2 | \n A2 | \n B2 | \n C2 | \n D2 | \n
\n \n 3 | \n K3 | \n A3 | \n B3 | \n C3 | \n D3 | \n
\n \n
\n
"
104 | },
105 | "execution_count": 7,
106 | "metadata": {},
107 | "output_type": "execute_result"
108 | }
109 | ],
110 | "source": [
111 | "res = pd.merge(left, right, on='key')\n",
112 | "res"
113 | ],
114 | "metadata": {
115 | "collapsed": false,
116 | "pycharm": {
117 | "name": "#%%\n"
118 | }
119 | }
120 | },
121 | {
122 | "cell_type": "code",
123 | "execution_count": 8,
124 | "outputs": [],
125 | "source": [
126 | "left = pd.DataFrame({'key1': ['K0', 'K0', 'K1', 'K2'],\n",
127 | " 'key2': ['K0', 'K1', 'K0', 'K1'],\n",
128 | " 'A': ['A0', 'A1', 'A2', 'A3'],\n",
129 | " 'B': ['B0', 'B1', 'B2', 'B3']})\n",
130 | "right = pd.DataFrame({'key1': ['K0', 'K1', 'K1', 'K2'],\n",
131 | " 'key2': ['K0', 'K0', 'K0', 'K0'],\n",
132 | " 'C': ['C0', 'C1', 'C2', 'C3'],\n",
133 | " 'D': ['D0', 'D1', 'D2', 'D3']})"
134 | ],
135 | "metadata": {
136 | "collapsed": false,
137 | "pycharm": {
138 | "name": "#%%\n"
139 | }
140 | }
141 | },
142 | {
143 | "cell_type": "code",
144 | "execution_count": 9,
145 | "outputs": [
146 | {
147 | "data": {
148 | "text/plain": " key1 key2 A B\n0 K0 K0 A0 B0\n1 K0 K1 A1 B1\n2 K1 K0 A2 B2\n3 K2 K1 A3 B3",
149 | "text/html": "\n\n
\n \n \n | \n key1 | \n key2 | \n A | \n B | \n
\n \n \n \n 0 | \n K0 | \n K0 | \n A0 | \n B0 | \n
\n \n 1 | \n K0 | \n K1 | \n A1 | \n B1 | \n
\n \n 2 | \n K1 | \n K0 | \n A2 | \n B2 | \n
\n \n 3 | \n K2 | \n K1 | \n A3 | \n B3 | \n
\n \n
\n
"
150 | },
151 | "execution_count": 9,
152 | "metadata": {},
153 | "output_type": "execute_result"
154 | }
155 | ],
156 | "source": [
157 | "left"
158 | ],
159 | "metadata": {
160 | "collapsed": false,
161 | "pycharm": {
162 | "name": "#%%\n"
163 | }
164 | }
165 | },
166 | {
167 | "cell_type": "code",
168 | "execution_count": 10,
169 | "outputs": [
170 | {
171 | "data": {
172 | "text/plain": " key1 key2 C D\n0 K0 K0 C0 D0\n1 K1 K0 C1 D1\n2 K1 K0 C2 D2\n3 K2 K0 C3 D3",
173 | "text/html": "\n\n
\n \n \n | \n key1 | \n key2 | \n C | \n D | \n
\n \n \n \n 0 | \n K0 | \n K0 | \n C0 | \n D0 | \n
\n \n 1 | \n K1 | \n K0 | \n C1 | \n D1 | \n
\n \n 2 | \n K1 | \n K0 | \n C2 | \n D2 | \n
\n \n 3 | \n K2 | \n K0 | \n C3 | \n D3 | \n
\n \n
\n
"
174 | },
175 | "execution_count": 10,
176 | "metadata": {},
177 | "output_type": "execute_result"
178 | }
179 | ],
180 | "source": [
181 | "right"
182 | ],
183 | "metadata": {
184 | "collapsed": false,
185 | "pycharm": {
186 | "name": "#%%\n"
187 | }
188 | }
189 | },
190 | {
191 | "cell_type": "code",
192 | "execution_count": 12,
193 | "outputs": [
194 | {
195 | "data": {
196 | "text/plain": " key1 key2 A B C D\n0 K0 K0 A0 B0 C0 D0\n1 K1 K0 A2 B2 C1 D1\n2 K1 K0 A2 B2 C2 D2",
197 | "text/html": "\n\n
\n \n \n | \n key1 | \n key2 | \n A | \n B | \n C | \n D | \n
\n \n \n \n 0 | \n K0 | \n K0 | \n A0 | \n B0 | \n C0 | \n D0 | \n
\n \n 1 | \n K1 | \n K0 | \n A2 | \n B2 | \n C1 | \n D1 | \n
\n \n 2 | \n K1 | \n K0 | \n A2 | \n B2 | \n C2 | \n D2 | \n
\n \n
\n
"
198 | },
199 | "execution_count": 12,
200 | "metadata": {},
201 | "output_type": "execute_result"
202 | }
203 | ],
204 | "source": [
205 | "res = pd.merge(left, right, on=['key1', 'key2'])\n",
206 | "res"
207 | ],
208 | "metadata": {
209 | "collapsed": false,
210 | "pycharm": {
211 | "name": "#%%\n"
212 | }
213 | }
214 | },
215 | {
216 | "cell_type": "code",
217 | "execution_count": 13,
218 | "outputs": [
219 | {
220 | "data": {
221 | "text/plain": " key1 key2 A B C D\n0 K0 K0 A0 B0 C0 D0\n1 K1 K0 A2 B2 C1 D1\n2 K1 K0 A2 B2 C2 D2",
222 | "text/html": "\n\n
\n \n \n | \n key1 | \n key2 | \n A | \n B | \n C | \n D | \n
\n \n \n \n 0 | \n K0 | \n K0 | \n A0 | \n B0 | \n C0 | \n D0 | \n
\n \n 1 | \n K1 | \n K0 | \n A2 | \n B2 | \n C1 | \n D1 | \n
\n \n 2 | \n K1 | \n K0 | \n A2 | \n B2 | \n C2 | \n D2 | \n
\n \n
\n
"
223 | },
224 | "execution_count": 13,
225 | "metadata": {},
226 | "output_type": "execute_result"
227 | }
228 | ],
229 | "source": [
230 | "res = pd.merge(left, right, on=['key1', 'key2'], how='inner')\n",
231 | "res"
232 | ],
233 | "metadata": {
234 | "collapsed": false,
235 | "pycharm": {
236 | "name": "#%%\n"
237 | }
238 | }
239 | },
240 | {
241 | "cell_type": "code",
242 | "execution_count": 14,
243 | "outputs": [
244 | {
245 | "data": {
246 | "text/plain": " key1 key2 A B C D\n0 K0 K0 A0 B0 C0 D0\n1 K0 K1 A1 B1 NaN NaN\n2 K1 K0 A2 B2 C1 D1\n3 K1 K0 A2 B2 C2 D2\n4 K2 K1 A3 B3 NaN NaN\n5 K2 K0 NaN NaN C3 D3",
247 | "text/html": "\n\n
\n \n \n | \n key1 | \n key2 | \n A | \n B | \n C | \n D | \n
\n \n \n \n 0 | \n K0 | \n K0 | \n A0 | \n B0 | \n C0 | \n D0 | \n
\n \n 1 | \n K0 | \n K1 | \n A1 | \n B1 | \n NaN | \n NaN | \n
\n \n 2 | \n K1 | \n K0 | \n A2 | \n B2 | \n C1 | \n D1 | \n
\n \n 3 | \n K1 | \n K0 | \n A2 | \n B2 | \n C2 | \n D2 | \n
\n \n 4 | \n K2 | \n K1 | \n A3 | \n B3 | \n NaN | \n NaN | \n
\n \n 5 | \n K2 | \n K0 | \n NaN | \n NaN | \n C3 | \n D3 | \n
\n \n
\n
"
248 | },
249 | "execution_count": 14,
250 | "metadata": {},
251 | "output_type": "execute_result"
252 | }
253 | ],
254 | "source": [
255 | "res = pd.merge(left, right, on=['key1', 'key2'], how='outer')\n",
256 | "res"
257 | ],
258 | "metadata": {
259 | "collapsed": false,
260 | "pycharm": {
261 | "name": "#%%\n"
262 | }
263 | }
264 | },
265 | {
266 | "cell_type": "code",
267 | "execution_count": 15,
268 | "outputs": [
269 | {
270 | "data": {
271 | "text/plain": " key1 key2 A B C D\n0 K0 K0 A0 B0 C0 D0\n1 K1 K0 A2 B2 C1 D1\n2 K1 K0 A2 B2 C2 D2\n3 K2 K0 NaN NaN C3 D3",
272 | "text/html": "\n\n
\n \n \n | \n key1 | \n key2 | \n A | \n B | \n C | \n D | \n
\n \n \n \n 0 | \n K0 | \n K0 | \n A0 | \n B0 | \n C0 | \n D0 | \n
\n \n 1 | \n K1 | \n K0 | \n A2 | \n B2 | \n C1 | \n D1 | \n
\n \n 2 | \n K1 | \n K0 | \n A2 | \n B2 | \n C2 | \n D2 | \n
\n \n 3 | \n K2 | \n K0 | \n NaN | \n NaN | \n C3 | \n D3 | \n
\n \n
\n
"
273 | },
274 | "execution_count": 15,
275 | "metadata": {},
276 | "output_type": "execute_result"
277 | }
278 | ],
279 | "source": [
280 | "res = pd.merge(left, right, on=['key1', 'key2'], how='right')\n",
281 | "res"
282 | ],
283 | "metadata": {
284 | "collapsed": false,
285 | "pycharm": {
286 | "name": "#%%\n"
287 | }
288 | }
289 | },
290 | {
291 | "cell_type": "code",
292 | "execution_count": 16,
293 | "outputs": [
294 | {
295 | "data": {
296 | "text/plain": " key1 key2 A B C D _merge\n0 K0 K0 A0 B0 C0 D0 both\n1 K0 K1 A1 B1 NaN NaN left_only\n2 K1 K0 A2 B2 C1 D1 both\n3 K1 K0 A2 B2 C2 D2 both\n4 K2 K1 A3 B3 NaN NaN left_only\n5 K2 K0 NaN NaN C3 D3 right_only",
297 | "text/html": "\n\n
\n \n \n | \n key1 | \n key2 | \n A | \n B | \n C | \n D | \n _merge | \n
\n \n \n \n 0 | \n K0 | \n K0 | \n A0 | \n B0 | \n C0 | \n D0 | \n both | \n
\n \n 1 | \n K0 | \n K1 | \n A1 | \n B1 | \n NaN | \n NaN | \n left_only | \n
\n \n 2 | \n K1 | \n K0 | \n A2 | \n B2 | \n C1 | \n D1 | \n both | \n
\n \n 3 | \n K1 | \n K0 | \n A2 | \n B2 | \n C2 | \n D2 | \n both | \n
\n \n 4 | \n K2 | \n K1 | \n A3 | \n B3 | \n NaN | \n NaN | \n left_only | \n
\n \n 5 | \n K2 | \n K0 | \n NaN | \n NaN | \n C3 | \n D3 | \n right_only | \n
\n \n
\n
"
298 | },
299 | "execution_count": 16,
300 | "metadata": {},
301 | "output_type": "execute_result"
302 | }
303 | ],
304 | "source": [
305 | "res = pd.merge(left, right, on=['key1', 'key2'], how='outer', indicator=True)\n",
306 | "res"
307 | ],
308 | "metadata": {
309 | "collapsed": false,
310 | "pycharm": {
311 | "name": "#%%\n"
312 | }
313 | }
314 | },
315 | {
316 | "cell_type": "markdown",
317 | "source": [
318 | "### index"
319 | ],
320 | "metadata": {
321 | "collapsed": false,
322 | "pycharm": {
323 | "name": "#%% md\n"
324 | }
325 | }
326 | },
327 | {
328 | "cell_type": "code",
329 | "execution_count": 17,
330 | "outputs": [],
331 | "source": [
332 | "left = pd.DataFrame({'A': ['A0', 'A1', 'A2'],\n",
333 | " 'B': ['B0', 'B1', 'B2']},\n",
334 | " index=['K0', 'K1', 'K2'])\n",
335 | "right = pd.DataFrame({'C': ['C0', 'C2', 'C3'],\n",
336 | " 'D': ['D0', 'D2', 'D3']},\n",
337 | " index=['K0', 'K2', 'K3'])"
338 | ],
339 | "metadata": {
340 | "collapsed": false,
341 | "pycharm": {
342 | "name": "#%%\n"
343 | }
344 | }
345 | },
346 | {
347 | "cell_type": "code",
348 | "execution_count": 18,
349 | "outputs": [
350 | {
351 | "data": {
352 | "text/plain": " A B C D\nK0 A0 B0 C0 D0\nK1 A1 B1 NaN NaN\nK2 A2 B2 C2 D2\nK3 NaN NaN C3 D3",
353 | "text/html": "\n\n
\n \n \n | \n A | \n B | \n C | \n D | \n
\n \n \n \n K0 | \n A0 | \n B0 | \n C0 | \n D0 | \n
\n \n K1 | \n A1 | \n B1 | \n NaN | \n NaN | \n
\n \n K2 | \n A2 | \n B2 | \n C2 | \n D2 | \n
\n \n K3 | \n NaN | \n NaN | \n C3 | \n D3 | \n
\n \n
\n
"
354 | },
355 | "execution_count": 18,
356 | "metadata": {},
357 | "output_type": "execute_result"
358 | }
359 | ],
360 | "source": [
361 | "res = pd.merge(left, right, left_index=True, right_index=True, how='outer')\n",
362 | "res"
363 | ],
364 | "metadata": {
365 | "collapsed": false,
366 | "pycharm": {
367 | "name": "#%%\n"
368 | }
369 | }
370 | },
371 | {
372 | "cell_type": "markdown",
373 | "source": [
374 | "### 解决名字相同内涵不同的数据"
375 | ],
376 | "metadata": {
377 | "collapsed": false,
378 | "pycharm": {
379 | "name": "#%% md\n"
380 | }
381 | }
382 | },
383 | {
384 | "cell_type": "code",
385 | "execution_count": 19,
386 | "outputs": [],
387 | "source": [
388 | "boys = pd.DataFrame({'k': ['K0', 'K1', 'K2'], 'age': [1, 2, 3]})\n",
389 | "girls = pd.DataFrame({'k': ['K0', 'K0', 'K3'], 'age': [4, 5, 6]})\n"
390 | ],
391 | "metadata": {
392 | "collapsed": false,
393 | "pycharm": {
394 | "name": "#%%\n"
395 | }
396 | }
397 | },
398 | {
399 | "cell_type": "code",
400 | "execution_count": 20,
401 | "outputs": [
402 | {
403 | "data": {
404 | "text/plain": " k age\n0 K0 1\n1 K1 2\n2 K2 3",
405 | "text/html": "\n\n
\n \n \n | \n k | \n age | \n
\n \n \n \n 0 | \n K0 | \n 1 | \n
\n \n 1 | \n K1 | \n 2 | \n
\n \n 2 | \n K2 | \n 3 | \n
\n \n
\n
"
406 | },
407 | "execution_count": 20,
408 | "metadata": {},
409 | "output_type": "execute_result"
410 | }
411 | ],
412 | "source": [
413 | "boys"
414 | ],
415 | "metadata": {
416 | "collapsed": false,
417 | "pycharm": {
418 | "name": "#%%\n"
419 | }
420 | }
421 | },
422 | {
423 | "cell_type": "code",
424 | "execution_count": 21,
425 | "outputs": [
426 | {
427 | "data": {
428 | "text/plain": " k age\n0 K0 4\n1 K0 5\n2 K3 6",
429 | "text/html": "\n\n
\n \n \n | \n k | \n age | \n
\n \n \n \n 0 | \n K0 | \n 4 | \n
\n \n 1 | \n K0 | \n 5 | \n
\n \n 2 | \n K3 | \n 6 | \n
\n \n
\n
"
430 | },
431 | "execution_count": 21,
432 | "metadata": {},
433 | "output_type": "execute_result"
434 | }
435 | ],
436 | "source": [
437 | "girls"
438 | ],
439 | "metadata": {
440 | "collapsed": false,
441 | "pycharm": {
442 | "name": "#%%\n"
443 | }
444 | }
445 | },
446 | {
447 | "cell_type": "code",
448 | "execution_count": 22,
449 | "outputs": [
450 | {
451 | "data": {
452 | "text/plain": " k age_boy age_girl\n0 K0 1 4\n1 K0 1 5",
453 | "text/html": "\n\n
\n \n \n | \n k | \n age_boy | \n age_girl | \n
\n \n \n \n 0 | \n K0 | \n 1 | \n 4 | \n
\n \n 1 | \n K0 | \n 1 | \n 5 | \n
\n \n
\n
"
454 | },
455 | "execution_count": 22,
456 | "metadata": {},
457 | "output_type": "execute_result"
458 | }
459 | ],
460 | "source": [
461 | "res = pd.merge(boys, girls, on='k', suffixes=['_boy', '_girl'], how='inner')\n",
462 | "res"
463 | ],
464 | "metadata": {
465 | "collapsed": false,
466 | "pycharm": {
467 | "name": "#%%\n"
468 | }
469 | }
470 | },
471 | {
472 | "cell_type": "code",
473 | "execution_count": null,
474 | "outputs": [],
475 | "source": [],
476 | "metadata": {
477 | "collapsed": false,
478 | "pycharm": {
479 | "name": "#%%\n"
480 | }
481 | }
482 | }
483 | ],
484 | "metadata": {
485 | "kernelspec": {
486 | "display_name": "Python 3",
487 | "language": "python",
488 | "name": "python3"
489 | },
490 | "language_info": {
491 | "codemirror_mode": {
492 | "name": "ipython",
493 | "version": 2
494 | },
495 | "file_extension": ".py",
496 | "mimetype": "text/x-python",
497 | "name": "python",
498 | "nbconvert_exporter": "python",
499 | "pygments_lexer": "ipython2",
500 | "version": "2.7.6"
501 | }
502 | },
503 | "nbformat": 4,
504 | "nbformat_minor": 0
505 | }
--------------------------------------------------------------------------------
/docs/data-analysis/files/student.csv:
--------------------------------------------------------------------------------
1 | Student ID,name ,age,gender
2 | 1100,Kelly,22,Female
3 | 1101,Clo,21,Female
4 | 1102,Tilly,22,Female
5 | 1103,Tony,24,Male
6 | 1104,David,20,Male
7 | 1105,Catty,22,Female
8 | 1106,M,3,Female
9 | 1107,N,43,Male
10 | 1108,A,13,Male
11 | 1109,S,12,Male
12 | 1110,David,33,Male
13 | 1111,Dw,3,Female
14 | 1112,Q,23,Male
15 | 1113,W,21,Female
--------------------------------------------------------------------------------
/docs/data-analysis/img/3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/data-analysis/img/3-1.png
--------------------------------------------------------------------------------
/docs/data-analysis/img/3-10.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/data-analysis/img/3-10.png
--------------------------------------------------------------------------------
/docs/data-analysis/img/3-11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/data-analysis/img/3-11.png
--------------------------------------------------------------------------------
/docs/data-analysis/img/3-12.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/data-analysis/img/3-12.png
--------------------------------------------------------------------------------
/docs/data-analysis/img/3-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/data-analysis/img/3-2.png
--------------------------------------------------------------------------------
/docs/data-analysis/img/3-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/data-analysis/img/3-3.png
--------------------------------------------------------------------------------
/docs/data-analysis/img/3-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/data-analysis/img/3-4.png
--------------------------------------------------------------------------------
/docs/data-analysis/img/3-5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/data-analysis/img/3-5.png
--------------------------------------------------------------------------------
/docs/data-analysis/img/3-6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/data-analysis/img/3-6.png
--------------------------------------------------------------------------------
/docs/data-analysis/img/3-7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/data-analysis/img/3-7.png
--------------------------------------------------------------------------------
/docs/data-analysis/img/3-8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/data-analysis/img/3-8.png
--------------------------------------------------------------------------------
/docs/data-analysis/img/3-9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/data-analysis/img/3-9.png
--------------------------------------------------------------------------------
/docs/data-analysis/pandas.md:
--------------------------------------------------------------------------------
1 | ---
2 | sidebar_position: 2
3 | ---
4 |
5 | # Data Analysis: pandas
6 |
7 | [Jump to the notebook files](https://github.com/zhiyu1998/Python-Basis-Notes/blob/master/docs/data-analysis/files/01-choice_data.ipynb)
8 |
9 | @todo documentation
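10 |
11 | Until the write-up lands, here is a minimal sketch of the `pd.merge` patterns the notebooks walk through (the frames and column names are toy data mirroring 06-merge.ipynb, not anything from a real dataset):
12 |
13 | ```python
14 | import pandas as pd
15 |
16 | left = pd.DataFrame({'key': ['K0', 'K1'], 'A': ['A0', 'A1']})
17 | right = pd.DataFrame({'key': ['K0', 'K2'], 'B': ['B0', 'B1']})
18 |
19 | # how= controls which keys survive: 'inner' (default), 'outer', 'left', 'right';
20 | # indicator=True adds a _merge column showing where each row came from.
21 | print(pd.merge(left, right, on='key', how='outer', indicator=True))
22 | ```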
--------------------------------------------------------------------------------
/docs/deeplearning/_category_.json:
--------------------------------------------------------------------------------
1 | {
2 | "label": "深度学习",
3 | "position": 4,
4 | "link": {
5 | "type": "generated-index"
6 | }
7 | }
8 |
--------------------------------------------------------------------------------
/docs/deeplearning/deeplearning.md:
--------------------------------------------------------------------------------
1 | ---
2 | sidebar_position: 1
3 | ---
4 |
5 | # Deep Learning: A Learning Roadmap
6 |
7 | ## :checkered_flag: Preface
8 |
9 | My deep-learning reading/study notes from graduate school (research direction: anomaly detection), covering:
10 | * Image classification (introductory)
11 | * Object detection (purely for gaming :arrow_right: auto-aim)
12 | * Anomaly detection (traffic time series)
13 |
14 |
15 |
16 | ## :mountain_cableway: Recommended Route
17 |
18 | Machine learning :arrow_right: data manipulation (this repo is a good companion) :arrow_right: deep-learning fundamentals (recommended reading: the PyTorch edition of "Dive into Deep Learning": https://zh.d2l.ai/) :arrow_right: foundational classification networks (recommended videos: https://space.bilibili.com/18161609/channel/series) :arrow_right: branch out by research direction :arrow_double_up:
19 |
20 | > Note: personally I don't think you need to read "Dive into Deep Learning" all the way to the classic networks (AlexNet...)
21 |
22 |
23 |
24 | ## :bookmark_tabs: Recommended Books and Sites
25 |
26 | * PyTorch internals training (PyTorch Chinese wiki): https://www.pytorch.wiki/
27 | * Theoretical grounding for papers (Neural Networks and Deep Learning): https://nndl.github.io/
28 | * Sharper language skills (Fluent Python); data processing (深入浅出pandas)
29 | * A gallery of absurd reviewer comments (for fun): https://shitmyreviewerssay.tumblr.com/
30 |
31 |
32 |
33 | 💨 Other recommendations
34 |
35 | * [PyTorch official site](https://pytorch.org/)
36 | * [PaddlePaddle official site](https://www.paddlepaddle.org.cn/)
37 | * [scikit-learn Chinese community](https://scikit-learn.org.cn/)
38 | * [Matplotlib: Python plotting — Matplotlib 3.4.2 documentation](https://matplotlib.org/stable/index.html)
39 | * [Jittor: a just-in-time compiled deep learning framework](https://cg.cs.tsinghua.edu.cn/jittor/)
40 | * [Dataset Search](https://datasetsearch.research.google.com/)
41 | * [TensorFlow official tutorials](https://tensorflow.google.cn/tutorials)
42 | * [Keras: a high-level wrapper over TF](https://keras.io/)
43 | * [Hydra: simplifies deep-learning configuration](https://hydra.cc/)
44 | * [ml-tooling/best-of-ml-python: 🏆 a ranked list of open-source ML libraries](https://github.com/ml-tooling/best-of-ml-python#graph-data)
45 | * [NumPy in Chinese](https://www.numpy.org.cn/)
46 | * [Kaggle: deep-learning competitions](https://www.kaggle.com/)
47 | * [Pillow (PIL fork)](https://www.osgeo.cn/pillow/index.html)
48 | * [NetworkX: software for complex networks](https://www.osgeo.cn/networkx/index.html)
49 | * [Tutorials on deep learning for image processing](https://github.com/WZMIAOMIAO/deep-learning-for-image-processing)
50 | * [pandas tutorial in Chinese](http://joyfulpandas.datawhale.club/Content/index.html)
51 | * [External-Attention-pytorch: 🍀 ready-made building blocks](https://github.com/xmu-xiaoma666/External-Attention-pytorch)
52 | * [ViT collection](https://github.com/lucidrains/vit-pytorch)
53 | * [500 Questions on Deep Learning](https://github.com/scutan90/DeepLearning-500-questions)
54 | * [Deep-learning papers reading roadmap](https://github.com/floodsung/Deep-Learning-Papers-Reading-Roadmap)
55 | * [Annotated implementations of deep-learning papers](https://github.com/labmlai/annotated_deep_learning_paper_implementations)
56 | * [Introductory deep-learning tutorials and notable articles](https://github.com/Mikoto10032/DeepLearning)
57 | * [Notes on Andrew Ng's deep-learning courses](https://github.com/fengdu78/deeplearning_ai_books)
58 | * [TensorFlow 2 tutorials in Chinese, continuously updated (current version: TensorFlow 2.0)](https://github.com/czy36mengfei/tensorflow2_tutorials_chinese)
59 | * [TensorFlow tutorials and examples for beginners (support TF v1 & v2)](https://github.com/aymericdamien/TensorFlow-Examples)
60 | * [Simple, ready-to-use TensorFlow tutorials](https://github.com/instillai/TensorFlow-Course#why-use-tensorflow)
61 | * [PyTorch tutorials for researchers](https://github.com/yunjey/pytorch-tutorial)
62 | * [Personal notes on Andrew Ng's machine-learning course](https://github.com/fengdu78/Coursera-ML-AndrewNg-Notes)
63 | * [Matplotlib in Chinese](https://www.matplotlib.org.cn/)
64 | * [pytorch-image-models: PyTorch image models, scripts, pretrained weights](https://github.com/rwightman/pytorch-image-models)
65 | * [Flops counter](https://github.com/sovrasov/flops-counter.pytorch)
66 | * [CVPR 2022 papers and open-source projects](https://github.com/amusi/CVPR2021-Papers-with-Code)
67 | * [PyTorch implementations of GANs](https://github.com/eriklindernoren/PyTorch-GAN)
68 | * [the-gan-zoo: A list of all named GANs!](https://github.com/hindupuravinash/the-gan-zoo)
69 |
70 |
71 |
72 | 📚 Books
73 |
74 | * ["Computer Vision in Action: Algorithms and Applications"](https://github.com/Charmve/computer-vision-in-action)
75 | * [Hands-On Machine Learning with Scikit-Learn and TensorFlow, 2nd edition (Chinese)](https://hands1ml.apachecn.org/#/)
76 | * [Python for Data Analysis, 2nd edition (Chinese)](https://pyda.apachecn.org/#/)
77 | * [PumpkinBook (南瓜书)](https://datawhalechina.github.io/pumpkin-book/#/)
78 |
79 |
80 |
81 | 🏣 Communities
82 |
83 | * [Hugging Face: natural language processing](https://huggingface.co/)
84 | * [Sieun Park – Medium](https://sieunpark77.medium.com/)
85 | * [Distill — Latest articles about machine learning](https://distill.pub/)
86 | * [Towards Data Science](https://towardsdatascience.com/)
87 | * [Neurohive - Neural Networks](https://neurohive.io/en/)
88 | * [974 questions with answers in COMPUTER SCIENCE | Science topic](https://www.researchgate.net/topic/Computer-Science)
89 | * [devRant](https://devrant.com/feed)
90 |
91 |
92 |
93 | ## :pencil2: Recommended Papers
94 |
95 | ### Image Classification
96 |
97 | - LeNet [http://yann.lecun.com/exdb/lenet/index.html](http://yann.lecun.com/exdb/lenet/index.html)
98 | - AlexNet [http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf)
99 | - ZFNet(Visualizing and Understanding Convolutional Networks) [https://arxiv.org/abs/1311.2901](https://arxiv.org/abs/1311.2901)
100 | - VGG [https://arxiv.org/abs/1409.1556](https://arxiv.org/abs/1409.1556)
101 | - GoogLeNet, Inceptionv1(Going deeper with convolutions) [https://arxiv.org/abs/1409.4842](https://arxiv.org/abs/1409.4842)
102 | - Batch Normalization [https://arxiv.org/abs/1502.03167](https://arxiv.org/abs/1502.03167)
103 | - Inceptionv3(Rethinking the Inception Architecture for Computer Vision) [https://arxiv.org/abs/1512.00567](https://arxiv.org/abs/1512.00567)
104 | - Inceptionv4, Inception-ResNet [https://arxiv.org/abs/1602.07261](https://arxiv.org/abs/1602.07261)
105 | - Xception(Deep Learning with Depthwise Separable Convolutions) [https://arxiv.org/abs/1610.02357](https://arxiv.org/abs/1610.02357)
106 | - ResNet [https://arxiv.org/abs/1512.03385](https://arxiv.org/abs/1512.03385)
107 | - ResNeXt [https://arxiv.org/abs/1611.05431](https://arxiv.org/abs/1611.05431)
108 | - DenseNet [https://arxiv.org/abs/1608.06993](https://arxiv.org/abs/1608.06993)
109 | - NASNet-A(Learning Transferable Architectures for Scalable Image Recognition) [https://arxiv.org/abs/1707.07012](https://arxiv.org/abs/1707.07012)
110 | - SENet(Squeeze-and-Excitation Networks) [https://arxiv.org/abs/1709.01507](https://arxiv.org/abs/1709.01507)
111 | - MobileNet(v1) [https://arxiv.org/abs/1704.04861](https://arxiv.org/abs/1704.04861)
112 | - MobileNet(v2) [https://arxiv.org/abs/1801.04381](https://arxiv.org/abs/1801.04381)
113 | - MobileNet(v3) [https://arxiv.org/abs/1905.02244](https://arxiv.org/abs/1905.02244)
114 | - ShuffleNet(v1) [https://arxiv.org/abs/1707.01083](https://arxiv.org/abs/1707.01083)
115 | - ShuffleNet(v2) [https://arxiv.org/abs/1807.11164](https://arxiv.org/abs/1807.11164)
116 | - Bag of Tricks for Image Classification with Convolutional Neural Networks [https://arxiv.org/abs/1812.01187](https://arxiv.org/abs/1812.01187)
117 | - EfficientNet(v1) [https://arxiv.org/abs/1905.11946](https://arxiv.org/abs/1905.11946)
118 | - EfficientNet(v2) [https://arxiv.org/abs/2104.00298](https://arxiv.org/abs/2104.00298)
119 | - CSPNet [https://arxiv.org/abs/1911.11929](https://arxiv.org/abs/1911.11929)
120 | - RegNet [https://arxiv.org/abs/2003.13678](https://arxiv.org/abs/2003.13678)
121 | - NFNets(High-Performance Large-Scale Image Recognition Without Normalization) [https://arxiv.org/abs/2102.06171](https://arxiv.org/abs/2102.06171)
122 | - Attention Is All You Need [https://arxiv.org/abs/1706.03762](https://arxiv.org/abs/1706.03762)
123 | - Vision Transformer [https://arxiv.org/abs/2010.11929](https://arxiv.org/abs/2010.11929)
124 | - DeiT(Training data-efficient image transformers ) [https://arxiv.org/abs/2012.12877](https://arxiv.org/abs/2012.12877)
125 | - Swin Transformer [https://arxiv.org/abs/2103.14030](https://arxiv.org/abs/2103.14030)
126 | - Swin Transformer V2: Scaling Up Capacity and Resolution [https://arxiv.org/abs/2111.09883](https://arxiv.org/abs/2111.09883)
127 | - BEiT: BERT Pre-Training of Image Transformers [https://arxiv.org/abs/2106.08254](https://arxiv.org/abs/2106.08254)
128 | - MAE(Masked Autoencoders Are Scalable Vision Learners) [https://arxiv.org/abs/2111.06377](https://arxiv.org/abs/2111.06377)
129 | - CoAtNet [https://arxiv.org/pdf/2106.04803v2.pdf](https://arxiv.org/pdf/2106.04803v2.pdf)
130 |
131 |
132 |
133 | ### Object Detection
134 |
135 | - R-CNN [https://arxiv.org/abs/1311.2524](https://arxiv.org/abs/1311.2524)
136 | - Fast R-CNN [https://arxiv.org/abs/1504.08083](https://arxiv.org/abs/1504.08083)
137 | - Faster R-CNN [https://arxiv.org/abs/1506.01497](https://arxiv.org/abs/1506.01497)
138 | - Cascade R-CNN: Delving into High Quality Object Detection [https://arxiv.org/abs/1712.00726](https://arxiv.org/abs/1712.00726)
139 | - Mask R-CNN [https://arxiv.org/abs/1703.06870](https://arxiv.org/abs/1703.06870)
140 | - SSD [https://arxiv.org/abs/1512.02325](https://arxiv.org/abs/1512.02325)
141 | - FPN(Feature Pyramid Networks for Object Detection) [https://arxiv.org/abs/1612.03144](https://arxiv.org/abs/1612.03144)
142 | - RetinaNet(Focal Loss for Dense Object Detection) [https://arxiv.org/abs/1708.02002](https://arxiv.org/abs/1708.02002)
143 | - Bag of Freebies for Training Object Detection Neural Networks [https://arxiv.org/abs/1902.04103](https://arxiv.org/abs/1902.04103)
144 | - YOLOv1 [https://arxiv.org/abs/1506.02640](https://arxiv.org/abs/1506.02640)
145 | - YOLOv2 [https://arxiv.org/abs/1612.08242](https://arxiv.org/abs/1612.08242)
146 | - YOLOv3 [https://arxiv.org/abs/1804.02767](https://arxiv.org/abs/1804.02767)
147 | - YOLOv4 [https://arxiv.org/abs/2004.10934](https://arxiv.org/abs/2004.10934)
148 | - Scaled-YOLOv4 [https://arxiv.org/abs/2011.08036](https://arxiv.org/abs/2011.08036)
149 | - PP-YOLO [https://arxiv.org/abs/2007.12099](https://arxiv.org/abs/2007.12099)
150 | - PP-YOLOv2 [https://arxiv.org/abs/2104.10419](https://arxiv.org/abs/2104.10419)
151 | - YOLOX [http://arxiv.org/abs/2107.08430](http://arxiv.org/abs/2107.08430)
152 | - CornerNet [https://arxiv.org/abs/1808.01244](https://arxiv.org/abs/1808.01244)
153 | - FCOS [https://arxiv.org/abs/1904.01355](https://arxiv.org/abs/1904.01355)
154 | - CenterNet [https://arxiv.org/abs/1904.07850](https://arxiv.org/abs/1904.07850)
156 |
157 |
158 |
159 | ### Anomaly Detection
160 |
161 | - Anomaly Transformer [http://arxiv.org/abs/2110.02642](http://arxiv.org/abs/2110.02642)
162 | - DL-Traff [http://arxiv.org/abs/2108.09091](http://arxiv.org/abs/2108.09091)
163 | - Generative adversarial networks in time series: A survey and taxonomy [http://arxiv.org/abs/2107.11098](http://arxiv.org/abs/2107.11098)
164 | - Learning Graph Neural Networks for Multivariate Time Series Anomaly Detection [http://arxiv.org/abs/2111.08082](http://arxiv.org/abs/2111.08082)
165 | - Long-Range Transformers [http://arxiv.org/abs/2109.12218](http://arxiv.org/abs/2109.12218)
166 | - Sig-Wasserstein GANs [http://arxiv.org/abs/2111.01207](http://arxiv.org/abs/2111.01207)
167 |
168 |
169 |
170 | ### Others
171 |
172 | - Microsoft COCO: Common Objects in Context [https://arxiv.org/abs/1405.0312](https://arxiv.org/abs/1405.0312)
173 | - The PASCALVisual Object Classes Challenge: A Retrospective [http://host.robots.ox.ac.uk/pascal/VOC/pubs/everingham15.pdf](http://host.robots.ox.ac.uk/pascal/VOC/pubs/everingham15.pdf)
174 |
175 |
176 |
177 | ## :surfer: Acknowledgements
178 |
179 | Special thanks to the following repositories for helping me learn:
180 | [WZMIAOMIAO / deep-learning-for-image-processing (tutorials on deep learning for image processing)](https://github.com/WZMIAOMIAO/deep-learning-for-image-processing)
181 | ["Neural Networks and Deep Learning" by Qiu Xipeng](https://github.com/nndl/nndl.github.io)
182 | [d2l-ai / "Dive into Deep Learning"](https://github.com/d2l-ai/d2l-zh)
184 |
--------------------------------------------------------------------------------
/docs/deeplearning/graph.mdx:
--------------------------------------------------------------------------------
1 | ---
2 | sidebar_position: 2
3 | ---
4 |
5 | export const Highlight = ({children, color}) => (
6 |   <span
7 |     style={{
8 |       backgroundColor: color,
9 |       borderRadius: '2px',
10 |       color: '#fff',
11 |       padding: '0.2rem',
12 |     }}
13 |     onClick={() => {
14 |       alert(`You clicked the color ${color} with label ${children}`);
15 |     }}
16 |   >
17 |     {children}
18 |   </span>
19 | );
20 |
21 | # Graph Convolutional Networks (continuously updated)
22 |
23 | ## Paper Sharing
24 |
25 |
26 | ### General references: traffic flow forecasting
27 | - Graph convolutional networks: a comprehensive review: https://computationalsocialnetworks.springeropen.com/articles/10.1186/s40649-019-0069-y
28 | - A Survey of Traffic Prediction: from Spatio-Temporal Data to Intelligent Transportation (general reference on traffic flow forecasting): https://link.springer.com/article/10.1007/s41019-020-00151-z
29 | - A Survey on Modern Deep Neural Network for Traffic Prediction: Trends, Methods and Challenges (general reference on traffic flow forecasting): https://ieeexplore.ieee.org/document/9112608
30 | - A Summary of Traffic Flow Forecasting Methods (general reference on traffic flow forecasting): http://www.gljtkj.com/EN/Y2004/V21/I3/82
31 | - A comprehensive survey on graph neural networks (general reference on traffic flow forecasting): https://ieeexplore.ieee.org/abstract/document/9046288?casa_token=_-IU9Ixzx8kAAAAA:vcOheOMCzaaZRi5lykrhdY0CwfuoOiRU3lrdmA8uSXv1Auu8z9LrB67_JfrnSyjhoNEHbCAauz9atg
32 | - Chebyshev polynomials (general reference on traffic flow forecasting): https://proceedings.neurips.cc/paper_files/paper/2016/hash/04df4d434d481c5bb723be1b6df1ee65-Abstract.html
33 | - Global status report on road safety 2023 (general reference on traffic flow forecasting): https://www.who.int/teams/social-determinants-of-health/safety-and-mobility/global-status-report-on-road-safety-2023
34 |
35 | ### Graph Embedding
36 | - Graph embedding techniques, applications, and performance: A survey:https://www.sciencedirect.com/science/article/pii/S0950705118301540
37 | - A comprehensive survey of graph embedding: Problems, techniques, and applications:https://ieeexplore.ieee.org/abstract/document/8294302/?casa_token=RPHDwCwRd_sAAAAA:Us_qNvVZ0rIkhicT8MUJI87qKpF5diSGURb5rBkEtEn_Sru7qd_N5j4SESctQvL8kAM-bJLvzxQAVE8
38 |
39 | ### General references: anomaly detection
40 | - Detecting Road Traffic Events by Coupling Multiple Timeseries With a Nonparametric Bayesian Method:https://ieeexplore.ieee.org/abstract/document/6763098?casa_token=wPKB1S938vcAAAAA:il9gnh6pKOssqEYkYuzKor8XoYvhwYM_veqgVUjyCMoOqMMfnYtrnfnh7x4UKjw9UgsJaglC6we2nQ
41 | - Investigating the impact of weather conditions and time of day on traffic flow characteristics:https://journals.ametsoc.org/view/journals/wcas/14/3/WCAS-D-22-0012.1.xml
42 | - Variational Disentangled Graph Auto-Encoders for Link Prediction:https://arxiv.org/abs/2306.11315
43 | - Graph neural networks for anomaly detection in industrial internet of things:https://ieeexplore.ieee.org/abstract/document/9471816?casa_token=c93zsFxKTZQAAAAA:Ud0fjHwZxW4orRAXglbEJnLVnZKSZJnmhwH0qH7dCGOlVBwODXGyVaD9Frzo2yV3ZOuXsCPA8FAaoA
44 | - Perceiving spatiotemporal traffic anomalies from sparse representation-modeled city dynamics:https://link.springer.com/article/10.1007/s00779-020-01474-4
45 | - Urban anomaly analytics: Description, detection, and prediction:https://ieeexplore.ieee.org/abstract/document/9080109/
46 | - Graph convolutional adversarial networks for spatiotemporal anomaly detection:https://ieeexplore.ieee.org/abstract/document/9669110/
47 | - Anomaly detection and inter-sensor transfer learning on smart manufacturing datasets:https://www.mdpi.com/1424-8220/23/1/486
48 | - Graph neural network-based anomaly detection in multivariate time series:https://ojs.aaai.org/index.php/AAAI/article/view/16523
49 | - GMAT-DU: Traffic anomaly prediction with fine spatiotemporal granularity in sparse data:https://ieeexplore.ieee.org/abstract/document/10061355/
50 | - Graph anomaly detection with graph neural networks: Current status and challenges:https://ieeexplore.ieee.org/abstract/document/9906987/
51 | - Anomaly detection with generative adversarial networks for multivariate time series:https://arxiv.org/abs/1809.04758
52 | - A multimodal anomaly detector for robot-assisted feeding using an lstm-based variational autoencoder:https://ieeexplore.ieee.org/abstract/document/8279425/
53 |
54 | ### Graph Convolutional Networks
55 | - GCN:https://arxiv.org/abs/1609.02907
56 | - GAT:https://arxiv.org/abs/1710.10903
57 | - GraphSAGE:https://proceedings.neurips.cc/paper_files/paper/2017/hash/5dd9db5e033da9c6fb5ba83c7a7ebea9-Abstract.html
58 | - GIN:https://arxiv.org/abs/1810.00826
59 | - DeepGCN:https://arxiv.org/abs/1904.03751
60 | - PMLP:https://arxiv.org/abs/2212.09034
61 | - DeepGCN:http://openaccess.thecvf.com/content_ICCV_2019/html/Li_DeepGCNs_Can_GCNs_Go_As_Deep_As_CNNs_ICCV_2019_paper.html
62 | - Graph contrastive learning (Neighbor contrastive learning on learnable graph augmentation): https://ojs.aaai.org/index.php/AAAI/article/view/26168
63 |
64 | 🔥 [Highly recommended] PyG, the graph neural network library for PyTorch: https://pyg.org/
65 | 
--------------------------------------------------------------------------------
/docs/deeplearning/images/pyg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/deeplearning/images/pyg.png
--------------------------------------------------------------------------------
/docs/intro.md:
--------------------------------------------------------------------------------
1 | ## Table of Contents
2 |
3 | ### Quick Links
4 | * 🐍 [Python Basics](./basics/base.md)
5 | * 📊 [NumPy Basics](./data-analysis/numpy.md)
6 | * 🐼 [pandas Basics](./data-analysis/pandas.md)
7 | * 🍥 [Deep Learning Basics + Roadmap](./deeplearning/deeplearning.md)
8 | * 📚 [Script Library](./scripts/letpub)
9 |
10 | ### 📑 Read Online
11 |
12 | https://zhiyu1998.github.io/Python-Basis-Notes/docs/#/
13 |
14 | ### 🐍 Script Library
15 |
16 | - [Extract journal/conference info for paper references](/docs/scripts/letpub)
17 | - [Scrape ESG ratings of Chinese companies](/docs/scripts/syntaogf)
18 | - [Export the Feige knowledge base](/docs/scripts/feige_export)
19 | - [Automation OCR framework](/docs/scripts/auto_ocr_framework)
20 |
21 | ### 📈 Trend Chart
22 |
23 | 
24 |
25 |
--------------------------------------------------------------------------------
/docs/scripts/1-extra_letpub.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | # @Time : 2022/11/21 23:59
4 | # @Author : zhiyu1998
5 |
6 | import json
7 | import os.path
8 | import re
9 | import time
10 | import random
11 | import logging
12 |
13 | import pandas as pd
14 | import requests
15 |
16 | from bs4 import BeautifulSoup
17 |
18 | # Request headers
19 | headers = {
20 |     'User-Agent': 'Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Mobile Safari/537.36',
21 |     'Referer': 'https://www.letpub.com/journalapp/',
22 | }
23 | # Journals to exclude from the lookup
24 | exclude_list = ['arXiv']
25 |
26 |
27 | def logger_config(log_path, logging_name):
28 | """
29 | 配置log
30 | logger是日志对象,handler是流处理器,console是控制台输出(没有console也可以,将不会在控制台输出,会在日志文件中输出)
31 | :param log_path: 输出log路径
32 | :param logging_name: 记录中name,可随意
33 | :return:
34 | """
35 | # 获取logger对象,取名
36 | logger = logging.getLogger(logging_name)
37 | # 输出DEBUG及以上级别的信息,针对所有输出的第一层过滤
38 | logger.setLevel(level=logging.DEBUG)
39 | # 获取文件日志句柄并设置日志级别,第二层过滤
40 | handler = logging.FileHandler(log_path, encoding='UTF-8')
41 | handler.setLevel(logging.INFO)
42 | # 生成并设置文件日志格式
43 | formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
44 | handler.setFormatter(formatter)
45 | # console相当于控制台输出,handler文件输出。获取流句柄并设置日志级别,第二层过滤
46 | console = logging.StreamHandler()
47 | console.setLevel(logging.DEBUG)
48 | # 为logger对象添加句柄
49 | logger.addHandler(handler)
50 | logger.addHandler(console)
51 | return logger
52 |
53 |
54 | logger = logger_config(log_path='log.txt', logging_name='log')
55 |
56 |
57 | def extra_title_journal() -> pd.DataFrame:
58 | """
59 |     Extract each paper's title and its journal/conference from ref.bib
60 | :return:
61 | """
62 | res: pd.DataFrame = pd.DataFrame(columns=['title', 'journal'])
63 | with open("ref.bib") as f:
64 | temp = []
65 | for line in f:
66 | if 'title' in line:
67 | temp.append(re.findall(r'({.*?})', line)[0].replace('{', '').replace('}', ''))
68 | if 'journal' in line or 'booktitle' in line:
69 | temp.append(re.findall(r'({.*?})', line)[0].replace('{', '').replace('}', ''))
70 | if line == '\n':
71 |                 # Calibrate: keep only title and venue
72 |                 if len(temp) > 2:
73 |                     temp.pop(2)
74 |                 # Pad a missing venue with a blank
75 | if len(temp) < 2:
76 | temp.append(' ')
77 | res.loc[len(res)] = temp
78 | temp.clear()
79 | return res
80 |
81 |
82 | def save_excel(res: pd.DataFrame) -> None:
83 | """
84 |     Save the DataFrame as an Excel file
85 | :param res:
86 | :return:
87 | """
88 | if os.path.exists('./ref.xlsx'):
89 | os.remove('./ref.xlsx')
90 | res.to_excel('ref.xlsx', index=False)
91 |
92 |
93 | def get_msg_from_letpub(journal_name: str) -> list:
94 | """
95 |     Fetch journal data from LetPub
96 |     :return: ISSN, journal name, journal metrics, CAS partition, subject area, SCI/SCIE, open access or not, acceptance rate, review cycle, recent articles, view count
97 | """
98 | url: str = f'https://www.letpub.com.cn/journalappAjaxXS.php?querytype=autojournal&term={journal_name}'
99 | r: requests.Response = requests.get(url=url, headers=headers)
100 |     # Take the exact-match result -- [0]
101 | try:
102 | issn: str = json.loads(r.text)[0]['issn']
103 | if issn == '':
104 | return []
105 | except Exception as e:
106 |         logger.info(f'Request error: {e}')
107 | return []
108 |     # Build the query
109 | postUrl: str = 'https://www.letpub.com.cn/index.php?page=journalapp&view=search'
110 | request_params: dict = {
111 | "searchname": "",
112 | "searchissn": issn,
113 | "searchfield": "",
114 | "searchimpactlow": "",
115 | "searchimpacthigh": "",
116 | "searchscitype": "",
117 | "view": "search",
118 | "searchcategory1": "",
119 | "searchcategory2": "",
120 | "searchjcrkind": "",
121 | "searchopenaccess": "",
122 | "searchsort": "relevance"
123 | }
124 |     # A second POST query is faster
125 |     r2: requests.Response = requests.post(url=postUrl, headers=headers, data=request_params)
126 |     # Scrape the result table
127 | soup = BeautifulSoup(r2.text, 'lxml')
128 | td = soup.find_all('td', attrs={
129 | 'style': 'border:1px #DDD solid; border-collapse:collapse; text-align:left; padding:8px 8px 8px 8px;'})
130 | temp_letpub_data = [d.text for d in td]
131 | return temp_letpub_data
132 |
133 |
134 | def insert_sci_msg(payload: pd.DataFrame) -> pd.DataFrame:
135 | """
136 |     Attach SCI journal info to each paper
137 | :return:
138 | """
139 | res_dict: dict = {}
140 |     # Iterate over every journal
141 |     for line in payload.loc[:, 'journal']:
142 |         # TODO: skip journals you don't want to query (line in exclude_list or)
143 |         if line.isspace():
144 |             continue
145 |         journal_data = get_msg_from_letpub(line)
146 |         # Skip empty scrape results
147 | if len(journal_data) == 0:
148 | continue
149 | res_dict[line] = journal_data
150 | time.sleep(round(random.uniform(0, 1), 2))
151 |     # Add the journal columns
152 | payload_res: pd.DataFrame = payload.assign(issn='', journal_name='', target='', area='', field='', sci='', is_oa='',
153 | employment_ratio='',
154 | review_cycle='', recent='', view='')
155 |     # Fill the scraped data back into the DataFrame
156 |     for index, row in payload_res.iterrows():
157 |         print(f'Resolved: {row["title"]}')
158 | if row['journal'] in res_dict:
159 | '''
160 |             0-ISSN
161 |             1-journal name
162 |             2-journal metrics
163 |             3-CAS partition
164 |             4-subject area
165 |             5-SCI/SCIE
166 |             6-open access or not
167 |             7-acceptance rate
168 |             8-review cycle
169 |             9-recent articles
170 |             10-view count
171 | '''
172 | match_item = res_dict[row['journal']]
173 | row['issn'] = match_item[0]
174 | row['journal_name'] = match_item[1]
175 | row['target'] = match_item[2]
176 | row['area'] = match_item[3]
177 | row['field'] = match_item[4]
178 | row['sci'] = match_item[5]
179 | row['is_oa'] = match_item[6]
180 | row['employment_ratio'] = match_item[7]
181 | row['review_cycle'] = match_item[8]
182 | row['recent'] = match_item[9]
183 | row['view'] = match_item[10]
184 | payload_res.iloc[index] = row
185 | return payload_res
186 |
187 |
188 | if __name__ == '__main__':
189 | start = time.time()
190 |     # Extract paper titles/journals
191 |     res: pd.DataFrame = extra_title_journal()
192 |     # Fetch journal info
193 |     sci_res: pd.DataFrame = insert_sci_msg(res)
194 |     # Save to Excel
195 |     save_excel(sci_res)
196 |     print(f"Elapsed: {time.time() - start:.2f}s")
197 |
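198 | # A hypothetical ref.bib entry of the shape this parser expects: a 'title' line,
199 | # a 'journal' (or 'booktitle') line, and a blank line separating entries, e.g.
200 | #
201 | # @article{doe2022,
202 | #   title={An Example Paper},
203 | #   journal={An Example Journal},
204 | # }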
--------------------------------------------------------------------------------
/docs/scripts/2-extra_syntaogf.py:
--------------------------------------------------------------------------------
1 | import re
2 | import random
3 | import pandas as pd
4 |
5 | from selenium import webdriver
6 | from selenium.webdriver.edge.service import Service
7 | from selenium.webdriver.common.by import By
8 | from selenium.webdriver.support.ui import WebDriverWait
9 | from selenium.webdriver.support import expected_conditions as EC
10 | from selenium.common.exceptions import ElementNotInteractableException
11 |
12 | import time
13 |
14 | # Read the data with pandas
15 | my_excel = pd.read_excel("./data.xls")
16 | company_names = my_excel.iloc[:, 2]
17 |
18 | # TODO start scraping from the n-th record
19 | # Scrape the data with Selenium
20 | n = 0
21 |
22 | options = webdriver.EdgeOptions()
23 | options.add_argument('--headless')
24 | options.add_argument('--disable-animations')
25 | # TODO set the path to the Edge WebDriver, e.g. C:\\Users\\Administrator\\Documents\\PythonWorkSpace\\Test\\msedgedriver.exe
26 | s = Service(r"")
27 |
28 | driver = webdriver.Edge(options=options, service=s)
29 |
30 | driver.get("https://www.syntaogf.com/")
31 | # Implicit wait of 3 seconds; only errors out if the page fails to open
32 | driver.implicitly_wait(3)
33 |
34 | wait = WebDriverWait(driver, 10)
35 | driver.execute_script("window.scrollBy(0,700)")
36 | # All the records
37 | test_company = company_names
38 | # Handle the first record separately
39 | with open("res_2.txt", "a") as f:
40 | ele = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="search_form_1"]/form/input[1]')))
41 | ele.send_keys(test_company[n])
42 | btn = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="search_form_1"]/form/input[2]'))).click()
43 |     # Wait for the rating label
44 |     try:
45 |         label = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="search_html"]/div/div[1]/a'))).click()
46 |     except ElementNotInteractableException:
47 |         wait.until(
48 |             EC.presence_of_element_located((By.XPATH, '//*[@id="no_search_html"]/div/div[2]/a'))).click()
49 |         time.sleep(2)
50 |         wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="search_form_1"]/form/i'))).click()
51 |         ele = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="search_form_1"]/form/input[1]')))
52 |         ele.send_keys("航锦科技股份有限公司")  # reset the search with a known company
53 |         btn = wait.until(
54 |             EC.presence_of_element_located((By.XPATH, '//*[@id="search_form_1"]/form/input[2]'))).click()
55 |         ele = wait.until(
56 |             EC.presence_of_element_located((By.XPATH, '//*[@id="search_form_2"]/form/input[1]'))).clear()
57 |         time.sleep(2)
58 |     # items = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="search_html"]/div/div[1]/div[5]')))
59 |     items = wait.until(EC.presence_of_all_elements_located((By.XPATH, '//*[@id="search_html"]/div/div[1]/div[5]/div')))
60 |     reg = r'^\d{4}|[A-Z][+-]?$'  # match a 4-digit year or a letter grade like A+, B-
61 |     temp_str = test_company[n] + " "
62 |     for item in items:
63 |         match_res = re.findall(reg, item.text)
64 |         temp_str = temp_str + ":".join(match_res) + " "
65 |     temp_str += "\n"
66 |     f.write(temp_str)
67 |     time.sleep(2)
68 |     print(test_company[n] + " done")
69 |
70 | for com in test_company[n+1:]:
71 | temp_str = com + " "
72 | ele = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="search_form_2"]/form/input[1]')))
73 | time.sleep(random.randint(1, 2))
74 | ele.send_keys(com)
75 | time.sleep(random.randint(1, 2))
76 | btn = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="search_form_2"]/form/input[2]'))).click()
77 | time.sleep(random.randint(1, 2))
78 |
79 |     # Wait for the rating label
80 |     # label = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="search_html"]/div/div[1]/a'))).click()
81 |     # time.sleep(1)
82 |     # items = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="search_html"]/div/div[1]/div[5]')))
83 |     items = wait.until(
84 |         EC.presence_of_all_elements_located((By.XPATH, '//*[@id="search_html"]/div/div[1]/div[5]/div')))
85 |     for item in items:
86 | match_res: list = re.findall(reg, item.text)
87 | temp_str = temp_str + ":".join(match_res) + " "
88 | temp_str += "\n"
89 | f.write(temp_str)
90 |     # If the company does not exist, reset the search
91 | try:
92 | ele = wait.until(
93 | EC.presence_of_element_located((By.XPATH, '//*[@id="search_form_2"]/form/input[1]'))).clear()
94 | except ElementNotInteractableException:
95 | wait.until(
96 | EC.presence_of_element_located((By.XPATH, '//*[@id="no_search_html"]/div/div[2]/a'))).click()
97 | time.sleep(2)
98 | wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="search_form_1"]/form/i'))).click()
99 | ele = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="search_form_1"]/form/input[1]')))
100 | ele.send_keys("航锦科技股份有限公司")
101 | btn = wait.until(
102 | EC.presence_of_element_located((By.XPATH, '//*[@id="search_form_1"]/form/input[2]'))).click()
103 | ele = wait.until(
104 | EC.presence_of_element_located((By.XPATH, '//*[@id="search_form_2"]/form/input[1]'))).clear()
105 | time.sleep(2)
106 |         print(com + " done")
107 | continue
108 | time.sleep(random.randint(1, 2))
109 |     print(com + " done")
--------------------------------------------------------------------------------
/docs/scripts/_category_.json:
--------------------------------------------------------------------------------
1 | {
2 | "label": "脚本集",
3 | "position": 5,
4 | "link": {
5 | "type": "generated-index"
6 | }
7 | }
8 |
--------------------------------------------------------------------------------
/docs/scripts/auto_ocr_framework.md:
--------------------------------------------------------------------------------
1 | # Automation OCR Framework
2 |
3 | This framework came out of helping a good friend automate sending messages on Doudian (抖店). It started out crude, but I stubbornly turned it into a simple automation framework; nowadays I can launch a mobile game, go brush my teeth, and the dailies are done by the time I'm back.
4 |
5 | ## Tech Stack
6 |
7 | - pandas
8 | - cv2
9 | - pyautogui
10 | - functools
11 |
12 | ## Commonly Used Methods
13 |
14 | - `click_image`: click an image
15 | - `click_image_until_another_appears`: click an image until another one appears
16 | - `click_image_sequence`: click a series of images; accepts a List
17 | - `type_text`: type text
18 | - `screenshot_and_click`: OCR the screen, then find and click the given text
19 | - `process_screenshot_for_ocr`: OCR a screenshot and return the data (a usage sketch follows this list)
20 |
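21 | A hypothetical usage sketch (assuming the framework code below is saved as `automation_tool.py`, Umi-OCR is running locally, and the image paths are placeholders):
22 |
23 | ```python
24 | from automation_tool import AutomationTool
25 |
26 | # Click "start", then keep clicking "confirm" until "done" appears.
27 | AutomationTool.click_image("img/start.png")
28 | AutomationTool.click_image_until_another_appears("img/confirm.png", "img/done.png")
29 |
30 | # OCR the screen, click a button by its text, then type into the focused box.
31 | if AutomationTool.screenshot_and_click("发送"):
32 |     AutomationTool.type_text("hello")
33 |     AutomationTool.press_enter()
34 | ```
35 |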
21 | ## Psychological-Test Automation Logic (for recruiting)
22 |
23 | @todo mostly screenshots; when I find time I'll publish the code in a separate repo
24 |
25 | ## 物华弥新 (Game) Automation Logic
26 |
27 | @todo mostly screenshots; when I find time I'll publish the code in a separate repo
28 |
29 | ## Framework Code
30 |
31 | ```python
32 | import hashlib
33 | import math
34 | import pandas as pd
35 | import pyperclip
36 | import requests
37 | import base64
38 | import cv2
39 | import pyautogui
40 | import time
41 | import random
42 | import numpy as np
43 | import logging
44 | from io import BytesIO
45 | from functools import wraps
46 | from functools import lru_cache
47 | from config import SCREENSHOT_REGION
48 |
49 | logging.basicConfig(level=logging.INFO)
50 |
51 |
52 | def retry_on_failure(retries=3, delay=1):
53 | """
54 | 装饰器,用于在函数失败时重试
55 | :param retries: 重试次数
56 | :param delay: 重试间隔时间
57 | """
58 |
59 | def decorator(func):
60 | @wraps(func)
61 | def wrapper(*args, **kwargs):
62 | for attempt in range(retries):
63 | result = func(*args, **kwargs)
64 | if result:
65 | return result
66 |                 logging.warning(f"Attempt {attempt + 1} failed, retrying...")
67 |                 time.sleep(delay)
68 |             logging.error(f"All {retries} attempts failed.")
69 | return None
70 |
71 | return wrapper
72 |
73 | return decorator
74 |
75 |
76 | class AutomationTool:
77 |     # Class constants
78 | LEFT = 'left'
79 | RIGHT = 'right'
80 | FULL = 'full'
81 |
82 |     # Cache the most recent screenshot and OCR result
83 | _last_screenshot = None
84 | _last_screenshot_time = 0
85 | _last_screenshot_hash = None
86 | _last_ocr_result = None
87 |     _screenshot_cache_duration = 1  # cache lifetime in seconds
88 |
89 | UMI_OCR_URL = "http://127.0.0.1:1224/api/ocr"
90 |
91 | @staticmethod
92 | @lru_cache(maxsize=None)
93 | def read_excel(excel_path, usecols="A") -> pd.DataFrame:
94 | """
95 |         Read the specified columns from an Excel file
96 |         :param excel_path: path to the Excel file
97 |         :param usecols: columns to read (defaults to column A)
98 |         :return: DataFrame with the selected columns
99 | """
100 | df = pd.read_excel(excel_path, usecols=usecols)
101 | return df
102 |
103 | @staticmethod
104 | def ocr_image(base64_image_data):
105 | """
106 |         Send an HTTP request to Umi-OCR
107 | :param base64_image_data:
108 | :return:
109 | """
110 | try:
111 | response = requests.post(AutomationTool.UMI_OCR_URL, json={ "base64": base64_image_data })
112 | response.raise_for_status()
113 | return response.json()
114 | except requests.RequestException as e:
115 |             logging.error(f"OCR request failed: {e}")
116 | return None
117 |
118 | @staticmethod
119 | def capture_screenshot():
120 | """
121 |         Capture the screen and return a PIL image
122 |         :return:
123 |         """
124 |         # Fall back to full screen when SCREENSHOT_REGION is empty
125 |         region = SCREENSHOT_REGION if SCREENSHOT_REGION else AutomationTool.FULL
126 |         # Decide which region to capture (left half, right half, or full screen)
127 |         if region == AutomationTool.LEFT:
128 |             logging.info("Capturing the left half of the screen")
129 |             return AutomationTool.capture_screenshot_half(AutomationTool.LEFT)
130 |         elif region == AutomationTool.RIGHT:
131 |             logging.info("Capturing the right half of the screen")
132 |             return AutomationTool.capture_screenshot_half(AutomationTool.RIGHT)
133 |         elif region == AutomationTool.FULL:
134 |             logging.info("Capturing the whole screen")
135 |             return pyautogui.screenshot()
136 |         else:
137 |             raise ValueError("Invalid screenshot region. Use 'left', 'right' or 'full'")
138 |
139 | @staticmethod
140 | def capture_screenshot_half(side=LEFT):
141 | """
142 |         Capture the left or right part of the screen
143 |         :param side: 'left' or 'right', defaults to 'left'
144 |         :return: the cropped image
145 |         """
146 |         # Get the screen width and height
147 |         screen_width, screen_height = pyautogui.size()
148 |         # Capture the whole screen
149 |         screenshot = pyautogui.screenshot()
150 |         # Use 73% of the width
151 |         width_73_percent = int(screen_width * 0.73)
152 |
153 |         if side == AutomationTool.LEFT:
154 |             # Crop the left part
155 |             half = screenshot.crop((0, 0, width_73_percent, screen_height))
156 |         elif side == AutomationTool.RIGHT:
157 |             # Crop the right part
158 |             half = screenshot.crop((screen_width - width_73_percent, 0, screen_width, screen_height))
159 |         else:
160 |             raise ValueError("Invalid side argument. Use 'left' or 'right'")
161 |
162 |         # half.save(f"{side}_region.png")  # for debugging
163 |         return half
164 |
165 | @staticmethod
166 | def convert_image_to_base64(pil_image) -> str:
167 | """
168 |         Convert a PIL image to a Base64-encoded string
169 | :param pil_image:
170 | :return:
171 | """
172 | buffered = BytesIO()
173 | pil_image.save(buffered, format="PNG")
174 | img_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
175 | return img_base64
176 |
177 | @staticmethod
178 | def convert_image_to_opencv(pil_image):
179 | """
180 |         Convert a PIL image to OpenCV (BGR) format
181 | :param pil_image:
182 | :return:
183 | """
184 | np_image = np.array(pil_image)
185 | return cv2.cvtColor(np_image, cv2.COLOR_RGB2BGR)
186 |
187 | @staticmethod
188 | def extract_text_in_box(ocr_data, x1, y1, x2, y2):
189 | """
190 |         Extract the text inside a given bounding box.
191 |         :param ocr_data: OCR result data
192 |         :param x1, y1, x2, y2: the box (top-left x1, y1 and bottom-right x2, y2)
193 |         :return: the recognized text
194 | """
195 | for item in ocr_data['data']:
196 | box = item['box']
197 | text = item['text']
198 | x_min = min([point[0] for point in box])
199 | y_min = min([point[1] for point in box])
200 | x_max = max([point[0] for point in box])
201 | y_max = max([point[1] for point in box])
202 |             # Check whether the box lies inside the given region
203 | if x_min >= x1 and y_min >= y1 and x_max <= x2 and y_max <= y2:
204 | return text
205 | return None
206 |
207 | @staticmethod
208 | def click_on_text(ocr_data, target_text):
209 | """
210 | 根据识别到的文字,移动鼠标并点击目标文字的位置
211 | :param ocr_data:
212 | :param target_text:
213 | :return:
214 | """
215 | for item in ocr_data['data']:
216 | text = item['text']
217 | if target_text in text:
218 | box = item['box']
219 | x_min = min([point[0] for point in box])
220 | y_min = min([point[1] for point in box])
221 | x_max = max([point[0] for point in box])
222 | y_max = max([point[1] for point in box])
223 |                 # Compute the center and add a random offset
224 |                 center_x = (x_min + x_max) // 2 + AutomationTool.human_like_offset()
225 |                 center_y = (y_min + y_max) // 2 + AutomationTool.human_like_offset()
226 |                 # Get the current mouse position
227 |                 current_x, current_y = pyautogui.position()
228 |                 # Human-like mouse movement
229 |                 AutomationTool.move_mouse_smoothly((current_x, current_y), (center_x, center_y))
230 |                 # Wait a random amount of time
231 |                 time.sleep(AutomationTool.human_like_delay())
232 |                 # Click
233 |                 pyautogui.click()
234 |                 logging.info(f"Clicked text: {text} at ({center_x}, {center_y})")
235 |                 return True
236 |         logging.warning(f"Target text not found: {target_text}")
237 | return False
238 |
239 | @staticmethod
240 | def type_text(input_text):
241 | """
242 |         Quickly "type" the given text into the focused input box, paste-style
243 | :param input_text:
244 | :return:
245 | """
246 | try:
247 |             # Copy the text to the clipboard
248 |             pyperclip.copy(str(input_text))
249 |             # Simulate Ctrl+V (Windows/Linux) or Command+V (macOS)
250 |             pyautogui.hotkey('ctrl', 'v')
251 |         except Exception as e:
252 |             logging.error(f"Failed to type text: {e}")
253 |
254 | @staticmethod
255 | @retry_on_failure(retries=3, delay=1)
256 | def screenshot_and_click(target_text):
257 | """
258 |         Take a screenshot and click the given text
259 | :param target_text:
260 | :return:
261 | """
262 | ocr_data = AutomationTool.process_screenshot_for_ocr()
263 | if ocr_data:
264 |             # Click based on the target text
265 |             clicked = AutomationTool.click_on_text(ocr_data, target_text)
266 |             if clicked:
267 |                 time.sleep(1)
268 |                 logging.info(f"Successfully clicked target text: {target_text}")
269 |                 return True
270 |             else:
271 |                 logging.warning(f"Target text not found: {target_text}")
272 | return False
273 |
274 | @staticmethod
275 | def find_text_in_screen(target_text: str) -> bool:
276 | """
277 |         Take a screenshot and check whether a given text is present
278 | :param target_text:
279 | :return:
280 | """
281 | ocr_data = AutomationTool.process_screenshot_for_ocr()
282 | if ocr_data:
283 |             # Scan all recognized text for the target
284 |             for item in ocr_data['data']:
285 |                 if target_text in item['text']:
286 |                     logging.info(f"Found target text: {target_text}")
287 |                     return True
288 |         logging.warning(f"Target text not found: {target_text}")
289 | return False
290 |
291 | @staticmethod
292 | def find_image_in_screenshot(template_path, threshold=0.8):
293 | """
294 |         Search a screenshot for the given template image (grayscale matching)
295 |         :param template_path:
296 |         :param threshold: matching threshold
297 | :return:
298 | """
299 | screenshot = AutomationTool.capture_screenshot()
300 | screenshot_cv = AutomationTool.convert_image_to_opencv(screenshot)
301 |         # Convert to grayscale
302 |         screenshot_gray = cv2.cvtColor(screenshot_cv, cv2.COLOR_BGR2GRAY)
303 |         # Load the template image as grayscale
304 |         template = cv2.imread(template_path, cv2.IMREAD_GRAYSCALE)
305 |         if template is None:
306 |             logging.error(f"Could not read template image: {template_path}")
307 |             return None
308 |         # Get the template width and height
309 |         h, w = template.shape[:2]
310 |         # Locate the template via template matching
311 |         res = cv2.matchTemplate(screenshot_gray, template, cv2.TM_CCOEFF_NORMED)
312 |         # Get the best match location
313 |         min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
314 |         if max_val > threshold:
315 |             top_left = max_loc
316 |             center_x = top_left[0] + w // 2
317 |             center_y = top_left[1] + h // 2
318 |             return center_x, center_y
319 |         else:
320 |             logging.info("No matching image found")
321 | return None
322 |
323 | @staticmethod
324 | @retry_on_failure(retries=3, delay=2)
325 | def click_image(template_path):
326 | """
327 |         Find an image on the screen and click it
328 | :param template_path:
329 | :return:
330 | """
331 | position = AutomationTool.find_image_in_screenshot(template_path)
332 |         logging.info(f"Image position: {position}")
333 |         if position:
334 |             # Get the current mouse position
335 |             current_x, current_y = pyautogui.position()
336 |             # Move the mouse to the target position
337 |             AutomationTool.move_mouse_smoothly((current_x, current_y), position, duration=0.3)
338 |             # Wait a random amount of time
339 |             time.sleep(AutomationTool.human_like_delay())
340 |             # Click
341 |             pyautogui.click()
342 |             logging.info(f"Clicked image at: {position}")
343 |             return True
344 |         else:
345 |             logging.warning(f"Image not found: {template_path}")
346 | return False
347 |
348 | @staticmethod
349 | def click_image_until_another_appears(click_image_path, stop_image_path, max_attempts=10, delay_between_clicks=1):
350 | """
351 |         Keep clicking one image until another image appears.
352 |
353 |         :param click_image_path: path of the image to click
354 |         :param stop_image_path: path of the image that, once it appears, stops the clicking
355 |         :param max_attempts: maximum number of attempts
356 |         :param delay_between_clicks: delay between clicks, in seconds
357 |         :return: True if the stop image was found, otherwise False
358 | """
359 | for attempt in range(max_attempts):
360 |             # Check whether the stop image has appeared
361 |             if AutomationTool.find_image_in_screenshot(stop_image_path):
362 |                 logging.info(f"Found stop image: {stop_image_path}")
363 |                 return True
364 |
365 |             # Click the target image
366 |             AutomationTool.click_image(click_image_path)
367 |             logging.info(f"Clicked image: {click_image_path}, attempt: {attempt + 1}")
368 |
369 |             # Wait the configured delay
370 |             time.sleep(delay_between_clicks)
371 |
372 |         logging.warning(f"Reached the maximum of {max_attempts} attempts without finding stop image: {stop_image_path}")
373 | return False
374 |
375 | @staticmethod
376 | def process_screenshot_for_ocr():
377 | """
378 |         Capture the screen and run OCR on it, using a cache for speed
379 |         :return: OCR data
380 |         """
381 |         current_time = time.time()
382 |         # Check whether the cache is still valid
383 |         if AutomationTool._last_screenshot is not None:
384 |             if current_time - AutomationTool._last_screenshot_time < AutomationTool._screenshot_cache_duration:
385 |                 logging.info("Using the cached OCR result")
386 |                 return AutomationTool._last_ocr_result
387 |
388 |         # Capture the screen
389 |         image = AutomationTool.capture_screenshot()
390 |         # Hash the screenshot
391 |         image_hash = AutomationTool._calculate_image_hash(image)
392 |
393 |         # If the content is unchanged, return the cached OCR result directly
394 |         if AutomationTool._last_screenshot_hash == image_hash:
395 |             logging.info("Screenshot unchanged, using the cached OCR result")
396 |             AutomationTool._last_screenshot_time = current_time
397 |             return AutomationTool._last_ocr_result
398 |
399 |         # Update the cache
400 |         AutomationTool._last_screenshot = image
401 |         AutomationTool._last_screenshot_hash = image_hash
402 |         AutomationTool._last_screenshot_time = current_time
403 |
404 |         # Run OCR
405 |         image_base64 = AutomationTool.convert_image_to_base64(image)
406 |         ocr_result = AutomationTool.ocr_image(image_base64)
407 |
408 |         # Cache the OCR result
409 | AutomationTool._last_ocr_result = ocr_result
410 |
411 | return ocr_result
412 |
413 | @staticmethod
414 | def _calculate_image_hash(image):
415 | """
416 |         Compute a hash of the image
417 |         :param image: PIL image
418 |         :return: hash string
419 | """
420 | buffered = BytesIO()
421 | image.save(buffered, format="PNG")
422 | image_bytes = buffered.getvalue()
423 | return hashlib.md5(image_bytes).hexdigest()
424 |
425 | @staticmethod
426 | def click_image_sequence(image_paths, delay_between=1, max_wait=10):
427 | """
428 |         Recognize and click a series of images in order.
429 |         :param image_paths: list of image paths
430 |         :param delay_between: delay between attempts
431 |         :param max_wait: maximum time (seconds) to wait for each image to appear
432 |         :return: True if every image was clicked, otherwise False
433 | """
434 | for image_path in image_paths:
435 | start_time = time.time()
436 | while True:
437 | if AutomationTool.click_image(image_path):
438 | break
439 | if time.time() - start_time > max_wait:
440 |                 logging.warning(f"Could not find image within the time limit: {image_path}")
441 | return False
442 | time.sleep(delay_between)
443 |         logging.info("Successfully clicked all images")
444 | return True
445 |
446 |     @staticmethod
447 |     def move_and_swipe_with_hold(image_path, swipe_distance=200, direction='right', duration=0.5, button='left'):
448 |         """
449 |         Move the mouse to the position of an image, then hold a button and swipe a given distance.
450 |
451 |         :param image_path: path of the image to locate
452 |         :param swipe_distance: distance to swipe (pixels)
453 |         :param direction: swipe direction, one of 'right', 'left', 'top', 'bottom'
454 |         :param duration: duration of the move and swipe (seconds)
455 |         :param button: mouse button to hold, one of 'left', 'right', 'middle'
456 |         """
457 |         # Locate the target image on screen
458 |         position = AutomationTool.find_image_in_screenshot(image_path, 0.7)
459 |         if position is None:
460 |             logging.error(f"Image not found: {image_path}")
461 |             return False
462 |         x, y = position
463 |
464 |         # Get the current mouse position
465 |         current_x, current_y = pyautogui.position()
466 |         # Move the mouse to the image position
467 |         AutomationTool.move_mouse_smoothly((current_x, current_y), (x, y), duration=0.3)
468 |
469 |         # Pause briefly to make sure the mouse has reached the target
470 |         time.sleep(AutomationTool.human_like_delay())
471 |
472 |         # The press-and-hold is handled inside move_mouse_smoothly via hold_button,
473 |         # which presses, drags, and releases in one step.
474 |
475 |         # Compute the target position from the direction parameter
476 |         if direction == 'right':
477 |             target_x = x + swipe_distance
478 |             target_y = y
479 |         elif direction == 'left':
480 |             target_x = x - swipe_distance
481 |             target_y = y
482 |         elif direction == 'top':
483 |             target_x = x
484 |             target_y = y - swipe_distance
485 |         elif direction == 'bottom':
486 |             target_x = x
487 |             target_y = y + swipe_distance
488 |         else:
489 |             raise ValueError("Invalid direction. Use 'right', 'left', 'top', or 'bottom'.")
490 |
491 |         # Swipe to the target position while holding the button
492 |         AutomationTool.move_mouse_smoothly((x, y), (target_x, target_y), duration=duration, hold_button=button)
493 |         # Wait a random, human-like interval
494 |         time.sleep(AutomationTool.human_like_delay())
495 |
496 |         logging.info(f"Swiped from ({x}, {y}) to ({target_x}, {target_y}), direction: {direction}")
497 |         return True
498 |
499 |     @staticmethod
500 |     def press_enter():
501 |         """
502 |         Press the Enter key.
503 |         """
504 |         pyautogui.press('enter')
505 |
506 |     @staticmethod
507 |     def press_esc():
508 |         """
509 |         Press the Esc key.
510 |         """
511 |         pyautogui.press('esc')
512 |
513 |     @staticmethod
514 |     def human_like_delay(min_delay=0.1, max_delay=0.3):
515 |         """
516 |         Return a random delay between min_delay and max_delay.
517 |         """
518 |         return random.uniform(min_delay, max_delay)
519 |
520 |     @staticmethod
521 |     def human_like_offset(offset_range=2):
522 |         """
523 |         Return a random offset between -offset_range and offset_range.
524 |         """
525 |         return random.randint(-offset_range, offset_range)
526 |
527 |     @staticmethod
528 |     def move_mouse_smoothly(start_pos, end_pos, duration=0.5, hold_button=None):
529 |         """
530 |         Simulate human-like mouse movement using pyautogui's tween functions.
531 |         :param start_pos: start position (x, y)
532 |         :param end_pos: end position (x, y)
533 |         :param duration: total duration (seconds)
534 |         :param hold_button: mouse button to hold during the move, one of 'left', 'right', 'middle'
535 |         """
536 |         # Add a random offset to the end position
537 |         offset_x = AutomationTool.human_like_offset()
538 |         offset_y = AutomationTool.human_like_offset()
539 |         end_pos = (end_pos[0] + offset_x, end_pos[1] + offset_y)
540 |
541 |         # Pick a random easing function
542 |         tween_funcs = [
543 |             pyautogui.easeInQuad,
544 |             pyautogui.easeOutQuad,
545 |             pyautogui.easeInOutQuad,
546 |             pyautogui.easeInBounce,
547 |             pyautogui.easeOutBounce,
548 |             pyautogui.easeInElastic,
549 |             pyautogui.easeOutElastic,
550 |         ]
551 |         tween_func = random.choice(tween_funcs)
552 |
553 |         # Press the mouse button if requested
554 |         if hold_button:
555 |             pyautogui.mouseDown(button=hold_button)
556 |
557 |         # Move with pyautogui's moveTo, using the chosen duration and easing function
558 |         pyautogui.moveTo(end_pos[0], end_pos[1], duration=duration, tween=tween_func)
559 |
560 |         # Release the mouse button if requested
561 |         if hold_button:
562 |             pyautogui.mouseUp(button=hold_button)
563 |
564 |     @staticmethod
565 |     def custom_tween(x):
566 |         """Custom easing function; adjust the exponent on x to shape the speed curve."""
567 |         return x ** 2  # or any other easing curve over [0, 1]
569 |
570 |     @staticmethod
571 |     def _bezier_curve(points, n=50):
572 |         """
573 |         Generate the points of a Bezier curve.
574 |         :param points: list of control points
575 |         :param n: number of points to generate
576 |         :return: list of (x, y) points
577 |         """
578 |         result = []
579 |         for i in range(n + 1):
580 |             t = i / n
581 |             x = 0
582 |             y = 0
583 |             n_points = len(points)
584 |             for j, (px, py) in enumerate(points):
585 |                 bernstein = AutomationTool._bernstein_poly(j, n_points - 1, t)
586 |                 x += px * bernstein
587 |                 y += py * bernstein
588 |             result.append((x, y))
589 |         return result
590 |
591 |     @staticmethod
592 |     def _bernstein_poly(i, n, t):
593 |         """
594 |         Evaluate the Bernstein polynomial B(i, n) at t.
595 |         """
596 |         return math.comb(n, i) * (t ** i) * ((1 - t) ** (n - i))
597 |
598 | ```
599 |
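A minimal usage sketch, assuming the class above is saved as `automation_tool.py` and that the referenced template screenshots exist; the image file names are illustrative:

```python
import logging

from automation_tool import AutomationTool

logging.basicConfig(level=logging.INFO)

# Click "open.png", then keep clicking "next.png" until "done.png" appears
if AutomationTool.click_image("open.png"):
    AutomationTool.click_image_until_another_appears("next.png", "done.png", max_attempts=15)

# Drag the control found via "slider.png" 200 px to the right
AutomationTool.move_and_swipe_with_hold("slider.png", swipe_distance=200, direction='right')

# If an OCR backend is configured, a cached OCR pass over the screen is one call
ocr_data = AutomationTool.process_screenshot_for_ocr()
```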
--------------------------------------------------------------------------------
/docs/scripts/feige_export.md:
--------------------------------------------------------------------------------
1 | # Feige Knowledge Base Export
2 |
3 | This task came from helping a good friend of mine export the knowledge-base data from Feige (飞鸽, the Doudian shop chatbot). The interface is quite complex, and the data refresh at midnight every day also changes the XPaths. If the script below stops working, updating the XPaths is enough to fix it; one way to keep that painless is sketched right below.
4 |
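A sketch of the idea (not the actual layout of main.py): collect every fragile XPath in one table so the midnight breakage only ever requires editing a single block. The selector strings here are placeholders.

```python
from selenium.webdriver.common.by import By

# Placeholder selectors; when the midnight refresh breaks the script,
# this dict is the only place that needs editing.
XPATHS = {
    "category_list": "//div[@class='category']//li",
    "next_page": "//button[contains(., '下一页')]",
}

def find_all(driver, key):
    return driver.find_elements(By.XPATH, XPATHS[key])
```
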
5 | ## Tech Stack
6 | - pandas
7 | - selenium
8 |
9 | ## Overview of the Logic
10 |
11 | ### File Layout
12 |
13 | Startup & main logic -- main.py
14 | Excel I/O -- excel_io.py
15 | Entity class -- knowledge.py
16 | Close Edge so selenium can take over -- shutdown_edge.bat
17 | Assorted utilities -- util.py
18 |
19 | ### Logic
20 |
21 | 1. Download the driver && locate where Edge lives
22 |
23 | The Edge browser is used here
24 | > https://developer.microsoft.com/en-us/microsoft-edge/tools/webdriver/?form=MA13LH
25 |
26 | Most default installs put it at:
27 | > C:\Program Files (x86)\Microsoft\Edge\Application\msedge.exe
28 |
29 | 2. Use the default user profile's data; carrying cookies over was tried, and they expire
30 |
31 | ```python
32 | brave_path = r"C:\Program Files (x86)\Microsoft\Edge\Application\msedge.exe"
33 | options = webdriver.EdgeOptions()
34 | # options.binary_location = brave_path
35 | # Load the existing user data directory
36 | options.add_argument(r"--user-data-dir=C:\Users\Administrator\AppData\Local\Microsoft\Edge\User Data")
37 | options.add_argument(r"--profile-directory=Default")  # Load the default user profile
38 |
39 | service = webdriver.EdgeService(executable_path=r"C:\Users\Administrator\Desktop\Migration\msedgedriver.exe")
40 |
41 | driver = webdriver.Edge(options=options, service=service)
42 | ```
43 |
44 | 3. Open the Feige knowledge base
45 | ```python
46 | driver.get('https://im.jinritemai.com/pc_seller_v2/main/setting/robot/knowledge')
47 | ```
48 |
49 | 4. Main logic
50 |
51 | - a. Fetch the top-level categories
52 | - b. Click a category
53 | - c. Find its second-level categories
54 | - d. Click "custom knowledge"
55 | - e. Under each second-level category, collect all the data -- read the page count (computed) -- locate "next page" (for jumping) -- switch the page size to 100 items (the maximum)
56 | - f. When switching to 100 items per page, handle the case where the control does not exist, as well as empty pages
57 | - g. Compute the pagination with `total_pages = (total_data_count + page_size - 1) // page_size` (see the sketch after this list)
58 | - h. Then loop page by page, up to the total computed above
59 | - i. Read each knowledge entry -> convert it into a Knowledge entity -> append it to a list -> save to Excel
60 | - j. Back up the Excel file after every 20 incremental writes: it guards against data loss and lets my friend watch the progress
61 | - k. Wait for completion
62 |
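A minimal sketch of steps g and j, with assumed names: `fetch_page` stands in for the per-page scraping in main.py, and the file names are illustrative only.

```python
import pandas as pd

PAGE_SIZE = 100     # the maximum page size the UI offers
BACKUP_EVERY = 20   # back up after every 20 incremental writes

def export_all(total_data_count: int, fetch_page, out_path="knowledge.xlsx"):
    # Ceiling division: a final, partially filled page still counts as a page
    total_pages = (total_data_count + PAGE_SIZE - 1) // PAGE_SIZE

    rows = []
    for page_no in range(1, total_pages + 1):
        rows.extend(fetch_page(page_no))  # one list of dicts per page
        pd.DataFrame(rows).to_excel(out_path, index=False)
        if page_no % BACKUP_EVERY == 0:
            # Numbered backup copy, so a crash loses at most 20 pages of work
            pd.DataFrame(rows).to_excel(f"backup_{page_no}.xlsx", index=False)
```
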
63 | ## Result
64 |
65 | Deployed and working; for commercial reasons, none of the data is made public.
66 |
67 | 
68 |
69 | [View the code](https://github.com/zhiyu1998/feige_knowledge_export)
70 |
--------------------------------------------------------------------------------
/docs/scripts/images/ref-1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/scripts/images/ref-1-1.png
--------------------------------------------------------------------------------
/docs/scripts/images/ref-1-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/scripts/images/ref-1-2.png
--------------------------------------------------------------------------------
/docs/scripts/images/ref-3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/scripts/images/ref-3-1.png
--------------------------------------------------------------------------------
/docs/scripts/letpub.md:
--------------------------------------------------------------------------------
1 | # Extracting Journal/Conference Info from a Paper's References
2 | > The academic scripts basically come from my supervisor. Since I don't feel like doing the grunt work myself, I let Python do it for me!
3 |
4 | **Usage**
5 |
6 | Requirements:
7 |
8 | ● Look up the journal of each referenced paper
9 |
10 | ● Check whether any of the journals have a short review cycle
11 |
12 | Example references in Overleaf:
13 |
14 | > Leave a blank line between entries
15 |
16 |
17 |
18 | 
19 |
20 | ```
21 | @article{yu2019review,
22 | title={A review of recurrent neural networks: LSTM cells and network architectures},
23 | author={Yu, Yong and Si, Xiaosheng and Hu, Changhua and Zhang, Jianxun},
24 | journal={Neural computation},
25 | volume={31},
26 | number={7},
27 | pages={1235--1270},
28 | year={2019},
29 | publisher={MIT Press One Rogers Street, Cambridge, MA 02142-1209, USA journals-info~…}
30 | }
31 |
32 | @article{huang2015bidirectional,
33 | title={Bidirectional LSTM-CRF models for sequence tagging},
34 | author={Huang, Zhiheng and Xu, Wei and Yu, Kai},
35 | journal={arXiv preprint arXiv:1508.01991},
36 | year={2015}
37 | }
38 |
39 | @inproceedings{sundermeyer2012lstm,
40 | title={LSTM neural networks for language modeling},
41 | author={Sundermeyer, Martin and Schl{\"u}ter, Ralf and Ney, Hermann},
42 | booktitle={Thirteenth annual conference of the international speech communication association},
43 | year={2012}
44 | }
45 | ```
46 |
47 | Required libraries:
48 |
49 | ```properties
50 | pandas~=1.5.1
51 | requests~=2.28.1
52 | beautifulsoup4~=4.11.1
53 | lxml~=4.9.1
54 | openpyxl~=3.0.10
55 | ```
56 |
57 | Create a Python script:
58 |
59 | > Put the script in the same directory as ref.bib
60 |
61 | [View the full code](1-extra_letpub.py)
62 |
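The core idea, as a minimal sketch: pull every `journal`/`booktitle` field out of ref.bib and tabulate the venues. The real 1-extra_letpub.py additionally looks each venue up (that is what requests and beautifulsoup4 are required for); the snippet below is illustrative only.

```python
import re

import pandas as pd

# Read the bibliography and grab every journal / booktitle field
with open("ref.bib", encoding="utf-8") as f:
    bib = f.read()

venues = re.findall(r"(?:journal|booktitle)\s*=\s*\{(.+?)\}", bib)

# One row per reference; the lookup step would add review-cycle columns here
pd.DataFrame({"venue": venues}).to_excel("venues.xlsx", index=False)
```
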
63 | Example output:
64 |
65 | 
66 |
67 | #### Changelog
68 |
69 | 1.0
70 |
71 | - 2022-11-22 12:22 Added logging and fixed some issues
72 | - 2022-11-22 00:17 Initial version of the script
--------------------------------------------------------------------------------
/docs/scripts/syntaogf.md:
--------------------------------------------------------------------------------
1 | # Scraping ESG Ratings of Chinese Companies
2 | This task came from helping a good friend of mine collect ESG rating data for Chinese companies. The data is scraped from a website: [http://www.esgchina.org/](http://www.esgchina.org/).
3 |
4 | ## Tech Stack
5 | - pandas
6 | - selenium
7 |
8 | ## Logic
9 | The rough flow: open the page, run the searches, and write the data to Excel, with support for resuming the collection after an interruption.
10 |
11 | 1. First, figure out which data your source Excel file contains
12 | ```python
13 | # Read the data with pandas
14 | my_excel = pd.read_excel("./data.xls")
15 | company_names = my_excel.iloc[:, 2]
16 | ```
17 | 2. Where to resume after an interruption
18 | ```python
19 | # TODO: start crawling from record n
20 | # selenium scrapes the data
21 | n = 0
22 | ```
23 | 3. Download the driver, then fill in its path
24 | - Driver (must match your browser version): https://developer.microsoft.com/en-us/microsoft-edge/tools/webdriver/?form=MA13LH
25 | ```python
26 | options = webdriver.EdgeOptions()
27 | options.add_argument('--headless')
28 | options.add_argument('--disable-animations')
29 | # TODO: set the driver path, e.g. C:\\Users\\Administrator\\Documents\\PythonWorkSpace\\Test\\msedgedriver.exe
30 | s = Service(r"")
31 | ```
32 | 4. Start collecting the data (a sketch of the loop follows)
33 |
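A sketch of the collection loop, reusing `company_names`, `n`, pandas, and a `driver` built from the options above; the XPaths are placeholders, not the site's real selectors, which live in 2-extra_syntaogf.py.

```python
from selenium.webdriver.common.by import By

results = []
for i, name in enumerate(company_names[n:], start=n):
    driver.get("http://www.esgchina.org/")
    box = driver.find_element(By.XPATH, "//input[@type='text']")  # placeholder
    box.clear()
    box.send_keys(name)
    box.submit()
    rating = driver.find_element(By.XPATH, "//span[@class='rating']").text  # placeholder
    results.append({"index": i, "company": name, "esg_rating": rating})
    # Write every iteration, so an interruption costs at most one record;
    # on restart, set n to the last index saved in the sheet.
    pd.DataFrame(results).to_excel("esg_ratings.xlsx", index=False)
```
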
34 | [View the code](2-extra_syntaogf.py)
--------------------------------------------------------------------------------
/docusaurus.config.ts:
--------------------------------------------------------------------------------
1 | import {themes as prismThemes} from 'prism-react-renderer';
2 | import type {Config} from '@docusaurus/types';
3 | import type * as Preset from '@docusaurus/preset-classic';
4 |
5 | const config: Config = {
6 | title: '🐍 Python-Basis-Notes',
7 |   tagline: 'Your helping hand for getting started with Python: a knowledge framework for learning the Python basics 🐍 + web-scraping basics 🕷️ + NumPy basics 📊 + pandas basics 🐼 + deep learning 🍥 + a script library 📚',
8 | favicon: 'img/favicon.ico',
9 |
10 | // Set the production url of your site here
11 | url: 'https://zhiyu1998.github.io',
12 | // Set the // pathname under which your site is served
13 | // For GitHub pages deployment, it is often '//'
14 | baseUrl: '/Python-Basis-Notes/',
15 |
16 | // GitHub pages deployment config.
17 | // If you aren't using GitHub pages, you don't need these.
18 |   organizationName: 'zhiyu1998', // Usually your GitHub org/user name.
19 |   projectName: 'Python-Basis-Notes', // Usually your repo name.
20 |
21 | onBrokenLinks: 'throw',
22 | onBrokenMarkdownLinks: 'warn',
23 |
24 | // Even if you don't use internationalization, you can use this field to set
25 | // useful metadata like html lang. For example, if your site is Chinese, you
26 | // may want to replace "en" with "zh-Hans".
27 | i18n: {
28 | defaultLocale: 'en',
29 | locales: ['en'],
30 | },
31 |
32 | presets: [
33 | [
34 | 'classic',
35 | {
36 | docs: {
37 | sidebarPath: './sidebars.ts',
38 | // Please change this to your repo.
39 | // Remove this to remove the "edit this page" links.
40 | editUrl:
41 | 'https://github.com/facebook/docusaurus/tree/main/packages/create-docusaurus/templates/shared/',
42 | },
43 | blog: {
44 | showReadingTime: true,
45 | // Please change this to your repo.
46 | // Remove this to remove the "edit this page" links.
47 | editUrl:
48 | 'https://github.com/facebook/docusaurus/tree/main/packages/create-docusaurus/templates/shared/',
49 | },
50 | theme: {
51 | customCss: './src/css/custom.css',
52 | },
53 | } satisfies Preset.Options,
54 | ],
55 | ],
56 |
57 | themeConfig: {
58 | // Replace with your project's social card
59 | image: 'img/docusaurus-social-card.jpg',
60 | navbar: {
61 | title: 'Python-Basis-Notes',
62 | logo: {
63 |         alt: 'Python-Basis-Notes Logo',
64 | src: 'img/logo.svg',
65 | },
66 | items: [
67 | {
68 | type: 'docSidebar',
69 | sidebarId: 'tutorialSidebar',
70 | position: 'left',
71 |           label: 'Docs',
72 | },
73 | // {to: '/blog', label: 'Blog', position: 'left'},
74 | {
75 | href: 'https://github.com/zhiyu1998/Python-Basis-Notes',
76 | label: 'GitHub',
77 | position: 'right',
78 | },
79 | ],
80 | },
81 | footer: {
82 | style: 'dark',
83 | links: [
84 | {
85 |           title: 'Docs',
86 | items: [
87 | {
88 |               label: 'Enter the docs',
89 | to: '/docs/intro',
90 | },
91 | ],
92 | },
93 | {
94 |           title: 'Other Docs',
95 | items: [
96 | {
97 |               label: 'Java Basics',
98 | href: 'https://zhiyu1998.github.io/Computer-Science-Learn-Notes/Java/basic/basic.html',
99 | },
100 | {
101 |               label: 'Java Interview Prep',
102 | href: 'https://zhiyu1998.github.io/Computer-Science-Learn-Notes/Java/eightpart/giant.html',
103 | },
104 | ],
105 | },
106 | {
107 |           title: 'More',
108 | items: [
109 | // {
110 |           //   label: 'Blog (someday)',
111 | // to: '/blog',
112 | // },
113 | {
114 | label: 'GitHub',
115 | href: 'https://github.com/zhiyu1998/Python-Basis-Notes',
116 | },
117 | ],
118 | },
119 | ],
120 |       copyright: `Copyright © ${new Date().getFullYear()} Python-Basis-Notes. Built by zhiyu1998.`,
121 | },
122 | prism: {
123 | theme: prismThemes.github,
124 | darkTheme: prismThemes.dracula,
125 | },
126 | } satisfies Preset.ThemeConfig,
127 | };
128 |
129 | export default config;
130 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "mine",
3 | "version": "0.0.0",
4 | "private": true,
5 | "scripts": {
6 | "docusaurus": "docusaurus",
7 | "start": "docusaurus start",
8 | "build": "docusaurus build",
9 | "swizzle": "docusaurus swizzle",
10 | "deploy": "docusaurus deploy",
11 | "clear": "docusaurus clear",
12 | "serve": "docusaurus serve",
13 | "write-translations": "docusaurus write-translations",
14 | "write-heading-ids": "docusaurus write-heading-ids",
15 | "typecheck": "tsc"
16 | },
17 | "dependencies": {
18 | "@docusaurus/core": "3.1.1",
19 | "@docusaurus/preset-classic": "3.1.1",
20 | "@mdx-js/react": "^3.0.0",
21 | "clsx": "^2.0.0",
22 | "prism-react-renderer": "^2.3.0",
23 | "react": "^18.0.0",
24 | "react-dom": "^18.0.0"
25 | },
26 | "devDependencies": {
27 | "@docusaurus/module-type-aliases": "3.1.1",
28 | "@docusaurus/tsconfig": "3.1.1",
29 | "@docusaurus/types": "3.1.1",
30 | "typescript": "~5.2.2"
31 | },
32 | "browserslist": {
33 | "production": [
34 | ">0.5%",
35 | "not dead",
36 | "not op_mini all"
37 | ],
38 | "development": [
39 | "last 3 chrome version",
40 | "last 3 firefox version",
41 | "last 5 safari version"
42 | ]
43 | },
44 | "engines": {
45 | "node": ">=18.0"
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/sidebars.ts:
--------------------------------------------------------------------------------
1 | import type {SidebarsConfig} from '@docusaurus/plugin-content-docs';
2 |
3 | /**
4 | * Creating a sidebar enables you to:
5 | - create an ordered group of docs
6 | - render a sidebar for each doc of that group
7 | - provide next/previous navigation
8 |
9 | The sidebars can be generated from the filesystem, or explicitly defined here.
10 |
11 | Create as many sidebars as you want.
12 | */
13 | const sidebars: SidebarsConfig = {
14 | // By default, Docusaurus generates a sidebar from the docs folder structure
15 | tutorialSidebar: [{type: 'autogenerated', dirName: '.'}],
16 |
17 | // But you can create a sidebar manually
18 | /*
19 | tutorialSidebar: [
20 | 'intro',
21 | 'hello',
22 | {
23 | type: 'category',
24 | label: 'Tutorial',
25 | items: ['basics/create-a-document'],
26 | },
27 | ],
28 | */
29 | };
30 |
31 | export default sidebars;
32 |
--------------------------------------------------------------------------------
/src/components/HomepageFeatures/index.tsx:
--------------------------------------------------------------------------------
1 | import clsx from 'clsx';
2 | import Heading from '@theme/Heading';
3 | import styles from './styles.module.css';
4 |
5 | type FeatureItem = {
6 | title: string;
7 |   Svg: React.ComponentType<React.ComponentProps<'svg'>>;
8 | description: JSX.Element;
9 | };
10 |
11 | const FeatureList: FeatureItem[] = [
12 |   {
13 |     title: 'Python Basics & Handy Scripts',
14 |     Svg: require('@site/static/img/undraw_docusaurus_mountain.svg').default,
15 |     description: (
16 |       <>
17 |         Detailed Python mind maps and Python scripts accumulated over many years; use them to your heart's content.
18 |       </>
19 |     ),
20 |   },
21 |   {
22 |     title: 'Data Analysis',
23 |     Svg: require('@site/static/img/undraw_docusaurus_tree.svg').default,
24 |     description: (
25 |       <>
26 |         Learn NumPy and pandas at takeoff speed 🛫
27 |       </>
28 |     ),
29 |   },
30 |   {
31 |     title: 'Deep Learning',
32 |     Svg: require('@site/static/img/undraw_docusaurus_react.svg').default,
33 |     description: (
34 |       <>
35 |         How beginners can quickly get into deep learning, image recognition, time-series forecasting, and more, plus my own research area (graph convolutional networks).
36 |       </>
37 |     ),
38 |   },
39 | ];
40 |
41 | function Feature({title, Svg, description}: FeatureItem) {
42 |   return (
43 |     <div className={clsx('col col--4')}>
44 |       <div className="text--center">
45 |         <Svg className={styles.featureSvg} role="img" />
46 |       </div>
47 |       <div className="text--center padding-horiz--md">
48 |         <Heading as="h3">{title}</Heading>
49 |         <p>{description}</p>
50 |       </div>
51 |     </div>
52 |   );
53 | }
54 |
55 | export default function HomepageFeatures(): JSX.Element {
56 |   return (
57 |     <section className={styles.features}>
58 |       <div className="container">
59 |         <div className="row">
60 |           {FeatureList.map((props, idx) => (
61 |             <Feature key={idx} {...props} />
62 |           ))}
63 |         </div>
64 |       </div>
65 |     </section>
66 |   );
67 | }
68 |
--------------------------------------------------------------------------------
/src/components/HomepageFeatures/styles.module.css:
--------------------------------------------------------------------------------
1 | .features {
2 | display: flex;
3 | align-items: center;
4 | padding: 2rem 0;
5 | width: 100%;
6 | }
7 |
8 | .featureSvg {
9 | height: 200px;
10 | width: 200px;
11 | }
12 |
--------------------------------------------------------------------------------
/src/css/custom.css:
--------------------------------------------------------------------------------
1 | /**
2 | * Any CSS included here will be global. The classic template
3 | * bundles Infima by default. Infima is a CSS framework designed to
4 | * work well for content-centric websites.
5 | */
6 |
7 | /* You can override the default Infima variables here. */
8 | :root {
9 | --ifm-color-primary: #2e8555;
10 | --ifm-color-primary-dark: #29784c;
11 | --ifm-color-primary-darker: #277148;
12 | --ifm-color-primary-darkest: #205d3b;
13 | --ifm-color-primary-light: #33925d;
14 | --ifm-color-primary-lighter: #359962;
15 | --ifm-color-primary-lightest: #3cad6e;
16 | --ifm-code-font-size: 95%;
17 | --docusaurus-highlighted-code-line-bg: rgba(0, 0, 0, 0.1);
18 | }
19 |
20 | /* For readability concerns, you should choose a lighter palette in dark mode. */
21 | [data-theme='dark'] {
22 | --ifm-color-primary: #25c2a0;
23 | --ifm-color-primary-dark: #21af90;
24 | --ifm-color-primary-darker: #1fa588;
25 | --ifm-color-primary-darkest: #1a8870;
26 | --ifm-color-primary-light: #29d5b0;
27 | --ifm-color-primary-lighter: #32d8b4;
28 | --ifm-color-primary-lightest: #4fddbf;
29 | --docusaurus-highlighted-code-line-bg: rgba(0, 0, 0, 0.3);
30 | }
31 |
--------------------------------------------------------------------------------
/src/pages/index.module.css:
--------------------------------------------------------------------------------
1 | /**
2 | * CSS files with the .module.css suffix will be treated as CSS modules
3 | * and scoped locally.
4 | */
5 |
6 | .heroBanner {
7 | padding: 4rem 0;
8 | text-align: center;
9 | position: relative;
10 | overflow: hidden;
11 | }
12 |
13 | @media screen and (max-width: 996px) {
14 | .heroBanner {
15 | padding: 2rem;
16 | }
17 | }
18 |
19 | .buttons {
20 | display: flex;
21 | align-items: center;
22 | justify-content: center;
23 | }
24 |
--------------------------------------------------------------------------------
/src/pages/index.tsx:
--------------------------------------------------------------------------------
1 | import clsx from 'clsx';
2 | import Link from '@docusaurus/Link';
3 | import useDocusaurusContext from '@docusaurus/useDocusaurusContext';
4 | import Layout from '@theme/Layout';
5 | import HomepageFeatures from '@site/src/components/HomepageFeatures';
6 | import Heading from '@theme/Heading';
7 |
8 | import styles from './index.module.css';
9 |
10 | function HomepageHeader() {
11 | const {siteConfig} = useDocusaurusContext();
12 | return (
13 |     <header className={clsx('hero hero--primary', styles.heroBanner)}>
14 |       <div className="container">
15 |         <Heading as="h1" className="hero__title">
16 |           {siteConfig.title}
17 |         </Heading>
18 |         <p className="hero__subtitle">{siteConfig.tagline}</p>
19 |         <div className={styles.buttons}>
20 |           <Link
21 |             className="button button--secondary button--lg"
22 |             to="/docs/intro">
23 |             Docusaurus Tutorial - 5min ⏱️
24 |           </Link>
25 |         </div>
26 |       </div>
27 |     </header>
28 |   );
29 | }
30 |
31 | export default function Home(): JSX.Element {
32 | const {siteConfig} = useDocusaurusContext();
33 | return (
34 |     <Layout
35 |       title={`Hello from ${siteConfig.title}`}
36 |       description="Description will go into a meta tag in <head />">
37 |       <HomepageHeader />
38 |       <main>
39 |         <HomepageFeatures />
40 |       </main>
41 |     </Layout>
42 |   );
43 | }
44 |
--------------------------------------------------------------------------------
/src/pages/markdown-page.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Markdown page example
3 | ---
4 |
5 | # Markdown page example
6 |
7 | You don't need React to write simple standalone pages.
8 |
--------------------------------------------------------------------------------
/static/.nojekyll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/static/.nojekyll
--------------------------------------------------------------------------------
/static/img/docusaurus-social-card.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/static/img/docusaurus-social-card.jpg
--------------------------------------------------------------------------------
/static/img/docusaurus.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/static/img/docusaurus.png
--------------------------------------------------------------------------------
/static/img/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/static/img/favicon.ico
--------------------------------------------------------------------------------
/static/img/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/static/img/logo.png
--------------------------------------------------------------------------------
/static/img/logo.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/static/img/undraw_docusaurus_mountain.svg:
--------------------------------------------------------------------------------
1 |
172 |
--------------------------------------------------------------------------------
/static/img/undraw_docusaurus_tree.svg:
--------------------------------------------------------------------------------
1 |
41 |
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | // This file is not used in compilation. It is here just for a nice editor experience.
3 | "extends": "@docusaurus/tsconfig",
4 | "compilerOptions": {
5 | "baseUrl": "."
6 | }
7 | }
8 |
--------------------------------------------------------------------------------