├── .github
│   └── workflows
│       └── documentation.yml
├── .gitignore
├── LICENSE
├── README.md
├── babel.config.js
├── docs
│   ├── basics
│   │   ├── _category_.json
│   │   ├── base-recommend.md
│   │   ├── base.md
│   │   └── images
│   │       ├── QuickReference.png
│   │       ├── QuickReference_Pytorch.png
│   │       ├── clumsylegalpython.png
│   │       ├── logo.png
│   │       ├── 函数.png
│   │       ├── 基础知识.png
│   │       ├── 文件.png
│   │       ├── 模块.png
│   │       ├── 爬虫.png
│   │       ├── 类和对象.png
│   │       └── 魔法方法.png
│   ├── data-analysis
│   │   ├── _category_.json
│   │   ├── files
│   │   │   ├── 01-choice_data.ipynb
│   │   │   ├── 02-setting-value.ipynb
│   │   │   ├── 03-process-dropout-data.ipynb
│   │   │   ├── 04-import-and-export.ipynb
│   │   │   ├── 05-concat.ipynb
│   │   │   ├── 06-merge.ipynb
│   │   │   ├── 07-plot.ipynb
│   │   │   └── student.csv
│   │   ├── img
│   │   │   ├── 3-1.png
│   │   │   ├── 3-10.png
│   │   │   ├── 3-11.png
│   │   │   ├── 3-12.png
│   │   │   ├── 3-2.png
│   │   │   ├── 3-3.png
│   │   │   ├── 3-4.png
│   │   │   ├── 3-5.png
│   │   │   ├── 3-6.png
│   │   │   ├── 3-7.png
│   │   │   ├── 3-8.png
│   │   │   └── 3-9.png
│   │   ├── numpy.md
│   │   └── pandas.md
│   ├── deeplearning
│   │   ├── _category_.json
│   │   ├── deeplearning.md
│   │   ├── graph.mdx
│   │   └── images
│   │       └── pyg.png
│   ├── intro.md
│   └── scripts
│       ├── 1-extra_letpub.py
│       ├── 2-extra_syntaogf.py
│       ├── _category_.json
│       ├── auto_ocr_framework.md
│       ├── feige_export.md
│       ├── images
│       │   ├── ref-1-1.png
│       │   ├── ref-1-2.png
│       │   └── ref-3-1.png
│       ├── letpub.md
│       └── syntaogf.md
├── docusaurus.config.ts
├── package-lock.json
├── package.json
├── sidebars.ts
├── src
│   ├── components
│   │   └── HomepageFeatures
│   │       ├── index.tsx
│   │       └── styles.module.css
│   ├── css
│   │   └── custom.css
│   └── pages
│       ├── index.module.css
│       ├── index.tsx
│       └── markdown-page.md
├── static
│   ├── .nojekyll
│   └── img
│       ├── docusaurus-social-card.jpg
│       ├── docusaurus.png
│       ├── favicon.ico
│       ├── logo.png
│       ├── logo.svg
│       ├── undraw_docusaurus_mountain.svg
│       ├── undraw_docusaurus_react.svg
│       └── undraw_docusaurus_tree.svg
└── tsconfig.json

--------------------------------------------------------------------------------
/.github/workflows/documentation.yml:
--------------------------------------------------------------------------------
 1 | name: 🚀Deploy Github pages
 2 | on:
 3 |   push:
 4 |     branches:
 5 |       - master
 6 | jobs:
 7 |   build-and-deploy:
 8 |     runs-on: ubuntu-latest
 9 |     steps:
10 |       - name: Checkout 🛎️
11 |         uses: actions/checkout@v3
12 | 
13 |       - name: Install and Build 🔧
14 |         run: |
15 |           npm install
16 |           npm run build
17 | 
18 |       - name: Deploy 🚀
19 |         uses: JamesIves/github-pages-deploy-action@v4.3.3
20 |         with:
21 |           branch: gh-pages # The branch the action should deploy to.
22 |           folder: build # The folder the action should deploy.
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Dependencies
 2 | /node_modules
 3 | 
 4 | # Production
 5 | /build
 6 | 
 7 | # Generated files
 8 | .docusaurus
 9 | .cache-loader
10 | .idea
11 | 
12 | # Misc
13 | .DS_Store
14 | .env.local
15 | .env.development.local
16 | .env.test.local
17 | .env.production.local
18 | 
19 | npm-debug.log*
20 | yarn-debug.log*
21 | yarn-error.log*
22 | 

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 Zhiyu
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ## Python-Basis-Notes
 2 | 
 3 | 
 4 | 
 5 | 
 6 | 
 7 | 
 8 | 
 9 | 
10 | 
11 | Python-Basis-Note, your handy companion for getting started with Python
12 | 
13 | [![Visits](https://profile-counter.glitch.me/Python-Basis-Notes/count.svg)](https://github.com/zhiyu1998/Python-Basis-Notes)
14 | 
15 | 
16 | 
17 | 
18 | 
19 | 
20 | 
21 | 
22 | * 🐍 Python basics
23 | * 📊 NumPy basics
24 | * 🐼 Pandas basics
25 | * 🍥 Deep learning basics + roadmap
26 | * 📚 Script library
27 | 
28 | ### 📑 Read online
29 | https://zhiyu1998.github.io/Python-Basis-Notes/
30 | 
31 | ### 🐍 Script library
32 | 
33 | - [Extract journal/conference info for a paper's references](./docs/scripts/letpub.md)
34 | - [Collect ESG ratings of Chinese companies](./docs/scripts/syntaogf.md)
35 | - [Export a Feige knowledge base](./docs/scripts/feige_export.md)
36 | - [Automated OCR framework](./docs/scripts/auto_ocr_framework.md)
37 | 
38 | ### 📈 Star history
39 | 
40 | ![](https://api.star-history.com/svg?repos=zhiyu1998/Python-Basis-Notes&type=Date)
41 | 

--------------------------------------------------------------------------------
/babel.config.js:
--------------------------------------------------------------------------------
1 | module.exports = {
2 |   presets: [require.resolve('@docusaurus/core/lib/babel/preset')],
3 | };
4 | 

--------------------------------------------------------------------------------
/docs/basics/_category_.json:
--------------------------------------------------------------------------------
1 | {
2 |   "label": "基础知识",
3 |   "position": 2,
4 |   "link": {
5 |     "type": "generated-index",
6 |     "description": "Python fundamentals: mind maps and cheat sheets."
7 |   }
8 | }
9 | 

--------------------------------------------------------------------------------
/docs/basics/base-recommend.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | sidebar_position: 2
 3 | ---
 4 | 
 5 | # Step 2: Cheat Sheets
 6 | 
 7 | ## Practice Makes Perfect - Learn Python the Hard Way
 8 | This book teaches you to build up your Python skills gradually through practice and memorization, then apply them to increasingly difficult problems. By the end, you will have the tools you need to start tackling more complex programs. As the author likes to put it, the book gives you a "programming black belt": you know enough of the basics to really start learning to program.
 9 | 
10 | > Try it: https://wizardforcel.gitbooks.io/lpthw/content/
11 | 
12 | ![clumsylegalpython](./images/clumsylegalpython.png)
13 | 
14 | ## Quick Reference
15 | This site offers quick lookups for commonly used Python functions, so you can rapidly check how a function is used.
16 | 
17 | > Try it: https://quickref.cn/docs/python.html
18 | 
19 | ![QuickReference.png](./images/QuickReference.png)
20 | 
21 | ### Deep learning: the PyTorch cheat sheet
22 | 
23 | > Try it: https://quickref.cn/docs/pytorch.html
24 | 
25 | ![QuickReference.png](./images/QuickReference_Pytorch.png)

--------------------------------------------------------------------------------
/docs/basics/base.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | sidebar_position: 1
 3 | ---
 4 | # Step 1: Mind Maps
 5 | 
 6 | ### 🍭 Preface
 7 | 
 8 | These mind maps were drawn when I first learned Python. I hope they help you; if you learn something useful from them, please leave a star~
 9 | 
10 | ### 🐌 Changelog
11 | 
12 | 2022-11-23 Added the automation script library: scripts written day to day for personal needs
13 | 
14 | 2022-06-17 Added deep-learning material; removed the mind-map source files.
15 | 
16 | 2021-10-23 Renamed the project to Python-Basis-Notes; added data-science material: the *Pandas* Jupyter notebooks [note: Pandas course at https://www.bilibili.com/video/BV1Ex411L7oT]
17 | 2021-07-15 Renamed to Python-XMind-Note; added data-science material: the *Numpy* Markdown notes
18 | 
19 | ### 🍬 Basics
20 | 
21 | ![基础知识](./images/%E5%9F%BA%E7%A1%80%E7%9F%A5%E8%AF%86.png)
22 | 
23 | ### 📲 Functions
24 | 
25 | ![函数](./images/%E5%87%BD%E6%95%B0.png)
26 | 
27 | ### 📁 Files
28 | 
29 | ![文件](./images/%E6%96%87%E4%BB%B6.png)
30 | 
31 | ### 🗝️ Classes and Objects
32 | 
33 | ![类和对象](./images/%E7%B1%BB%E5%92%8C%E5%AF%B9%E8%B1%A1.png)
34 | 
35 | ### 🧙 Magic Methods
36 | 
37 | ![魔法方法](./images/%E9%AD%94%E6%B3%95%E6%96%B9%E6%B3%95.png)
38 | 
39 | ### 🃏 Modules
40 | 
41 | ![模块](./images/%E6%A8%A1%E5%9D%97.png)
42 | 
43 | ### 🐞 Web Scraping
44 | 
45 | ![爬虫](./images/%E7%88%AC%E8%99%AB.png)
46 | 
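The Classes and Objects and Magic Methods maps above boil down to a handful of special methods that Python calls on your behalf. A minimal sketch, using an illustrative `Point` class chosen here as an example (it is not taken from the mind maps themselves):

import math  # stdlib only; nothing beyond core Python is needed

class Point:
    """A 2D point demonstrating the most common magic methods."""

    def __init__(self, x, y):
        # constructor: runs when you write Point(1, 2)
        self.x = x
        self.y = y

    def __repr__(self):
        # unambiguous text form, used by repr() and the interactive prompt
        return f"Point({self.x}, {self.y})"

    def __add__(self, other):
        # called for p + q
        return Point(self.x + other.x, self.y + other.y)

    def __eq__(self, other):
        # called for p == q, comparing by value instead of identity
        return (self.x, self.y) == (other.x, other.y)

    def __abs__(self):
        # called for abs(p): distance from the origin
        return math.hypot(self.x, self.y)

p, q = Point(1, 2), Point(3, 4)
print(p + q)             # Point(4, 6)
print(p == Point(1, 2))  # True
print(abs(Point(3, 4)))  # 5.0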
--------------------------------------------------------------------------------
/docs/basics/images/QuickReference.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/basics/images/QuickReference.png

--------------------------------------------------------------------------------
/docs/basics/images/QuickReference_Pytorch.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/basics/images/QuickReference_Pytorch.png

--------------------------------------------------------------------------------
/docs/basics/images/clumsylegalpython.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/basics/images/clumsylegalpython.png

--------------------------------------------------------------------------------
/docs/basics/images/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/basics/images/logo.png

--------------------------------------------------------------------------------
/docs/basics/images/函数.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/basics/images/函数.png

--------------------------------------------------------------------------------
/docs/basics/images/基础知识.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/basics/images/基础知识.png

--------------------------------------------------------------------------------
/docs/basics/images/文件.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/basics/images/文件.png

--------------------------------------------------------------------------------
/docs/basics/images/模块.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/basics/images/模块.png

--------------------------------------------------------------------------------
/docs/basics/images/爬虫.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/basics/images/爬虫.png

--------------------------------------------------------------------------------
/docs/basics/images/类和对象.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/basics/images/类和对象.png

--------------------------------------------------------------------------------
/docs/basics/images/魔法方法.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/basics/images/魔法方法.png

--------------------------------------------------------------------------------
/docs/data-analysis/_category_.json:
-------------------------------------------------------------------------------- 1 | { 2 | "label": "数据分析", 3 | "position": 3, 4 | "link": { 5 | "type": "generated-index" 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /docs/data-analysis/files/01-choice_data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "collapsed": true, 7 | "pycharm": { 8 | "name": "#%% md\n" 9 | } 10 | }, 11 | "source": [ 12 | "## 选择数据" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 1, 18 | "outputs": [], 19 | "source": [ 20 | "import pandas as pd\n", 21 | "import numpy as np" 22 | ], 23 | "metadata": { 24 | "collapsed": false, 25 | "pycharm": { 26 | "name": "#%%\n" 27 | } 28 | } 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 4, 33 | "outputs": [], 34 | "source": [ 35 | "dates = pd.date_range('20130101', periods=6)\n", 36 | "df = pd.DataFrame(np.arange(24).reshape((6,4)), index=dates, columns=['A', 'B', 'C', 'D'])" 37 | ], 38 | "metadata": { 39 | "collapsed": false, 40 | "pycharm": { 41 | "name": "#%%\n" 42 | } 43 | } 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 5, 48 | "outputs": [ 49 | { 50 | "name": "stdout", 51 | "output_type": "stream", 52 | "text": [ 53 | " A B C D\n", 54 | "2013-01-01 0 1 2 3\n", 55 | "2013-01-02 4 5 6 7\n", 56 | "2013-01-03 8 9 10 11\n", 57 | "2013-01-04 12 13 14 15\n", 58 | "2013-01-05 16 17 18 19\n", 59 | "2013-01-06 20 21 22 23\n" 60 | ] 61 | } 62 | ], 63 | "source": [ 64 | "print(df)" 65 | ], 66 | "metadata": { 67 | "collapsed": false, 68 | "pycharm": { 69 | "name": "#%%\n" 70 | } 71 | } 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 6, 76 | "outputs": [ 77 | { 78 | "name": "stdout", 79 | "output_type": "stream", 80 | "text": [ 81 | "2013-01-01 0\n", 82 | "2013-01-02 4\n", 83 | "2013-01-03 8\n", 84 | "2013-01-04 12\n", 85 | "2013-01-05 16\n", 86 | "2013-01-06 20\n", 87 | "Freq: D, Name: A, dtype: int32\n" 88 | ] 89 | } 90 | ], 91 | "source": [ 92 | "print(df['A'])" 93 | ], 94 | "metadata": { 95 | "collapsed": false, 96 | "pycharm": { 97 | "name": "#%%\n" 98 | } 99 | } 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 7, 104 | "outputs": [ 105 | { 106 | "name": "stdout", 107 | "output_type": "stream", 108 | "text": [ 109 | "2013-01-01 0\n", 110 | "2013-01-02 4\n", 111 | "2013-01-03 8\n", 112 | "2013-01-04 12\n", 113 | "2013-01-05 16\n", 114 | "2013-01-06 20\n", 115 | "Freq: D, Name: A, dtype: int32\n" 116 | ] 117 | } 118 | ], 119 | "source": [ 120 | "print(df.A)" 121 | ], 122 | "metadata": { 123 | "collapsed": false, 124 | "pycharm": { 125 | "name": "#%%\n" 126 | } 127 | } 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 8, 132 | "outputs": [ 133 | { 134 | "data": { 135 | "text/plain": " A B C D\n2013-01-01 0 1 2 3\n2013-01-02 4 5 6 7\n2013-01-03 8 9 10 11", 136 | "text/html": "
" 137 | }, 138 | "execution_count": 8, 139 | "metadata": {}, 140 | "output_type": "execute_result" 141 | } 142 | ], 143 | "source": [ 144 | "df[0:3]" 145 | ], 146 | "metadata": { 147 | "collapsed": false, 148 | "pycharm": { 149 | "name": "#%%\n" 150 | } 151 | } 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 10, 156 | "outputs": [ 157 | { 158 | "data": { 159 | "text/plain": " A B C D\n2013-01-02 4 5 6 7\n2013-01-03 8 9 10 11\n2013-01-04 12 13 14 15", 160 | "text/html": "
" 161 | }, 162 | "execution_count": 10, 163 | "metadata": {}, 164 | "output_type": "execute_result" 165 | } 166 | ], 167 | "source": [ 168 | "df['20130102':'20130104']" 169 | ], 170 | "metadata": { 171 | "collapsed": false, 172 | "pycharm": { 173 | "name": "#%%\n" 174 | } 175 | } 176 | }, 177 | { 178 | "cell_type": "markdown", 179 | "source": [ 180 | "### loc" 181 | ], 182 | "metadata": { 183 | "collapsed": false, 184 | "pycharm": { 185 | "name": "#%% md\n" 186 | } 187 | } 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 11, 192 | "outputs": [ 193 | { 194 | "data": { 195 | "text/plain": "A 4\nB 5\nC 6\nD 7\nName: 2013-01-02 00:00:00, dtype: int32" 196 | }, 197 | "execution_count": 11, 198 | "metadata": {}, 199 | "output_type": "execute_result" 200 | } 201 | ], 202 | "source": [ 203 | "df.loc['20130102']" 204 | ], 205 | "metadata": { 206 | "collapsed": false, 207 | "pycharm": { 208 | "name": "#%%\n" 209 | } 210 | } 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": 15, 215 | "outputs": [ 216 | { 217 | "data": { 218 | "text/plain": "A 4\nB 5\nName: 2013-01-02 00:00:00, dtype: int32" 219 | }, 220 | "execution_count": 15, 221 | "metadata": {}, 222 | "output_type": "execute_result" 223 | } 224 | ], 225 | "source": [ 226 | "# 纵向\n", 227 | "df.loc['20130102', ['A', 'B']]" 228 | ], 229 | "metadata": { 230 | "collapsed": false, 231 | "pycharm": { 232 | "name": "#%%\n" 233 | } 234 | } 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "source": [ 239 | "### iloc" 240 | ], 241 | "metadata": { 242 | "collapsed": false, 243 | "pycharm": { 244 | "name": "#%% md\n" 245 | } 246 | } 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": 18, 251 | "outputs": [ 252 | { 253 | "data": { 254 | "text/plain": " A B C D\n2013-01-01 0 1 2 3\n2013-01-02 4 5 6 7\n2013-01-03 8 9 10 11\n2013-01-04 12 13 14 15\n2013-01-05 16 17 18 19\n2013-01-06 20 21 22 23", 255 | "text/html": "
" 256 | }, 257 | "execution_count": 18, 258 | "metadata": {}, 259 | "output_type": "execute_result" 260 | } 261 | ], 262 | "source": [ 263 | "df" 264 | ], 265 | "metadata": { 266 | "collapsed": false, 267 | "pycharm": { 268 | "name": "#%%\n" 269 | } 270 | } 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": 16, 275 | "outputs": [ 276 | { 277 | "data": { 278 | "text/plain": "A 12\nB 13\nC 14\nD 15\nName: 2013-01-04 00:00:00, dtype: int32" 279 | }, 280 | "execution_count": 16, 281 | "metadata": {}, 282 | "output_type": "execute_result" 283 | } 284 | ], 285 | "source": [ 286 | "# 选择第三行(0开始)\n", 287 | "df.iloc[3]" 288 | ], 289 | "metadata": { 290 | "collapsed": false, 291 | "pycharm": { 292 | "name": "#%%\n" 293 | } 294 | } 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": 17, 299 | "outputs": [ 300 | { 301 | "data": { 302 | "text/plain": "13" 303 | }, 304 | "execution_count": 17, 305 | "metadata": {}, 306 | "output_type": "execute_result" 307 | } 308 | ], 309 | "source": [ 310 | "# 选择第三行,第二列(0开始)\n", 311 | "df.iloc[3, 1]" 312 | ], 313 | "metadata": { 314 | "collapsed": false, 315 | "pycharm": { 316 | "name": "#%%\n" 317 | } 318 | } 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": 19, 323 | "outputs": [ 324 | { 325 | "data": { 326 | "text/plain": " B C\n2013-01-04 13 14\n2013-01-05 17 18", 327 | "text/html": "
" 328 | }, 329 | "execution_count": 19, 330 | "metadata": {}, 331 | "output_type": "execute_result" 332 | } 333 | ], 334 | "source": [ 335 | "df.iloc[3:5, 1:3]" 336 | ], 337 | "metadata": { 338 | "collapsed": false, 339 | "pycharm": { 340 | "name": "#%%\n" 341 | } 342 | } 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": 20, 347 | "outputs": [ 348 | { 349 | "data": { 350 | "text/plain": " B C\n2013-01-02 5 6\n2013-01-04 13 14\n2013-01-06 21 22", 351 | "text/html": "
" 352 | }, 353 | "execution_count": 20, 354 | "metadata": {}, 355 | "output_type": "execute_result" 356 | } 357 | ], 358 | "source": [ 359 | "df.iloc[[1,3,5], 1:3]" 360 | ], 361 | "metadata": { 362 | "collapsed": false, 363 | "pycharm": { 364 | "name": "#%%\n" 365 | } 366 | } 367 | }, 368 | { 369 | "cell_type": "markdown", 370 | "source": [ 371 | "### ix [版本过期]" 372 | ], 373 | "metadata": { 374 | "collapsed": false, 375 | "pycharm": { 376 | "name": "#%% md\n" 377 | } 378 | } 379 | }, 380 | { 381 | "cell_type": "code", 382 | "execution_count": 21, 383 | "outputs": [ 384 | { 385 | "ename": "AttributeError", 386 | "evalue": "'DataFrame' object has no attribute 'ix'", 387 | "output_type": "error", 388 | "traceback": [ 389 | "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m", 390 | "\u001B[1;31mAttributeError\u001B[0m Traceback (most recent call last)", 391 | "\u001B[1;32m\u001B[0m in \u001B[0;36m\u001B[1;34m\u001B[0m\n\u001B[1;32m----> 1\u001B[1;33m \u001B[0mdf\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mix\u001B[0m\u001B[1;33m[\u001B[0m\u001B[1;33m:\u001B[0m\u001B[1;36m3\u001B[0m\u001B[1;33m,\u001B[0m \u001B[1;33m[\u001B[0m\u001B[1;34m'A'\u001B[0m\u001B[1;33m,\u001B[0m \u001B[1;34m'C'\u001B[0m\u001B[1;33m]\u001B[0m\u001B[1;33m]\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m", 392 | "\u001B[1;32mD:\\anaconda3\\lib\\site-packages\\pandas\\core\\generic.py\u001B[0m in \u001B[0;36m__getattr__\u001B[1;34m(self, name)\u001B[0m\n\u001B[0;32m 5463\u001B[0m \u001B[1;32mif\u001B[0m \u001B[0mself\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0m_info_axis\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0m_can_hold_identifiers_and_holds_name\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mname\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m:\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 5464\u001B[0m \u001B[1;32mreturn\u001B[0m \u001B[0mself\u001B[0m\u001B[1;33m[\u001B[0m\u001B[0mname\u001B[0m\u001B[1;33m]\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m-> 5465\u001B[1;33m \u001B[1;32mreturn\u001B[0m \u001B[0mobject\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0m__getattribute__\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mself\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mname\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m 5466\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 5467\u001B[0m \u001B[1;32mdef\u001B[0m \u001B[0m__setattr__\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mself\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mname\u001B[0m\u001B[1;33m:\u001B[0m \u001B[0mstr\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mvalue\u001B[0m\u001B[1;33m)\u001B[0m \u001B[1;33m->\u001B[0m \u001B[1;32mNone\u001B[0m\u001B[1;33m:\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n", 393 | "\u001B[1;31mAttributeError\u001B[0m: 'DataFrame' object has no attribute 'ix'" 394 | ] 395 | } 396 | ], 397 | "source": [ 398 | "df.ix[:3, ['A', 'C']]" 399 | ], 400 | "metadata": { 401 | "collapsed": false, 402 | "pycharm": { 403 | "name": "#%%\n" 404 | } 405 | } 406 | }, 407 | { 408 | "cell_type": "markdown", 409 | "source": [ 410 | "### 筛选" 411 | ], 412 | "metadata": { 413 | "collapsed": false, 414 | "pycharm": { 415 | "name": "#%% md\n" 416 | } 417 | } 418 | }, 419 | { 420 | "cell_type": "code", 421 | "execution_count": 22, 422 | "outputs": [ 423 | { 424 | "data": { 425 | "text/plain": " A B C D\n2013-01-04 12 13 14 15\n2013-01-05 16 17 18 19\n2013-01-06 20 21 22 23", 426 | "text/html": "
" 427 | }, 428 | "execution_count": 22, 429 | "metadata": {}, 430 | "output_type": "execute_result" 431 | } 432 | ], 433 | "source": [ 434 | "df[df.A > 8]" 435 | ], 436 | "metadata": { 437 | "collapsed": false, 438 | "pycharm": { 439 | "name": "#%%\n" 440 | } 441 | } 442 | }, 443 | { 444 | "cell_type": "code", 445 | "execution_count": null, 446 | "outputs": [], 447 | "source": [], 448 | "metadata": { 449 | "collapsed": false, 450 | "pycharm": { 451 | "name": "#%%\n" 452 | } 453 | } 454 | } 455 | ], 456 | "metadata": { 457 | "kernelspec": { 458 | "display_name": "Python 3", 459 | "language": "python", 460 | "name": "python3" 461 | }, 462 | "language_info": { 463 | "codemirror_mode": { 464 | "name": "ipython", 465 | "version": 2 466 | }, 467 | "file_extension": ".py", 468 | "mimetype": "text/x-python", 469 | "name": "python", 470 | "nbconvert_exporter": "python", 471 | "pygments_lexer": "ipython2", 472 | "version": "2.7.6" 473 | } 474 | }, 475 | "nbformat": 4, 476 | "nbformat_minor": 0 477 | } -------------------------------------------------------------------------------- /docs/data-analysis/files/02-setting-value.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "collapsed": true, 7 | "pycharm": { 8 | "name": "#%% md\n" 9 | } 10 | }, 11 | "source": [ 12 | "## 设置值" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 18, 18 | "outputs": [], 19 | "source": [ 20 | "import pandas as pd\n", 21 | "import numpy as np" 22 | ], 23 | "metadata": { 24 | "collapsed": false, 25 | "pycharm": { 26 | "name": "#%%\n" 27 | } 28 | } 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 19, 33 | "outputs": [], 34 | "source": [ 35 | "dates = pd.date_range('20130101', periods=6)\n", 36 | "df = pd.DataFrame(np.arange(24).reshape((6,4)), index=dates, columns=['A', 'B', 'C', 'D'])" 37 | ], 38 | "metadata": { 39 | "collapsed": false, 40 | "pycharm": { 41 | "name": "#%%\n" 42 | } 43 | } 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 20, 48 | "outputs": [ 49 | { 50 | "data": { 51 | "text/plain": " A B C D\n2013-01-01 0 2222 2 3\n2013-01-02 4 5 6 7\n2013-01-03 8 9 1111 11\n2013-01-04 12 13 14 15\n2013-01-05 16 17 18 19\n2013-01-06 20 21 22 23", 52 | "text/html": "
" 53 | }, 54 | "execution_count": 20, 55 | "metadata": {}, 56 | "output_type": "execute_result" 57 | } 58 | ], 59 | "source": [ 60 | "df.iloc[2, 2] = 1111\n", 61 | "df.loc['20130101', 'B'] = 2222\n", 62 | "df" 63 | ], 64 | "metadata": { 65 | "collapsed": false, 66 | "pycharm": { 67 | "name": "#%%\n" 68 | } 69 | } 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 21, 74 | "outputs": [ 75 | { 76 | "data": { 77 | "text/plain": " A B C D\n2013-01-01 0 2222 2 3\n2013-01-02 4 5 6 7\n2013-01-03 8 0 1111 11\n2013-01-04 12 0 14 15\n2013-01-05 16 0 18 19\n2013-01-06 20 0 22 23", 78 | "text/html": "
" 79 | }, 80 | "execution_count": 21, 81 | "metadata": {}, 82 | "output_type": "execute_result" 83 | } 84 | ], 85 | "source": [ 86 | "df.B[df.A>4] = 0\n", 87 | "df" 88 | ], 89 | "metadata": { 90 | "collapsed": false, 91 | "pycharm": { 92 | "name": "#%%\n" 93 | } 94 | } 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 22, 99 | "outputs": [ 100 | { 101 | "data": { 102 | "text/plain": " A B C D E\n2013-01-01 0 2222 2 3 NaN\n2013-01-02 4 5 6 7 NaN\n2013-01-03 8 0 1111 11 NaN\n2013-01-04 12 0 14 15 NaN\n2013-01-05 16 0 18 19 NaN\n2013-01-06 20 0 22 23 NaN", 103 | "text/html": "
" 104 | }, 105 | "execution_count": 22, 106 | "metadata": {}, 107 | "output_type": "execute_result" 108 | } 109 | ], 110 | "source": [ 111 | "df['E'] = np.nan\n", 112 | "df" 113 | ], 114 | "metadata": { 115 | "collapsed": false, 116 | "pycharm": { 117 | "name": "#%%\n" 118 | } 119 | } 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 25, 124 | "outputs": [ 125 | { 126 | "data": { 127 | "text/plain": " A B C D E F\n2013-01-01 0 2222 2 3 NaN NaN\n2013-01-02 4 5 6 7 NaN NaN\n2013-01-03 8 0 1111 11 NaN NaN\n2013-01-04 12 0 14 15 NaN NaN\n2013-01-05 16 0 18 19 NaN NaN\n2013-01-06 20 0 22 23 NaN NaN", 128 | "text/html": "
" 129 | }, 130 | "execution_count": 25, 131 | "metadata": {}, 132 | "output_type": "execute_result" 133 | } 134 | ], 135 | "source": [ 136 | "# 如果没有index=pd.date_range('20130101', periods=6) 就会是NAN\n", 137 | "df['F'] = pd.Series([1,2,3,4,5,6], index=pd.date_range('20130101', periods=6))\n", 138 | "df" 139 | ], 140 | "metadata": { 141 | "collapsed": false, 142 | "pycharm": { 143 | "name": "#%%\n" 144 | } 145 | } 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "outputs": [], 151 | "source": [], 152 | "metadata": { 153 | "collapsed": false, 154 | "pycharm": { 155 | "name": "#%%\n" 156 | } 157 | } 158 | } 159 | ], 160 | "metadata": { 161 | "kernelspec": { 162 | "display_name": "Python 3", 163 | "language": "python", 164 | "name": "python3" 165 | }, 166 | "language_info": { 167 | "codemirror_mode": { 168 | "name": "ipython", 169 | "version": 2 170 | }, 171 | "file_extension": ".py", 172 | "mimetype": "text/x-python", 173 | "name": "python", 174 | "nbconvert_exporter": "python", 175 | "pygments_lexer": "ipython2", 176 | "version": "2.7.6" 177 | } 178 | }, 179 | "nbformat": 4, 180 | "nbformat_minor": 0 181 | } -------------------------------------------------------------------------------- /docs/data-analysis/files/03-process-dropout-data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "collapsed": true, 7 | "pycharm": { 8 | "name": "#%% md\n" 9 | } 10 | }, 11 | "source": [ 12 | "## 处理丢失数据" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "outputs": [], 19 | "source": [ 20 | "import pandas as pd\n", 21 | "import numpy as np" 22 | ], 23 | "metadata": { 24 | "collapsed": false, 25 | "pycharm": { 26 | "name": "#%%\n" 27 | } 28 | } 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 3, 33 | "outputs": [], 34 | "source": [ 35 | "dates = pd.date_range('20130101', periods=6)\n", 36 | "df = pd.DataFrame(np.arange(24).reshape((6,4)), index=dates, columns=['A', 'B', 'C', 'D'])" 37 | ], 38 | "metadata": { 39 | "collapsed": false, 40 | "pycharm": { 41 | "name": "#%%\n" 42 | } 43 | } 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 10, 48 | "outputs": [], 49 | "source": [ 50 | "df.iloc[0, 1] = np.nan\n", 51 | "df.iloc[1, 2] = np.nan" 52 | ], 53 | "metadata": { 54 | "collapsed": false, 55 | "pycharm": { 56 | "name": "#%%\n" 57 | } 58 | } 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 11, 63 | "outputs": [ 64 | { 65 | "data": { 66 | "text/plain": " A B C D\n2013-01-01 0 NaN 2.0 3\n2013-01-02 4 5.0 NaN 7\n2013-01-03 8 9.0 10.0 11\n2013-01-04 12 13.0 14.0 15\n2013-01-05 16 17.0 18.0 19\n2013-01-06 20 21.0 22.0 23", 67 | "text/html": "
" 68 | }, 69 | "execution_count": 11, 70 | "metadata": {}, 71 | "output_type": "execute_result" 72 | } 73 | ], 74 | "source": [ 75 | "df" 76 | ], 77 | "metadata": { 78 | "collapsed": false, 79 | "pycharm": { 80 | "name": "#%%\n" 81 | } 82 | } 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 12, 87 | "outputs": [ 88 | { 89 | "name": "stdout", 90 | "output_type": "stream", 91 | "text": [ 92 | " A B C D\n", 93 | "2013-01-03 8 9.0 10.0 11\n", 94 | "2013-01-04 12 13.0 14.0 15\n", 95 | "2013-01-05 16 17.0 18.0 19\n", 96 | "2013-01-06 20 21.0 22.0 23\n" 97 | ] 98 | } 99 | ], 100 | "source": [ 101 | "# 直接删除整行 / axis = 1 删除整列\n", 102 | "print(df.dropna(axis=0, how='any')) # how = {'any', 'all'}" 103 | ], 104 | "metadata": { 105 | "collapsed": false, 106 | "pycharm": { 107 | "name": "#%%\n" 108 | } 109 | } 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 13, 114 | "outputs": [ 115 | { 116 | "data": { 117 | "text/plain": " A B C D\n2013-01-01 0 0.0 2.0 3\n2013-01-02 4 5.0 0.0 7\n2013-01-03 8 9.0 10.0 11\n2013-01-04 12 13.0 14.0 15\n2013-01-05 16 17.0 18.0 19\n2013-01-06 20 21.0 22.0 23", 118 | "text/html": "
" 119 | }, 120 | "execution_count": 13, 121 | "metadata": {}, 122 | "output_type": "execute_result" 123 | } 124 | ], 125 | "source": [ 126 | "df = df.fillna(value=0)\n", 127 | "df" 128 | ], 129 | "metadata": { 130 | "collapsed": false, 131 | "pycharm": { 132 | "name": "#%%\n" 133 | } 134 | } 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 18, 139 | "outputs": [ 140 | { 141 | "name": "stdout", 142 | "output_type": "stream", 143 | "text": [ 144 | "False\n" 145 | ] 146 | } 147 | ], 148 | "source": [ 149 | "# 是否丢失数据\n", 150 | "print(np.any(df.isnull()) == True)" 151 | ], 152 | "metadata": { 153 | "collapsed": false, 154 | "pycharm": { 155 | "name": "#%%\n" 156 | } 157 | } 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "outputs": [], 163 | "source": [], 164 | "metadata": { 165 | "collapsed": false, 166 | "pycharm": { 167 | "name": "#%%\n" 168 | } 169 | } 170 | } 171 | ], 172 | "metadata": { 173 | "kernelspec": { 174 | "display_name": "Python 3", 175 | "language": "python", 176 | "name": "python3" 177 | }, 178 | "language_info": { 179 | "codemirror_mode": { 180 | "name": "ipython", 181 | "version": 2 182 | }, 183 | "file_extension": ".py", 184 | "mimetype": "text/x-python", 185 | "name": "python", 186 | "nbconvert_exporter": "python", 187 | "pygments_lexer": "ipython2", 188 | "version": "2.7.6" 189 | } 190 | }, 191 | "nbformat": 4, 192 | "nbformat_minor": 0 193 | } -------------------------------------------------------------------------------- /docs/data-analysis/files/04-import-and-export.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import pandas as pd" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "outputs": [], 18 | "source": [ 19 | "students = pd.read_csv('./student.csv')" 20 | ], 21 | "metadata": { 22 | "collapsed": false, 23 | "pycharm": { 24 | "name": "#%%\n" 25 | } 26 | } 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 3, 31 | "outputs": [ 32 | { 33 | "data": { 34 | "text/plain": " Student ID name age gender\n0 1100 Kelly 22 Female\n1 1101 Clo 21 Female\n2 1102 Tilly 22 Female\n3 1103 Tony 24 Male\n4 1104 David 20 Male\n5 1105 Catty 22 Female\n6 1106 M 3 Female\n7 1107 N 43 Male\n8 1108 A 13 Male\n9 1109 S 12 Male\n10 1110 David 33 Male\n11 1111 Dw 3 Female\n12 1112 Q 23 Male\n13 1113 W 21 Female", 35 | "text/html": "
" 36 | }, 37 | "execution_count": 3, 38 | "metadata": {}, 39 | "output_type": "execute_result" 40 | } 41 | ], 42 | "source": [ 43 | "students" 44 | ], 45 | "metadata": { 46 | "collapsed": false, 47 | "pycharm": { 48 | "name": "#%%\n" 49 | } 50 | } 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "outputs": [], 56 | "source": [ 57 | "# 保存\n", 58 | "students.to_pickle()" 59 | ], 60 | "metadata": { 61 | "collapsed": false, 62 | "pycharm": { 63 | "name": "#%%\n" 64 | } 65 | } 66 | } 67 | ], 68 | "metadata": { 69 | "kernelspec": { 70 | "display_name": "Python 3", 71 | "language": "python", 72 | "name": "python3" 73 | }, 74 | "language_info": { 75 | "codemirror_mode": { 76 | "name": "ipython", 77 | "version": 2 78 | }, 79 | "file_extension": ".py", 80 | "mimetype": "text/x-python", 81 | "name": "python", 82 | "nbconvert_exporter": "python", 83 | "pygments_lexer": "ipython2", 84 | "version": "2.7.6" 85 | } 86 | }, 87 | "nbformat": 4, 88 | "nbformat_minor": 0 89 | } -------------------------------------------------------------------------------- /docs/data-analysis/files/05-concat.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import pandas as pd\n", 12 | "import numpy as np" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "outputs": [], 19 | "source": [ 20 | "df1 = pd.DataFrame(np.ones((3,4))*0, columns=['a','b','c','d'])\n", 21 | "df2 = pd.DataFrame(np.ones((3,4))*1, columns=['a','b','c','d'])\n", 22 | "df3 = pd.DataFrame(np.ones((3,4))*2, columns=['a','b','c','d'])" 23 | ], 24 | "metadata": { 25 | "collapsed": false, 26 | "pycharm": { 27 | "name": "#%%\n" 28 | } 29 | } 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 3, 34 | "outputs": [ 35 | { 36 | "data": { 37 | "text/plain": " a b c d\n0 0.0 0.0 0.0 0.0\n1 0.0 0.0 0.0 0.0\n2 0.0 0.0 0.0 0.0", 38 | "text/html": "
" 39 | }, 40 | "execution_count": 3, 41 | "metadata": {}, 42 | "output_type": "execute_result" 43 | } 44 | ], 45 | "source": [ 46 | "df1" 47 | ], 48 | "metadata": { 49 | "collapsed": false, 50 | "pycharm": { 51 | "name": "#%%\n" 52 | } 53 | } 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 4, 58 | "outputs": [ 59 | { 60 | "data": { 61 | "text/plain": " a b c d\n0 1.0 1.0 1.0 1.0\n1 1.0 1.0 1.0 1.0\n2 1.0 1.0 1.0 1.0", 62 | "text/html": "
" 63 | }, 64 | "execution_count": 4, 65 | "metadata": {}, 66 | "output_type": "execute_result" 67 | } 68 | ], 69 | "source": [ 70 | "df2" 71 | ], 72 | "metadata": { 73 | "collapsed": false, 74 | "pycharm": { 75 | "name": "#%%\n" 76 | } 77 | } 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 5, 82 | "outputs": [ 83 | { 84 | "data": { 85 | "text/plain": " a b c d\n0 2.0 2.0 2.0 2.0\n1 2.0 2.0 2.0 2.0\n2 2.0 2.0 2.0 2.0", 86 | "text/html": "
" 87 | }, 88 | "execution_count": 5, 89 | "metadata": {}, 90 | "output_type": "execute_result" 91 | } 92 | ], 93 | "source": [ 94 | "df3" 95 | ], 96 | "metadata": { 97 | "collapsed": false, 98 | "pycharm": { 99 | "name": "#%%\n" 100 | } 101 | } 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 6, 106 | "outputs": [ 107 | { 108 | "data": { 109 | "text/plain": " a b c d\n0 0.0 0.0 0.0 0.0\n1 0.0 0.0 0.0 0.0\n2 0.0 0.0 0.0 0.0\n0 1.0 1.0 1.0 1.0\n1 1.0 1.0 1.0 1.0\n2 1.0 1.0 1.0 1.0\n0 2.0 2.0 2.0 2.0\n1 2.0 2.0 2.0 2.0\n2 2.0 2.0 2.0 2.0", 110 | "text/html": "
" 111 | }, 112 | "execution_count": 6, 113 | "metadata": {}, 114 | "output_type": "execute_result" 115 | } 116 | ], 117 | "source": [ 118 | "res = pd.concat([df1,df2,df3], axis=0)\n", 119 | "res" 120 | ], 121 | "metadata": { 122 | "collapsed": false, 123 | "pycharm": { 124 | "name": "#%%\n" 125 | } 126 | } 127 | }, 128 | { 129 | "cell_type": "code", 130 | "execution_count": 7, 131 | "outputs": [ 132 | { 133 | "data": { 134 | "text/plain": " a b c d\n0 0.0 0.0 0.0 0.0\n1 0.0 0.0 0.0 0.0\n2 0.0 0.0 0.0 0.0\n3 1.0 1.0 1.0 1.0\n4 1.0 1.0 1.0 1.0\n5 1.0 1.0 1.0 1.0\n6 2.0 2.0 2.0 2.0\n7 2.0 2.0 2.0 2.0\n8 2.0 2.0 2.0 2.0", 135 | "text/html": "
" 136 | }, 137 | "execution_count": 7, 138 | "metadata": {}, 139 | "output_type": "execute_result" 140 | } 141 | ], 142 | "source": [ 143 | "res = pd.concat([df1,df2,df3], axis=0, ignore_index=True)\n", 144 | "res" 145 | ], 146 | "metadata": { 147 | "collapsed": false, 148 | "pycharm": { 149 | "name": "#%%\n" 150 | } 151 | } 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "source": [ 156 | "### join\n", 157 | "* inner\n", 158 | "* outer" 159 | ], 160 | "metadata": { 161 | "collapsed": false, 162 | "pycharm": { 163 | "name": "#%% md\n" 164 | } 165 | } 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 8, 170 | "outputs": [], 171 | "source": [ 172 | "df1 = pd.DataFrame(np.ones((3,4))*0, columns=['a','b','c','d'], index=[1,2,3])\n", 173 | "df2 = pd.DataFrame(np.ones((3,4))*1, columns=['b','c','d', 'e'], index=[2,3,4])" 174 | ], 175 | "metadata": { 176 | "collapsed": false, 177 | "pycharm": { 178 | "name": "#%%\n" 179 | } 180 | } 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 9, 185 | "outputs": [ 186 | { 187 | "data": { 188 | "text/plain": " a b c d\n1 0.0 0.0 0.0 0.0\n2 0.0 0.0 0.0 0.0\n3 0.0 0.0 0.0 0.0", 189 | "text/html": "
" 190 | }, 191 | "execution_count": 9, 192 | "metadata": {}, 193 | "output_type": "execute_result" 194 | } 195 | ], 196 | "source": [ 197 | "df1" 198 | ], 199 | "metadata": { 200 | "collapsed": false, 201 | "pycharm": { 202 | "name": "#%%\n" 203 | } 204 | } 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": 10, 209 | "outputs": [ 210 | { 211 | "data": { 212 | "text/plain": " b c d e\n2 1.0 1.0 1.0 1.0\n3 1.0 1.0 1.0 1.0\n4 1.0 1.0 1.0 1.0", 213 | "text/html": "
" 214 | }, 215 | "execution_count": 10, 216 | "metadata": {}, 217 | "output_type": "execute_result" 218 | } 219 | ], 220 | "source": [ 221 | "df2" 222 | ], 223 | "metadata": { 224 | "collapsed": false, 225 | "pycharm": { 226 | "name": "#%%\n" 227 | } 228 | } 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": 11, 233 | "outputs": [ 234 | { 235 | "data": { 236 | "text/plain": " a b c d e\n1 0.0 0.0 0.0 0.0 NaN\n2 0.0 0.0 0.0 0.0 NaN\n3 0.0 0.0 0.0 0.0 NaN\n2 NaN 1.0 1.0 1.0 1.0\n3 NaN 1.0 1.0 1.0 1.0\n4 NaN 1.0 1.0 1.0 1.0", 237 | "text/html": "
" 238 | }, 239 | "execution_count": 11, 240 | "metadata": {}, 241 | "output_type": "execute_result" 242 | } 243 | ], 244 | "source": [ 245 | "res = pd.concat([df1, df2], axis=0)\n", 246 | "res" 247 | ], 248 | "metadata": { 249 | "collapsed": false, 250 | "pycharm": { 251 | "name": "#%%\n" 252 | } 253 | } 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": 13, 258 | "outputs": [ 259 | { 260 | "data": { 261 | "text/plain": " a b c d e\n1 0.0 0.0 0.0 0.0 NaN\n2 0.0 0.0 0.0 0.0 NaN\n3 0.0 0.0 0.0 0.0 NaN\n2 NaN 1.0 1.0 1.0 1.0\n3 NaN 1.0 1.0 1.0 1.0\n4 NaN 1.0 1.0 1.0 1.0", 262 | "text/html": "
" 263 | }, 264 | "execution_count": 13, 265 | "metadata": {}, 266 | "output_type": "execute_result" 267 | } 268 | ], 269 | "source": [ 270 | "res = pd.concat([df1, df2], axis=0, join='outer')\n", 271 | "res" 272 | ], 273 | "metadata": { 274 | "collapsed": false, 275 | "pycharm": { 276 | "name": "#%%\n" 277 | } 278 | } 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": 14, 283 | "outputs": [ 284 | { 285 | "data": { 286 | "text/plain": " b c d\n1 0.0 0.0 0.0\n2 0.0 0.0 0.0\n3 0.0 0.0 0.0\n2 1.0 1.0 1.0\n3 1.0 1.0 1.0\n4 1.0 1.0 1.0", 287 | "text/html": "
" 288 | }, 289 | "execution_count": 14, 290 | "metadata": {}, 291 | "output_type": "execute_result" 292 | } 293 | ], 294 | "source": [ 295 | "res = pd.concat([df1, df2], axis=0, join='inner')\n", 296 | "res" 297 | ], 298 | "metadata": { 299 | "collapsed": false, 300 | "pycharm": { 301 | "name": "#%%\n" 302 | } 303 | } 304 | }, 305 | { 306 | "cell_type": "markdown", 307 | "source": [ 308 | "### append" 309 | ], 310 | "metadata": { 311 | "collapsed": false, 312 | "pycharm": { 313 | "name": "#%% md\n" 314 | } 315 | } 316 | }, 317 | { 318 | "cell_type": "code", 319 | "execution_count": 15, 320 | "outputs": [], 321 | "source": [ 322 | "df1 = pd.DataFrame(np.ones((3,4))*0, columns=['a','b','c','d'])\n", 323 | "df2 = pd.DataFrame(np.ones((3,4))*1, columns=['a','b','c','d'])\n", 324 | "# df2 = pd.DataFrame(np.ones((3,4))*1, columns=['b','c','d', 'e'], index=[2,3,4])" 325 | ], 326 | "metadata": { 327 | "collapsed": false, 328 | "pycharm": { 329 | "name": "#%%\n" 330 | } 331 | } 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": 18, 336 | "outputs": [ 337 | { 338 | "data": { 339 | "text/plain": " a b c d e\n0 0.0 0.0 0.0 0.0 NaN\n1 0.0 0.0 0.0 0.0 NaN\n2 0.0 0.0 0.0 0.0 NaN\n3 NaN 1.0 1.0 1.0 1.0\n4 NaN 1.0 1.0 1.0 1.0\n5 NaN 1.0 1.0 1.0 1.0", 340 | "text/html": "
" 341 | }, 342 | "execution_count": 18, 343 | "metadata": {}, 344 | "output_type": "execute_result" 345 | } 346 | ], 347 | "source": [ 348 | "res = df1.append(df2, ignore_index=True)\n", 349 | "res" 350 | ], 351 | "metadata": { 352 | "collapsed": false, 353 | "pycharm": { 354 | "name": "#%%\n" 355 | } 356 | } 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": 20, 361 | "outputs": [ 362 | { 363 | "data": { 364 | "text/plain": " a b c d\n0 0.0 0.0 0.0 0.0\n1 0.0 0.0 0.0 0.0\n2 0.0 0.0 0.0 0.0\n3 1.0 2.0 3.0 4.0", 365 | "text/html": "
" 366 | }, 367 | "execution_count": 20, 368 | "metadata": {}, 369 | "output_type": "execute_result" 370 | } 371 | ], 372 | "source": [ 373 | "s1 = pd.Series([1,2,3,4], index=['a', 'b', 'c', 'd'])\n", 374 | "res = df1.append(s1, ignore_index=True)\n", 375 | "res" 376 | ], 377 | "metadata": { 378 | "collapsed": false, 379 | "pycharm": { 380 | "name": "#%%\n" 381 | } 382 | } 383 | }, 384 | { 385 | "cell_type": "code", 386 | "execution_count": null, 387 | "outputs": [], 388 | "source": [], 389 | "metadata": { 390 | "collapsed": false, 391 | "pycharm": { 392 | "name": "#%%\n" 393 | } 394 | } 395 | } 396 | ], 397 | "metadata": { 398 | "kernelspec": { 399 | "display_name": "Python 3", 400 | "language": "python", 401 | "name": "python3" 402 | }, 403 | "language_info": { 404 | "codemirror_mode": { 405 | "name": "ipython", 406 | "version": 2 407 | }, 408 | "file_extension": ".py", 409 | "mimetype": "text/x-python", 410 | "name": "python", 411 | "nbconvert_exporter": "python", 412 | "pygments_lexer": "ipython2", 413 | "version": "2.7.6" 414 | } 415 | }, 416 | "nbformat": 4, 417 | "nbformat_minor": 0 418 | } -------------------------------------------------------------------------------- /docs/data-analysis/files/06-merge.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "collapsed": true, 7 | "pycharm": { 8 | "name": "#%% md\n" 9 | } 10 | }, 11 | "source": [ 12 | "## merge" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 1, 18 | "outputs": [], 19 | "source": [ 20 | "import pandas as pd" 21 | ], 22 | "metadata": { 23 | "collapsed": false, 24 | "pycharm": { 25 | "name": "#%%\n" 26 | } 27 | } 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 2, 32 | "outputs": [], 33 | "source": [ 34 | "left = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'],\n", 35 | " 'A': ['A0', 'A1', 'A2', 'A3'],\n", 36 | " 'B': ['B0', 'B1', 'B2', 'B3']})\n", 37 | "right = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'],\n", 38 | " 'C': ['C0', 'C1', 'C2', 'C3'],\n", 39 | " 'D': ['D0', 'D1', 'D2', 'D3']})" 40 | ], 41 | "metadata": { 42 | "collapsed": false, 43 | "pycharm": { 44 | "name": "#%%\n" 45 | } 46 | } 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 3, 51 | "outputs": [ 52 | { 53 | "data": { 54 | "text/plain": " key A B\n0 K0 A0 B0\n1 K1 A1 B1\n2 K2 A2 B2\n3 K3 A3 B3", 55 | "text/html": "
" 56 | }, 57 | "execution_count": 3, 58 | "metadata": {}, 59 | "output_type": "execute_result" 60 | } 61 | ], 62 | "source": [ 63 | "left" 64 | ], 65 | "metadata": { 66 | "collapsed": false, 67 | "pycharm": { 68 | "name": "#%%\n" 69 | } 70 | } 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 4, 75 | "outputs": [ 76 | { 77 | "data": { 78 | "text/plain": " key C D\n0 K0 C0 D0\n1 K1 C1 D1\n2 K2 C2 D2\n3 K3 C3 D3", 79 | "text/html": "
" 80 | }, 81 | "execution_count": 4, 82 | "metadata": {}, 83 | "output_type": "execute_result" 84 | } 85 | ], 86 | "source": [ 87 | "right" 88 | ], 89 | "metadata": { 90 | "collapsed": false, 91 | "pycharm": { 92 | "name": "#%%\n" 93 | } 94 | } 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 7, 99 | "outputs": [ 100 | { 101 | "data": { 102 | "text/plain": " key A B C D\n0 K0 A0 B0 C0 D0\n1 K1 A1 B1 C1 D1\n2 K2 A2 B2 C2 D2\n3 K3 A3 B3 C3 D3", 103 | "text/html": "
" 104 | }, 105 | "execution_count": 7, 106 | "metadata": {}, 107 | "output_type": "execute_result" 108 | } 109 | ], 110 | "source": [ 111 | "res = pd.merge(left, right, on='key')\n", 112 | "res" 113 | ], 114 | "metadata": { 115 | "collapsed": false, 116 | "pycharm": { 117 | "name": "#%%\n" 118 | } 119 | } 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 8, 124 | "outputs": [], 125 | "source": [ 126 | "left = pd.DataFrame({'key1': ['K0', 'K0', 'K1', 'K2'],\n", 127 | " 'key2': ['K0', 'K1', 'K0', 'K1'],\n", 128 | " 'A': ['A0', 'A1', 'A2', 'A3'],\n", 129 | " 'B': ['B0', 'B1', 'B2', 'B3']})\n", 130 | "right = pd.DataFrame({'key1': ['K0', 'K1', 'K1', 'K2'],\n", 131 | " 'key2': ['K0', 'K0', 'K0', 'K0'],\n", 132 | " 'C': ['C0', 'C1', 'C2', 'C3'],\n", 133 | " 'D': ['D0', 'D1', 'D2', 'D3']})" 134 | ], 135 | "metadata": { 136 | "collapsed": false, 137 | "pycharm": { 138 | "name": "#%%\n" 139 | } 140 | } 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 9, 145 | "outputs": [ 146 | { 147 | "data": { 148 | "text/plain": " key1 key2 A B\n0 K0 K0 A0 B0\n1 K0 K1 A1 B1\n2 K1 K0 A2 B2\n3 K2 K1 A3 B3", 149 | "text/html": "
" 150 | }, 151 | "execution_count": 9, 152 | "metadata": {}, 153 | "output_type": "execute_result" 154 | } 155 | ], 156 | "source": [ 157 | "left" 158 | ], 159 | "metadata": { 160 | "collapsed": false, 161 | "pycharm": { 162 | "name": "#%%\n" 163 | } 164 | } 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 10, 169 | "outputs": [ 170 | { 171 | "data": { 172 | "text/plain": " key1 key2 C D\n0 K0 K0 C0 D0\n1 K1 K0 C1 D1\n2 K1 K0 C2 D2\n3 K2 K0 C3 D3", 173 | "text/html": "
" 174 | }, 175 | "execution_count": 10, 176 | "metadata": {}, 177 | "output_type": "execute_result" 178 | } 179 | ], 180 | "source": [ 181 | "right" 182 | ], 183 | "metadata": { 184 | "collapsed": false, 185 | "pycharm": { 186 | "name": "#%%\n" 187 | } 188 | } 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": 12, 193 | "outputs": [ 194 | { 195 | "data": { 196 | "text/plain": " key1 key2 A B C D\n0 K0 K0 A0 B0 C0 D0\n1 K1 K0 A2 B2 C1 D1\n2 K1 K0 A2 B2 C2 D2", 197 | "text/html": "
" 198 | }, 199 | "execution_count": 12, 200 | "metadata": {}, 201 | "output_type": "execute_result" 202 | } 203 | ], 204 | "source": [ 205 | "res = pd.merge(left, right, on=['key1', 'key2'])\n", 206 | "res" 207 | ], 208 | "metadata": { 209 | "collapsed": false, 210 | "pycharm": { 211 | "name": "#%%\n" 212 | } 213 | } 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": 13, 218 | "outputs": [ 219 | { 220 | "data": { 221 | "text/plain": " key1 key2 A B C D\n0 K0 K0 A0 B0 C0 D0\n1 K1 K0 A2 B2 C1 D1\n2 K1 K0 A2 B2 C2 D2", 222 | "text/html": "
" 223 | }, 224 | "execution_count": 13, 225 | "metadata": {}, 226 | "output_type": "execute_result" 227 | } 228 | ], 229 | "source": [ 230 | "res = pd.merge(left, right, on=['key1', 'key2'], how='inner')\n", 231 | "res" 232 | ], 233 | "metadata": { 234 | "collapsed": false, 235 | "pycharm": { 236 | "name": "#%%\n" 237 | } 238 | } 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": 14, 243 | "outputs": [ 244 | { 245 | "data": { 246 | "text/plain": " key1 key2 A B C D\n0 K0 K0 A0 B0 C0 D0\n1 K0 K1 A1 B1 NaN NaN\n2 K1 K0 A2 B2 C1 D1\n3 K1 K0 A2 B2 C2 D2\n4 K2 K1 A3 B3 NaN NaN\n5 K2 K0 NaN NaN C3 D3", 247 | "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
key1key2ABCD
0K0K0A0B0C0D0
1K0K1A1B1NaNNaN
2K1K0A2B2C1D1
3K1K0A2B2C2D2
4K2K1A3B3NaNNaN
5K2K0NaNNaNC3D3
\n
" 248 | }, 249 | "execution_count": 14, 250 | "metadata": {}, 251 | "output_type": "execute_result" 252 | } 253 | ], 254 | "source": [ 255 | "res = pd.merge(left, right, on=['key1', 'key2'], how='outer')\n", 256 | "res" 257 | ], 258 | "metadata": { 259 | "collapsed": false, 260 | "pycharm": { 261 | "name": "#%%\n" 262 | } 263 | } 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": 15, 268 | "outputs": [ 269 | { 270 | "data": { 271 | "text/plain": " key1 key2 A B C D\n0 K0 K0 A0 B0 C0 D0\n1 K1 K0 A2 B2 C1 D1\n2 K1 K0 A2 B2 C2 D2\n3 K2 K0 NaN NaN C3 D3", 272 | "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
key1key2ABCD
0K0K0A0B0C0D0
1K1K0A2B2C1D1
2K1K0A2B2C2D2
3K2K0NaNNaNC3D3
\n
" 273 | }, 274 | "execution_count": 15, 275 | "metadata": {}, 276 | "output_type": "execute_result" 277 | } 278 | ], 279 | "source": [ 280 | "res = pd.merge(left, right, on=['key1', 'key2'], how='right')\n", 281 | "res" 282 | ], 283 | "metadata": { 284 | "collapsed": false, 285 | "pycharm": { 286 | "name": "#%%\n" 287 | } 288 | } 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": 16, 293 | "outputs": [ 294 | { 295 | "data": { 296 | "text/plain": " key1 key2 A B C D _merge\n0 K0 K0 A0 B0 C0 D0 both\n1 K0 K1 A1 B1 NaN NaN left_only\n2 K1 K0 A2 B2 C1 D1 both\n3 K1 K0 A2 B2 C2 D2 both\n4 K2 K1 A3 B3 NaN NaN left_only\n5 K2 K0 NaN NaN C3 D3 right_only", 297 | "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
key1key2ABCD_merge
0K0K0A0B0C0D0both
1K0K1A1B1NaNNaNleft_only
2K1K0A2B2C1D1both
3K1K0A2B2C2D2both
4K2K1A3B3NaNNaNleft_only
5K2K0NaNNaNC3D3right_only
\n
" 298 | }, 299 | "execution_count": 16, 300 | "metadata": {}, 301 | "output_type": "execute_result" 302 | } 303 | ], 304 | "source": [ 305 | "res = pd.merge(left, right, on=['key1', 'key2'], how='outer', indicator=True)\n", 306 | "res" 307 | ], 308 | "metadata": { 309 | "collapsed": false, 310 | "pycharm": { 311 | "name": "#%%\n" 312 | } 313 | } 314 | }, 315 | { 316 | "cell_type": "markdown", 317 | "source": [ 318 | "### index" 319 | ], 320 | "metadata": { 321 | "collapsed": false, 322 | "pycharm": { 323 | "name": "#%% md\n" 324 | } 325 | } 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": 17, 330 | "outputs": [], 331 | "source": [ 332 | "left = pd.DataFrame({'A': ['A0', 'A1', 'A2'],\n", 333 | " 'B': ['B0', 'B1', 'B2']},\n", 334 | " index=['K0', 'K1', 'K2'])\n", 335 | "right = pd.DataFrame({'C': ['C0', 'C2', 'C3'],\n", 336 | " 'D': ['D0', 'D2', 'D3']},\n", 337 | " index=['K0', 'K2', 'K3'])" 338 | ], 339 | "metadata": { 340 | "collapsed": false, 341 | "pycharm": { 342 | "name": "#%%\n" 343 | } 344 | } 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": 18, 349 | "outputs": [ 350 | { 351 | "data": { 352 | "text/plain": " A B C D\nK0 A0 B0 C0 D0\nK1 A1 B1 NaN NaN\nK2 A2 B2 C2 D2\nK3 NaN NaN C3 D3", 353 | "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
ABCD
K0A0B0C0D0
K1A1B1NaNNaN
K2A2B2C2D2
K3NaNNaNC3D3
\n
" 354 | }, 355 | "execution_count": 18, 356 | "metadata": {}, 357 | "output_type": "execute_result" 358 | } 359 | ], 360 | "source": [ 361 | "res = pd.merge(left, right, left_index=True, right_index=True, how='outer')\n", 362 | "res" 363 | ], 364 | "metadata": { 365 | "collapsed": false, 366 | "pycharm": { 367 | "name": "#%%\n" 368 | } 369 | } 370 | }, 371 | { 372 | "cell_type": "markdown", 373 | "source": [ 374 | "### 解决名字相同内涵不同的数据" 375 | ], 376 | "metadata": { 377 | "collapsed": false, 378 | "pycharm": { 379 | "name": "#%% md\n" 380 | } 381 | } 382 | }, 383 | { 384 | "cell_type": "code", 385 | "execution_count": 19, 386 | "outputs": [], 387 | "source": [ 388 | "boys = pd.DataFrame({'k': ['K0', 'K1', 'K2'], 'age': [1, 2, 3]})\n", 389 | "girls = pd.DataFrame({'k': ['K0', 'K0', 'K3'], 'age': [4, 5, 6]})\n" 390 | ], 391 | "metadata": { 392 | "collapsed": false, 393 | "pycharm": { 394 | "name": "#%%\n" 395 | } 396 | } 397 | }, 398 | { 399 | "cell_type": "code", 400 | "execution_count": 20, 401 | "outputs": [ 402 | { 403 | "data": { 404 | "text/plain": " k age\n0 K0 1\n1 K1 2\n2 K2 3", 405 | "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
kage
0K01
1K12
2K23
\n
" 406 | }, 407 | "execution_count": 20, 408 | "metadata": {}, 409 | "output_type": "execute_result" 410 | } 411 | ], 412 | "source": [ 413 | "boys" 414 | ], 415 | "metadata": { 416 | "collapsed": false, 417 | "pycharm": { 418 | "name": "#%%\n" 419 | } 420 | } 421 | }, 422 | { 423 | "cell_type": "code", 424 | "execution_count": 21, 425 | "outputs": [ 426 | { 427 | "data": { 428 | "text/plain": " k age\n0 K0 4\n1 K0 5\n2 K3 6", 429 | "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
kage
0K04
1K05
2K36
\n
" 430 | }, 431 | "execution_count": 21, 432 | "metadata": {}, 433 | "output_type": "execute_result" 434 | } 435 | ], 436 | "source": [ 437 | "girls" 438 | ], 439 | "metadata": { 440 | "collapsed": false, 441 | "pycharm": { 442 | "name": "#%%\n" 443 | } 444 | } 445 | }, 446 | { 447 | "cell_type": "code", 448 | "execution_count": 22, 449 | "outputs": [ 450 | { 451 | "data": { 452 | "text/plain": " k age_boy age_girl\n0 K0 1 4\n1 K0 1 5", 453 | "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
kage_boyage_girl
0K014
1K015
\n
" 454 | }, 455 | "execution_count": 22, 456 | "metadata": {}, 457 | "output_type": "execute_result" 458 | } 459 | ], 460 | "source": [ 461 | "res = pd.merge(boys, girls, on='k', suffixes=['_boy', '_girl'], how='inner')\n", 462 | "res" 463 | ], 464 | "metadata": { 465 | "collapsed": false, 466 | "pycharm": { 467 | "name": "#%%\n" 468 | } 469 | } 470 | }, 471 | { 472 | "cell_type": "code", 473 | "execution_count": null, 474 | "outputs": [], 475 | "source": [], 476 | "metadata": { 477 | "collapsed": false, 478 | "pycharm": { 479 | "name": "#%%\n" 480 | } 481 | } 482 | } 483 | ], 484 | "metadata": { 485 | "kernelspec": { 486 | "display_name": "Python 3", 487 | "language": "python", 488 | "name": "python3" 489 | }, 490 | "language_info": { 491 | "codemirror_mode": { 492 | "name": "ipython", 493 | "version": 2 494 | }, 495 | "file_extension": ".py", 496 | "mimetype": "text/x-python", 497 | "name": "python", 498 | "nbconvert_exporter": "python", 499 | "pygments_lexer": "ipython2", 500 | "version": "2.7.6" 501 | } 502 | }, 503 | "nbformat": 4, 504 | "nbformat_minor": 0 505 | } -------------------------------------------------------------------------------- /docs/data-analysis/files/student.csv: -------------------------------------------------------------------------------- 1 | Student ID,name ,age,gender 2 | 1100,Kelly,22,Female 3 | 1101,Clo,21,Female 4 | 1102,Tilly,22,Female 5 | 1103,Tony,24,Male 6 | 1104,David,20,Male 7 | 1105,Catty,22,Female 8 | 1106,M,3,Female 9 | 1107,N,43,Male 10 | 1108,A,13,Male 11 | 1109,S,12,Male 12 | 1110,David,33,Male 13 | 1111,Dw,3,Female 14 | 1112,Q,23,Male 15 | 1113,W,21,Female -------------------------------------------------------------------------------- /docs/data-analysis/img/3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/data-analysis/img/3-1.png -------------------------------------------------------------------------------- /docs/data-analysis/img/3-10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/data-analysis/img/3-10.png -------------------------------------------------------------------------------- /docs/data-analysis/img/3-11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/data-analysis/img/3-11.png -------------------------------------------------------------------------------- /docs/data-analysis/img/3-12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/data-analysis/img/3-12.png -------------------------------------------------------------------------------- /docs/data-analysis/img/3-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/data-analysis/img/3-2.png -------------------------------------------------------------------------------- /docs/data-analysis/img/3-3.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/data-analysis/img/3-3.png -------------------------------------------------------------------------------- /docs/data-analysis/img/3-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/data-analysis/img/3-4.png -------------------------------------------------------------------------------- /docs/data-analysis/img/3-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/data-analysis/img/3-5.png -------------------------------------------------------------------------------- /docs/data-analysis/img/3-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/data-analysis/img/3-6.png -------------------------------------------------------------------------------- /docs/data-analysis/img/3-7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/data-analysis/img/3-7.png -------------------------------------------------------------------------------- /docs/data-analysis/img/3-8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/data-analysis/img/3-8.png -------------------------------------------------------------------------------- /docs/data-analysis/img/3-9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/data-analysis/img/3-9.png -------------------------------------------------------------------------------- /docs/data-analysis/pandas.md: -------------------------------------------------------------------------------- 1 | --- 2 | sidebar_position: 2 3 | --- 4 | 5 | # 数据分析:pandas 6 | 7 | [跳转查看文件](https://github.com/zhiyu1998/Python-Basis-Notes/blob/master/docs/pandas/01-choice_data.ipynb) 8 | 9 | @文档todo -------------------------------------------------------------------------------- /docs/deeplearning/_category_.json: -------------------------------------------------------------------------------- 1 | { 2 | "label": "深度学习", 3 | "position": 4, 4 | "link": { 5 | "type": "generated-index" 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /docs/deeplearning/deeplearning.md: -------------------------------------------------------------------------------- 1 | --- 2 | sidebar_position: 1 3 | --- 4 | 5 | # 深度学习之学习路线 6 | 7 | ## :checkered_flag: 前言 8 | 9 | 我的研究生期间(研究方向:异常检测)深度学习的读书/学习笔记,包含 10 | * 图形分类(入门) 11 | * 目标识别(纯属游戏向 :arrow_right: 自动瞄准) 12 | * 异常检测(交通时序) 13 | 14 | 15 | 16 | ## :mountain_cableway: 路线推荐 17 | 18 | 机器学习 :arrow_right: 数据操作(推荐观看我的仓库) :arrow_right: 深度学习基础(推荐阅读pytorch版本《动手深度学习》:https://zh.d2l.ai/) :arrow_right: 深度学习基础网络【分类】(推荐观看:https://space.bilibili.com/18161609/channel/series) :arrow_right: 分支(看研究方向):arrow_double_up: 19 | 20 | > 注:个人建议《动手深度学习》不用读到基础网络(alexnet...) 
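To make the route above concrete, here is a minimal PyTorch sketch of the kind of first exercise the recommended "Dive into Deep Learning" (动手深度学习) track opens with — fitting a linear model on synthetic data. The data, learning rate, and epoch count here are illustrative assumptions, not taken from the book:

```python
import torch
from torch import nn

# Synthetic regression data: y = Xw + b + noise (illustrative, not from the book)
true_w, true_b = torch.tensor([2.0, -3.4]), 4.2
X = torch.normal(0, 1, (1000, 2))
y = X @ true_w + true_b + torch.normal(0, 0.01, (1000,))

net = nn.Linear(2, 1)                               # a single linear layer
loss = nn.MSELoss()
trainer = torch.optim.SGD(net.parameters(), lr=0.03)

for epoch in range(3):
    for i in range(0, len(X), 10):                  # mini-batches of 10
        Xb, yb = X[i:i + 10], y[i:i + 10]
        l = loss(net(Xb).squeeze(-1), yb)
        trainer.zero_grad()
        l.backward()
        trainer.step()
    print(f'epoch {epoch + 1}, loss {loss(net(X).squeeze(-1), y):f}')
```

Once a training loop like this feels natural, the classification-network series and the research-direction branches above are the logical next step.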
21 | 22 | 23 | 24 | ## :bookmark_tabs: 推荐书籍和网址 25 | 26 | * pytorch内功修炼(Pytorch中文百科): https://www.pytorch.wiki/ 27 | * 论文理论支撑 (神经网络与深度学习):https://nndl.github.io/ 28 | * 语法精益(流畅的Python)、数据处理(深入浅出pandas) 29 | * 奇葩review大赏(娱乐):https://shitmyreviewerssay.tumblr.com/ 30 | 31 | 32 | 33 | 💨其他推荐 34 | 35 | * [Pytorch官网](https://pytorch.org/) 36 | * [飞桨PaddlePaddle官网](https://www.paddlepaddle.org.cn/) 37 | * [scikit-learn中文社区](https://scikit-learn.org.cn/) 38 | * [Matplotlib: Python plotting — Matplotlib 3.4.2 documentation](https://matplotlib.org/stable/index.html) 39 | * [Jittor(计图): 即时编译深度学习框架 — Jittor](https://cg.cs.tsinghua.edu.cn/jittor/) 40 | * [Dataset Search:数据集搜索](https://datasetsearch.research.google.com/) 41 | * [TensorFlow官方教程](https://tensorflow.google.cn/tutorials) 42 | * [Keras:TF封装](https://keras.io/) 43 | * [Hydra 九头蛇:简化深度学习配置](https://hydra.cc/) 44 | * [ml-tooling/best-of-ml-python: 🏆 深度学习开源库排行榜](https://github.com/ml-tooling/best-of-ml-python#graph-data) 45 | * [NumPy 中文](https://www.numpy.org.cn/) 46 | * [Kaggle: 深度学习竞赛](https://www.kaggle.com/) 47 | * [Pillow (PIL Fork) ](https://www.osgeo.cn/pillow/index.html) 48 | * [复杂网络软件 — NetworkX](https://www.osgeo.cn/networkx/index.html) 49 | * [深度学习在图像处理中的应用教程](https://github.com/WZMIAOMIAO/deep-learning-for-image-processing) 50 | * [pandas中文教程](http://joyfulpandas.datawhale.club/Content/index.html) 51 | * [External-Attention-pytorch: 🍀 现成轮子](https://github.com/xmu-xiaoma666/External-Attention-pytorch) 52 | * [VIT汇总](https://github.com/lucidrains/vit-pytorch) 53 | * [深度学习500问](https://github.com/scutan90/DeepLearning-500-questions) 54 | * [深度学习论文阅读路线图](https://github.com/floodsung/Deep-Learning-Papers-Reading-Roadmap) 55 | * [深度学习论文注释实现](https://github.com/labmlai/annotated_deep_learning_paper_implementations) 56 | * [深度学习入门教程, 优秀文章](https://github.com/Mikoto10032/DeepLearning) 57 | * [吴恩达深度学习课程笔记](https://github.com/fengdu78/deeplearning_ai_books) 58 | * [tensorflow2中文教程,持续更新(当前版本:tensorflow2.0)](https://github.com/czy36mengfei/tensorflow2_tutorials_chinese) 59 | * [初学者的TensorFlow教程和例子 (support TF v1 & v2)](https://github.com/aymericdamien/TensorFlow-Examples) 60 | * [简单且准备使用 TensorFlow 的教程](https://github.com/instillai/TensorFlow-Course#why-use-tensorflow) 61 | * [PyTorch 对于研究人员的教程](https://github.com/yunjey/pytorch-tutorial) 62 | * [吴恩达机器学习个人笔记](https://github.com/fengdu78/Coursera-ML-AndrewNg-Notes) 63 | * [Matplotlib 中文](https://www.matplotlib.org.cn/) 64 | * [pytorch-image-models:PyTorch 图像模型、脚本、预训练权重](https://github.com/rwightman/pytorch-image-models) 65 | * [Flops counter](https://github.com/sovrasov/flops-counter.pytorch) 66 | * [CVPR 2022 论文和开源项目合集](https://github.com/amusi/CVPR2021-Papers-with-Code) 67 | * [PyTorch implementations of GAN:对抗神经网络合集](https://github.com/eriklindernoren/PyTorch-GAN) 68 | * [the-gan-zoo: A list of all named GANs!:对抗神经网络合集](https://github.com/hindupuravinash/the-gan-zoo) 69 | 70 | 71 | 72 | 📚书 73 | 74 | * [《计算机视觉实战演练:算法与应用》](https://github.com/Charmve/computer-vision-in-action) 75 | * [Sklearn 与 TensorFlow 机器学习实用指南第二版](https://hands1ml.apachecn.org/#/) 76 | * [利用 Python 进行数据分析 · 第 2 版](https://pyda.apachecn.org/#/) 77 | * [南瓜书PumpkinBook](https://datawhalechina.github.io/pumpkin-book/#/) 78 | 79 | 80 | 81 | 🏣社区 82 | 83 | * [Hugging Face 自然语言处理](https://huggingface.co/) 84 | * [Sieun Park – Medium](https://sieunpark77.medium.com/) 85 | * [Distill — Latest articles about machine learning](https://distill.pub/) 86 | * [Towards Data Science](https://towardsdatascience.com/) 87 | * [Neurohive - Neural 
Networks](https://neurohive.io/en/) 88 | * [974 questions with answers in COMPUTER SCIENCE | Science topic](https://www.researchgate.net/topic/Computer-Science) 89 | * [devRant](https://devrant.com/feed) 90 | 91 | 92 | 93 | ## :pencil2: 论文推荐阅读 94 | 95 | ### 图像分类(Classification) 96 | 97 | - LeNet [http://yann.lecun.com/exdb/lenet/index.html](http://yann.lecun.com/exdb/lenet/index.html) 98 | - AlexNet [http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf) 99 | - ZFNet(Visualizing and Understanding Convolutional Networks) [https://arxiv.org/abs/1311.2901](https://arxiv.org/abs/1311.2901) 100 | - VGG [https://arxiv.org/abs/1409.1556](https://arxiv.org/abs/1409.1556) 101 | - GoogLeNet, Inceptionv1(Going deeper with convolutions) [https://arxiv.org/abs/1409.4842](https://arxiv.org/abs/1409.4842) 102 | - Batch Normalization [https://arxiv.org/abs/1502.03167](https://arxiv.org/abs/1502.03167) 103 | - Inceptionv3(Rethinking the Inception Architecture for Computer Vision) [https://arxiv.org/abs/1512.00567](https://arxiv.org/abs/1512.00567) 104 | - Inceptionv4, Inception-ResNet [https://arxiv.org/abs/1602.07261](https://arxiv.org/abs/1602.07261) 105 | - Xception(Deep Learning with Depthwise Separable Convolutions) [https://arxiv.org/abs/1610.02357](https://arxiv.org/abs/1610.02357) 106 | - ResNet [https://arxiv.org/abs/1512.03385](https://arxiv.org/abs/1512.03385) 107 | - ResNeXt [https://arxiv.org/abs/1611.05431](https://arxiv.org/abs/1611.05431) 108 | - DenseNet [https://arxiv.org/abs/1608.06993](https://arxiv.org/abs/1608.06993) 109 | - NASNet-A(Learning Transferable Architectures for Scalable Image Recognition) [https://arxiv.org/abs/1707.07012](https://arxiv.org/abs/1707.07012) 110 | - SENet(Squeeze-and-Excitation Networks) [https://arxiv.org/abs/1709.01507](https://arxiv.org/abs/1709.01507) 111 | - MobileNet(v1) [https://arxiv.org/abs/1704.04861](https://arxiv.org/abs/1704.04861) 112 | - MobileNet(v2) [https://arxiv.org/abs/1801.04381](https://arxiv.org/abs/1801.04381) 113 | - MobileNet(v3) [https://arxiv.org/abs/1905.02244](https://arxiv.org/abs/1905.02244) 114 | - ShuffleNet(v1) [https://arxiv.org/abs/1707.01083](https://arxiv.org/abs/1707.01083) 115 | - ShuffleNet(v2) [https://arxiv.org/abs/1807.11164](https://arxiv.org/abs/1807.11164) 116 | - Bag of Tricks for Image Classification with Convolutional Neural Networks [https://arxiv.org/abs/1812.01187](https://arxiv.org/abs/1812.01187) 117 | - EfficientNet(v1) [https://arxiv.org/abs/1905.11946](https://arxiv.org/abs/1905.11946) 118 | - EfficientNet(v2) [https://arxiv.org/abs/2104.00298](https://arxiv.org/abs/2104.00298) 119 | - CSPNet [https://arxiv.org/abs/1911.11929](https://arxiv.org/abs/1911.11929) 120 | - RegNet [https://arxiv.org/abs/2003.13678](https://arxiv.org/abs/2003.13678) 121 | - NFNets(High-Performance Large-Scale Image Recognition Without Normalization) [https://arxiv.org/abs/2102.06171](https://arxiv.org/abs/2102.06171) 122 | - Attention Is All You Need [https://arxiv.org/abs/1706.03762](https://arxiv.org/abs/1706.03762) 123 | - Vision Transformer [https://arxiv.org/abs/2010.11929](https://arxiv.org/abs/2010.11929) 124 | - DeiT(Training data-efficient image transformers ) [https://arxiv.org/abs/2012.12877](https://arxiv.org/abs/2012.12877) 125 | - Swin Transformer [https://arxiv.org/abs/2103.14030](https://arxiv.org/abs/2103.14030) 126 | - Swin Transformer V2: Scaling Up Capacity 
and Resolution [https://arxiv.org/abs/2111.09883](https://arxiv.org/abs/2111.09883) 127 | - BEiT: BERT Pre-Training of Image Transformers [https://arxiv.org/abs/2106.08254](https://arxiv.org/abs/2106.08254) 128 | - MAE(Masked Autoencoders Are Scalable Vision Learners) [https://arxiv.org/abs/2111.06377](https://arxiv.org/abs/2111.06377) 129 | - CoAtNet [https://arxiv.org/pdf/2106.04803v2.pdf](https://arxiv.org/pdf/2106.04803v2.pdf) 130 | 131 | 132 | 133 | ### 目标检测(Object Detection) 134 | 135 | - R-CNN [https://arxiv.org/abs/1311.2524](https://arxiv.org/abs/1311.2524) 136 | - Fast R-CNN [https://arxiv.org/abs/1504.08083](https://arxiv.org/abs/1504.08083) 137 | - Faster R-CNN [https://arxiv.org/abs/1506.01497](https://arxiv.org/abs/1506.01497) 138 | - Cascade R-CNN: Delving into High Quality Object Detection [https://arxiv.org/abs/1712.00726](https://arxiv.org/abs/1712.00726) 139 | - Mask R-CNN [https://arxiv.org/abs/1703.06870](https://arxiv.org/abs/1703.06870) 140 | - SSD [https://arxiv.org/abs/1512.02325](https://arxiv.org/abs/1512.02325) 141 | - FPN(Feature Pyramid Networks for Object Detection) [https://arxiv.org/abs/1612.03144](https://arxiv.org/abs/1612.03144) 142 | - RetinaNet(Focal Loss for Dense Object Detection) [https://arxiv.org/abs/1708.02002](https://arxiv.org/abs/1708.02002) 143 | - Bag of Freebies for Training Object Detection Neural Networks [https://arxiv.org/abs/1902.04103](https://arxiv.org/abs/1902.04103) 144 | - YOLOv1 [https://arxiv.org/abs/1506.02640](https://arxiv.org/abs/1506.02640) 145 | - YOLOv2 [https://arxiv.org/abs/1612.08242](https://arxiv.org/abs/1612.08242) 146 | - YOLOv3 [https://arxiv.org/abs/1804.02767](https://arxiv.org/abs/1804.02767) 147 | - YOLOv4 [https://arxiv.org/abs/2004.10934](https://arxiv.org/abs/2004.10934) 148 | - Scaled-YOLOv4 [https://arxiv.org/abs/2011.08036](https://arxiv.org/abs/2011.08036) 149 | - PP-YOLO [https://arxiv.org/abs/2007.12099](https://arxiv.org/abs/2007.12099) 150 | - PP-YOLOv2 [https://arxiv.org/abs/2104.10419](https://arxiv.org/abs/2104.10419) 151 | - YOLOX [http://arxiv.org/abs/2107.08430](http://arxiv.org/abs/2107.08430) 152 | - CornerNet [https://arxiv.org/abs/1808.01244](https://arxiv.org/abs/1808.01244) 153 | - FCOS [https://arxiv.org/abs/1904.01355](https://arxiv.org/abs/1904.01355) 154 | - CenterNet [https://arxiv.org/abs/1904.07850](https://arxiv.org/abs/1904.07850) 155 | - Mask R-CNN https://arxiv.org/abs/1703.06870) 156 | 157 | 158 | 159 | ### 异常检测(Anomaly Detection) 160 | 161 | - Anomaly Transformer [http://arxiv.org/abs/2110.02642](http://arxiv.org/abs/2110.02642) 162 | - DL-Traff [http://arxiv.org/abs/2108.09091](http://arxiv.org/abs/2108.09091) 163 | - Generative adversarial networks in time series: A survey and taxonomy [http://arxiv.org/abs/2107.11098](http://arxiv.org/abs/2107.11098) 164 | - Learning Graph Neural Networks for Multivariate Time Series Anomaly Detection [http://arxiv.org/abs/2111.08082](http://arxiv.org/abs/2111.08082) 165 | - Long-Range Transformers [http://arxiv.org/abs/2109.12218](http://arxiv.org/abs/2109.12218) 166 | - Sig-Wasserstein GANs [http://arxiv.org/abs/2111.01207](http://arxiv.org/abs/2111.01207) 167 | 168 | 169 | 170 | ### Others 171 | 172 | - Microsoft COCO: Common Objects in Context [https://arxiv.org/abs/1405.0312](https://arxiv.org/abs/1405.0312) 173 | - The PASCALVisual Object Classes Challenge: A Retrospective [http://host.robots.ox.ac.uk/pascal/VOC/pubs/everingham15.pdf](http://host.robots.ox.ac.uk/pascal/VOC/pubs/everingham15.pdf) 174 | 175 | 176 | 177 | ## :surfer: 
感谢 178 | 179 | 特别致谢以下仓库对于我学习的帮助: 180 | [ WZMIAOMIAO / 181 | 深度学习在图像处理中的应用教程](https://github.com/WZMIAOMIAO/deep-learning-for-image-processing) 182 | [《神经网络与深度学习》 邱锡鹏著](https://github.com/nndl/nndl.github.io) 183 | [ d2l-ai /《动手学深度学习》](https://github.com/d2l-ai/d2l-zh) 184 | -------------------------------------------------------------------------------- /docs/deeplearning/graph.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | sidebar_position: 2 3 | --- 4 | 5 | export const Highlight = ({children, color}) => ( 6 | { 15 | alert(`You clicked the color ${color} with label ${children}`); 16 | }}> 17 | {children} 18 | 19 | ); 20 | 21 | # 图卷积神经网络(持续更新) 22 | 23 | ## 文献分享 24 | 25 | 26 | ### 通用参考の交通流预测专题 27 | - Graph convolutional networks: a comprehensive review:https://computationalsocialnetworks.springeropen.com/articles/10.1186/s40649-019-0069-y 28 | - A Survey of Traffic Prediction: from Spatio-Temporal Data to Intelligent Transportation 通用参考の交通流预测:https://link.springer.com/article/10.1007/s41019-020-00151-z 29 | - A Survey on Modern Deep Neural Network for Traffic Prediction: Trends, Methods and Challenges 通用参考の交通流预测:https://ieeexplore.ieee.org/document/9112608 30 | - A Summary of Traffic Flow Forecasting Methods 通用参考の交通流预测:http://www.gljtkj.com/EN/Y2004/V21/I3/82 31 | - A comprehensive survey on graph neural networks 通用参考の交通流预测:https://ieeexplore.ieee.org/abstract/document/9046288?casa_token=_-IU9Ixzx8kAAAAA:vcOheOMCzaaZRi5lykrhdY0CwfuoOiRU3lrdmA8uSXv1Auu8z9LrB67_JfrnSyjhoNEHbCAauz9atg 32 | - 切比雪夫多项式(Chebyshev polynomials) 通用参考の交通流预测:https://proceedings.neurips.cc/paper_files/paper/2016/hash/04df4d434d481c5bb723be1b6df1ee65-Abstract.html 33 | - 2023年全球道路安全状况报告(Global status report on road safety 2023) 通用参考の交通流预测:https://www.who.int/teams/social-determinants-of-health/safety-and-mobility/global-status-report-on-road-safety-2023 34 | 35 | ### 图嵌入 36 | - Graph embedding techniques, applications, and performance: A survey:https://www.sciencedirect.com/science/article/pii/S0950705118301540 37 | - A comprehensive survey of graph embedding: Problems, techniques, and applications:https://ieeexplore.ieee.org/abstract/document/8294302/?casa_token=RPHDwCwRd_sAAAAA:Us_qNvVZ0rIkhicT8MUJI87qKpF5diSGURb5rBkEtEn_Sru7qd_N5j4SESctQvL8kAM-bJLvzxQAVE8 38 | 39 | ### 通用参考の异常检测专题 40 | - Detecting Road Traffic Events by Coupling Multiple Timeseries With a Nonparametric Bayesian Method:https://ieeexplore.ieee.org/abstract/document/6763098?casa_token=wPKB1S938vcAAAAA:il9gnh6pKOssqEYkYuzKor8XoYvhwYM_veqgVUjyCMoOqMMfnYtrnfnh7x4UKjw9UgsJaglC6we2nQ 41 | - Investigating the impact of weather conditions and time of day on traffic flow characteristics:https://journals.ametsoc.org/view/journals/wcas/14/3/WCAS-D-22-0012.1.xml 42 | - Variational Disentangled Graph Auto-Encoders for Link Prediction:https://arxiv.org/abs/2306.11315 43 | - Graph neural networks for anomaly detection in industrial internet of things:https://ieeexplore.ieee.org/abstract/document/9471816?casa_token=c93zsFxKTZQAAAAA:Ud0fjHwZxW4orRAXglbEJnLVnZKSZJnmhwH0qH7dCGOlVBwODXGyVaD9Frzo2yV3ZOuXsCPA8FAaoA 44 | - Perceiving spatiotemporal traffic anomalies from sparse representation-modeled city dynamics:https://link.springer.com/article/10.1007/s00779-020-01474-4 45 | - Urban anomaly analytics: Description, detection, and prediction:https://ieeexplore.ieee.org/abstract/document/9080109/ 46 | - Graph convolutional adversarial networks for spatiotemporal anomaly 
detection:https://ieeexplore.ieee.org/abstract/document/9669110/ 47 | - Anomaly detection and inter-sensor transfer learning on smart manufacturing datasets:https://www.mdpi.com/1424-8220/23/1/486 48 | - Graph neural network-based anomaly detection in multivariate time series:https://ojs.aaai.org/index.php/AAAI/article/view/16523 49 | - GMAT-DU: Traffic anomaly prediction with fine spatiotemporal granularity in sparse data:https://ieeexplore.ieee.org/abstract/document/10061355/ 50 | - Graph anomaly detection with graph neural networks: Current status and challenges:https://ieeexplore.ieee.org/abstract/document/9906987/ 51 | - Anomaly detection with generative adversarial networks for multivariate time series:https://arxiv.org/abs/1809.04758 52 | - A multimodal anomaly detector for robot-assisted feeding using an lstm-based variational autoencoder:https://ieeexplore.ieee.org/abstract/document/8279425/ 53 | 54 | ### Graph Convolutional Networks 55 | - GCN:https://arxiv.org/abs/1609.02907 56 | - GAT:https://arxiv.org/abs/1710.10903 57 | - GraphSAGE:https://proceedings.neurips.cc/paper_files/paper/2017/hash/5dd9db5e033da9c6fb5ba83c7a7ebea9-Abstract.html 58 | - GIN:https://arxiv.org/abs/1810.00826 59 | - DeepGCN:https://arxiv.org/abs/1904.03751 60 | - PMLP:https://arxiv.org/abs/2212.09034 61 | - DeepGCN:http://openaccess.thecvf.com/content_ICCV_2019/html/Li_DeepGCNs_Can_GCNs_Go_As_Deep_As_CNNs_ICCV_2019_paper.html 62 | - Graph contrastive learning (Neighbor contrastive learning on learnable graph augmentation):https://ojs.aaai.org/index.php/AAAI/article/view/26168 63 | 64 | 🔥 [Highly recommended] PyG, PyTorch's graph neural network library: https://pyg.org/ 65 | ![](./images/pyg.png) -------------------------------------------------------------------------------- /docs/deeplearning/images/pyg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/deeplearning/images/pyg.png -------------------------------------------------------------------------------- /docs/intro.md: -------------------------------------------------------------------------------- 1 | ## Table of Contents 2 | 3 | ### Quick Links 4 | * 🐍 [Python Basics](./basics/base.md) 5 | * 📊 [NumPy Basics](./data-analysis/numpy.md) 6 | * 🐼 [Pandas Basics](./data-analysis/pandas.md) 7 | * 🍥 [Deep Learning Basics + Roadmap](./deeplearning/deeplearning.md) 8 | * 📚 [Scripts Library](./scripts/letpub) 9 | 10 | ### 📑 Read Online 11 | 12 | https://zhiyu1998.github.io/Python-Basis-Notes/docs/#/ 13 | 14 | ### 🐍 Script Collection 15 | 16 | - [Extract journal/conference info for paper references](/docs/scripts/letpub) 17 | - [Scrape ESG ratings of Chinese companies](/docs/scripts/syntaogf) 18 | - [Export the Feige knowledge base](/docs/scripts/feige_export) 19 | - [Automated OCR framework](/docs/scripts/auto_ocr_framework) 20 | 21 | ### 📈 Star History 22 | 23 | ![](https://api.star-history.com/svg?repos=zhiyu1998/Python-Basis-Notes&type=Date) 24 | 25 | -------------------------------------------------------------------------------- /docs/scripts/1-extra_letpub.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # @Time : 2022/11/21 23:59 4 | # @Author : zhiyu1998 5 | 6 | import json 7 | import os.path 8 | import re 9 | import time 10 | import random 11 | import logging 12 | 13 | import pandas as pd 14 | import requests 15 | 16 | from bs4 import BeautifulSoup 17 | 18 | # Request headers 19 | headers = { 20 | 'User-Agent': 'Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Mobile Safari/537.36', 21 | 'Referer':
'https://www.letpub.com/journalapp/', 22 | } 23 | # 排除名单 24 | exclude_list = ['arXiv'] 25 | 26 | 27 | def logger_config(log_path, logging_name): 28 | """ 29 | 配置log 30 | logger是日志对象,handler是流处理器,console是控制台输出(没有console也可以,将不会在控制台输出,会在日志文件中输出) 31 | :param log_path: 输出log路径 32 | :param logging_name: 记录中name,可随意 33 | :return: 34 | """ 35 | # 获取logger对象,取名 36 | logger = logging.getLogger(logging_name) 37 | # 输出DEBUG及以上级别的信息,针对所有输出的第一层过滤 38 | logger.setLevel(level=logging.DEBUG) 39 | # 获取文件日志句柄并设置日志级别,第二层过滤 40 | handler = logging.FileHandler(log_path, encoding='UTF-8') 41 | handler.setLevel(logging.INFO) 42 | # 生成并设置文件日志格式 43 | formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') 44 | handler.setFormatter(formatter) 45 | # console相当于控制台输出,handler文件输出。获取流句柄并设置日志级别,第二层过滤 46 | console = logging.StreamHandler() 47 | console.setLevel(logging.DEBUG) 48 | # 为logger对象添加句柄 49 | logger.addHandler(handler) 50 | logger.addHandler(console) 51 | return logger 52 | 53 | 54 | logger = logger_config(log_path='log.txt', logging_name='日志') 55 | 56 | 57 | def extra_title_journal() -> pd.DataFrame: 58 | """ 59 | 提取论文标题和会议/期刊 60 | :return: 61 | """ 62 | res: pd.DataFrame = pd.DataFrame(columns=['title', 'journal']) 63 | with open("ref.bib") as f: 64 | temp = [] 65 | for line in f: 66 | if 'title' in line: 67 | temp.append(re.findall(r'({.*?})', line)[0].replace('{', '').replace('}', '')) 68 | if 'journal' in line or 'booktitle' in line: 69 | temp.append(re.findall(r'({.*?})', line)[0].replace('{', '').replace('}', '')) 70 | if line == '\n': 71 | # 校准 72 | if len(temp) > 2: 73 | temp.pop(2) 74 | # 补全 75 | if len(temp) < 2: 76 | temp.append(' ') 77 | res.loc[len(res)] = temp 78 | temp.clear() 79 | return res 80 | 81 | 82 | def save_excel(res: pd.DataFrame) -> None: 83 | """ 84 | 保存为excel 85 | :param res: 86 | :return: 87 | """ 88 | if os.path.exists('./ref.xlsx'): 89 | os.remove('./ref.xlsx') 90 | res.to_excel('ref.xlsx', index=False) 91 | 92 | 93 | def get_msg_from_letpub(journal_name: str) -> list: 94 | """ 95 | 从letpub获取期刊数据 96 | :return: ISSN 期刊名 期刊指标 中科院分区 学科领域 SCI/SCIE 是否OA 录用比例 审稿周期 近期文章 查看数 97 | """ 98 | url: str = f'https://www.letpub.com.cn/journalappAjaxXS.php?querytype=autojournal&term={journal_name}' 99 | r: requests.Response = requests.get(url=url, headers=headers) 100 | # 提取精准匹配的结果 -- [0] 101 | try: 102 | issn: str = json.loads(r.text)[0]['issn'] 103 | if issn == '': 104 | return [] 105 | except Exception as e: 106 | logger.info(f'请求错误:{e}') 107 | return [] 108 | # 请求信息 109 | postUrl: str = 'https://www.letpub.com.cn/index.php?page=journalapp&view=search' 110 | request_params: dict = { 111 | "searchname": "", 112 | "searchissn": issn, 113 | "searchfield": "", 114 | "searchimpactlow": "", 115 | "searchimpacthigh": "", 116 | "searchscitype": "", 117 | "view": "search", 118 | "searchcategory1": "", 119 | "searchcategory2": "", 120 | "searchjcrkind": "", 121 | "searchopenaccess": "", 122 | "searchsort": "relevance" 123 | } 124 | # 二次请求查询更快 125 | r2: requests.Response = requests.post(url=postUrl, headers=headers, data=request_params) 126 | # 爬取信息 127 | soup = BeautifulSoup(r2.text, 'lxml') 128 | td = soup.find_all('td', attrs={ 129 | 'style': 'border:1px #DDD solid; border-collapse:collapse; text-align:left; padding:8px 8px 8px 8px;'}) 130 | temp_letpub_data = [d.text for d in td] 131 | return temp_letpub_data 132 | 133 | 134 | def insert_sci_msg(payload: pd.DataFrame) -> pd.DataFrame: 135 | """ 136 | 获取SCI信息 137 | :return: 138 | """ 139 | res_dict: dict = {} 140 | # 遍历每个期刊 
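    # For each journal name below, get_msg_from_letpub() costs two HTTP round-trips:
    # the autocomplete endpoint first resolves the name to an ISSN, then a POST
    # search on that ISSN is scraped for its metrics row. Empty lookups are skipped,
    # and the random sub-second sleep keeps consecutive requests politely spaced.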
141 | for line in payload.loc[:, 'journal']: 142 | # TODO: 排除不想查询的 (line in exclude_list or) 143 | if line.isspace(): 144 | continue 145 | journal_data = get_msg_from_letpub(line) 146 | # 爬取结果判空 147 | if len(journal_data) == 0: 148 | continue 149 | res_dict[line] = journal_data 150 | time.sleep(round(random.uniform(0, 1), 2)) 151 | # 增加期刊的列 152 | payload_res: pd.DataFrame = payload.assign(issn='', journal_name='', target='', area='', field='', sci='', is_oa='', 153 | employment_ratio='', 154 | review_cycle='', recent='', view='') 155 | # 把爬取的数据填充进去 156 | for index, row in payload_res.iterrows(): 157 | print(f'已解决:{row["title"]}') 158 | if row['journal'] in res_dict: 159 | ''' 160 | 0-ISSN 161 | 1-期刊名 162 | 2-期刊指标 163 | 3-中科院分区 164 | 4-学科领域 165 | 5-SCI/SCIE 166 | 6-是否OA 167 | 7-录用比例 168 | 8-审稿周期 169 | 9-近期文章 170 | 10-查看数 171 | ''' 172 | match_item = res_dict[row['journal']] 173 | row['issn'] = match_item[0] 174 | row['journal_name'] = match_item[1] 175 | row['target'] = match_item[2] 176 | row['area'] = match_item[3] 177 | row['field'] = match_item[4] 178 | row['sci'] = match_item[5] 179 | row['is_oa'] = match_item[6] 180 | row['employment_ratio'] = match_item[7] 181 | row['review_cycle'] = match_item[8] 182 | row['recent'] = match_item[9] 183 | row['view'] = match_item[10] 184 | payload_res.iloc[index] = row 185 | return payload_res 186 | 187 | 188 | if __name__ == '__main__': 189 | start = time.time() 190 | # 提取论文名/期刊 191 | res: pd.DataFrame = extra_title_journal() 192 | # 获取期刊信息 193 | sci_res: pd.DataFrame = insert_sci_msg(res) 194 | # 保存EXCEL 195 | save_excel(sci_res) 196 | print(f"耗时:{time.time() - start:.2f}秒") 197 | -------------------------------------------------------------------------------- /docs/scripts/2-extra_syntaogf.py: -------------------------------------------------------------------------------- 1 | import re 2 | import random 3 | import pandas as pd 4 | 5 | from selenium import webdriver 6 | from selenium.webdriver.edge.service import Service 7 | from selenium.webdriver.common.by import By 8 | from selenium.webdriver.support.ui import WebDriverWait 9 | from selenium.webdriver.support import expected_conditions as EC 10 | from selenium.common.exceptions import ElementNotInteractableException 11 | 12 | import time 13 | 14 | # pandas读取数据 15 | my_excel = pd.read_excel("./data.xls") 16 | company_names = my_excel.iloc[:, 2] 17 | 18 | # TODO 从第n条数据开始爬 19 | # selenium爬取数据 20 | n = 0 21 | 22 | options = webdriver.EdgeOptions() 23 | options.add_argument('--headless') 24 | options.add_argument('--disable-animations') 25 | # TODO 添加浏览器引擎,例如:C:\\Users\\Administrator\\Documents\\PythonWorkSpace\\Test\\msedgedriver.exe 26 | s = Service(r"") 27 | 28 | driver = webdriver.Edge(options=options, service=s) 29 | 30 | driver.get("https://www.syntaogf.com/") 31 | #隐性等待3秒,打不开页面才报错 32 | driver.implicitly_wait(3) 33 | 34 | wait = WebDriverWait(driver, 10) 35 | driver.execute_script("window.scrollBy(0,700)") 36 | # 所有数据 37 | test_company = company_names 38 | # 第一次的位置 39 | with open("res_2.txt", "a") as f: 40 | ele = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="search_form_1"]/form/input[1]'))) 41 | ele.send_keys(test_company[n]) 42 | btn = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="search_form_1"]/form/input[2]'))).click() 43 | # 等待标签 44 | try: 45 | label = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="search_html"]/div/div[1]/a'))).click() 46 | except ElementNotInteractableException: 47 | wait.until( 48 | 
EC.presence_of_element_located((By.XPATH, '//*[@id="no_search_html"]/div/div[2]/a'))).click() 49 | time.sleep(2) 50 | wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="search_form_1"]/form/i'))).click() 51 | ele = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="search_form_1"]/form/input[1]'))) 52 | ele.send_keys("航锦科技股份有限公司") 53 | btn = wait.until( 54 | EC.presence_of_element_located((By.XPATH, '//*[@id="search_form_1"]/form/input[2]'))).click() 55 | ele = wait.until( 56 | EC.presence_of_element_located((By.XPATH, '//*[@id="search_form_2"]/form/input[1]'))).clear() 57 | time.sleep(2) 58 | # list = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="search_html"]/div/div[1]/div[5]'))) 59 | list = wait.until(EC.presence_of_all_elements_located((By.XPATH, '//*[@id="search_html"]/div/div[1]/div[5]/div'))) 60 | reg = r'^\d{4}|[A-Z][+-]?$' 61 | temp_str = test_company[n] + " " 62 | for item in list: 63 | match_res = re.findall(reg, item.text) 64 | temp_str = temp_str + ":".join(match_res) + " " 65 | temp_str += "\n" 66 | f.write(temp_str) 67 | time.sleep(2) 68 | print(test_company[n] + "完成") 69 | 70 | for com in test_company[n+1:]: 71 | temp_str = com + " " 72 | ele = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="search_form_2"]/form/input[1]'))) 73 | time.sleep(random.randint(1, 2)) 74 | ele.send_keys(com) 75 | time.sleep(random.randint(1, 2)) 76 | btn = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="search_form_2"]/form/input[2]'))).click() 77 | time.sleep(random.randint(1, 2)) 78 | 79 | # 等待标签 80 | # label = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="search_html"]/div/div[1]/a'))).click() 81 | # time.sleep(1) 82 | # list = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="search_html"]/div/div[1]/div[5]'))) 83 | list = wait.until( 84 | EC.presence_of_all_elements_located((By.XPATH, '//*[@id="search_html"]/div/div[1]/div[5]/div'))) 85 | for item in list: 86 | match_res: list = re.findall(reg, item.text) 87 | temp_str = temp_str + ":".join(match_res) + " " 88 | temp_str += "\n" 89 | f.write(temp_str) 90 | # 如果不存在这个公司 91 | try: 92 | ele = wait.until( 93 | EC.presence_of_element_located((By.XPATH, '//*[@id="search_form_2"]/form/input[1]'))).clear() 94 | except ElementNotInteractableException: 95 | wait.until( 96 | EC.presence_of_element_located((By.XPATH, '//*[@id="no_search_html"]/div/div[2]/a'))).click() 97 | time.sleep(2) 98 | wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="search_form_1"]/form/i'))).click() 99 | ele = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@id="search_form_1"]/form/input[1]'))) 100 | ele.send_keys("航锦科技股份有限公司") 101 | btn = wait.until( 102 | EC.presence_of_element_located((By.XPATH, '//*[@id="search_form_1"]/form/input[2]'))).click() 103 | ele = wait.until( 104 | EC.presence_of_element_located((By.XPATH, '//*[@id="search_form_2"]/form/input[1]'))).clear() 105 | time.sleep(2) 106 | print(com + "完成") 107 | continue 108 | time.sleep(random.randint(1, 2)) 109 | print(com + "完成") -------------------------------------------------------------------------------- /docs/scripts/_category_.json: -------------------------------------------------------------------------------- 1 | { 2 | "label": "脚本集", 3 | "position": 5, 4 | "link": { 5 | "type": "generated-index" 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /docs/scripts/auto_ocr_framework.md: 
-------------------------------------------------------------------------------- 1 | # 自动化识别框架 2 | 3 | 这是在帮助我的好朋友实现 `抖店自动发送消息` 的时候实现的一个框架,本来很简陋,然后被我硬生生的拿来做了一个简单的自动化识别框架,现在手游一开刷个牙回来就刷完了 4 | 5 | ## 技术栈 6 | 7 | - pandas 8 | - cv2 9 | - pyautogui 10 | - functools 11 | 12 | ## 常用的几个方法 13 | 14 | - `click_image` 点击图片 15 | - `click_image_until_another_appears` 点击图片直到下一个图片出现 16 | - `click_image_sequence` 点击一系列图片,可以传入 List 17 | - `type_text` 输入文字 18 | - `screenshot_and_click` OCR后找到指定文字点击 19 | - `process_screenshot_for_ocr` OCR图片得到数据 20 | 21 | ## 心理测试自动化逻辑(招聘用) 22 | 23 | @todo 主要是有图片,有时间单独开一个仓库公开代码 24 | 25 | ## 物华弥新自动化逻辑 26 | 27 | @todo 主要是有图片,有时间单独开一个仓库公开代码 28 | 29 | ## 框架代码 30 | 31 | ```python 32 | import hashlib 33 | import math 34 | import pandas as pd 35 | import pyperclip 36 | import requests 37 | import base64 38 | import cv2 39 | import pyautogui 40 | import time 41 | import random 42 | import numpy as np 43 | import logging 44 | from io import BytesIO 45 | from functools import wraps 46 | from functools import lru_cache 47 | from config import SCREENSHOT_REGION 48 | 49 | logging.basicConfig(level=logging.INFO) 50 | 51 | 52 | def retry_on_failure(retries=3, delay=1): 53 | """ 54 | 装饰器,用于在函数失败时重试 55 | :param retries: 重试次数 56 | :param delay: 重试间隔时间 57 | """ 58 | 59 | def decorator(func): 60 | @wraps(func) 61 | def wrapper(*args, **kwargs): 62 | for attempt in range(retries): 63 | result = func(*args, **kwargs) 64 | if result: 65 | return result 66 | logging.warning(f"尝试 {attempt + 1} 失败,正在重试...") 67 | time.sleep(delay) 68 | logging.error(f"所有 {retries} 次尝试失败。") 69 | return None 70 | 71 | return wrapper 72 | 73 | return decorator 74 | 75 | 76 | class AutomationTool: 77 | # 定义常量 78 | LEFT = 'left' 79 | RIGHT = 'right' 80 | FULL = 'full' 81 | 82 | # 缓存最近一次的截图和 OCR 结果 83 | _last_screenshot = None 84 | _last_screenshot_time = 0 85 | _last_screenshot_hash = None 86 | _last_ocr_result = None 87 | _screenshot_cache_duration = 1 # 缓存持续时间(秒) 88 | 89 | UMI_OCR_URL = "http://127.0.0.1:1224/api/ocr" 90 | 91 | @staticmethod 92 | @lru_cache(maxsize=None) 93 | def read_excel(excel_path, usecols="A") -> pd.DataFrame: 94 | """ 95 | 读取Excel文件中指定的列数据 96 | :param excel_path: Excel文件路径 97 | :param usecols: 要读取的列(默认读取第A列) 98 | :return: 包含指定列数据的DataFrame 99 | """ 100 | df = pd.read_excel(excel_path, usecols=usecols) 101 | return df 102 | 103 | @staticmethod 104 | def ocr_image(base64_image_data): 105 | """ 106 | 发送HTTP请求到Umi-OCR 107 | :param base64_image_data: 108 | :return: 109 | """ 110 | try: 111 | response = requests.post(AutomationTool.UMI_OCR_URL, json={ "base64": base64_image_data }) 112 | response.raise_for_status() 113 | return response.json() 114 | except requests.RequestException as e: 115 | logging.error(f"OCR请求失败: {e}") 116 | return None 117 | 118 | @staticmethod 119 | def capture_screenshot(): 120 | """ 121 | 截取屏幕并返回PIL格式的图像 122 | :return: 123 | """ 124 | # 如果SCREENSHOT_REGION为空 125 | region = SCREENSHOT_REGION if SCREENSHOT_REGION else AutomationTool.FULL 126 | # 判断SCREENSHOT_REGION(截图是左半部分还是全屏幕) 127 | if region == AutomationTool.LEFT: 128 | logging.info("截取屏幕的左半部分") 129 | return AutomationTool.capture_screenshot_half(AutomationTool.LEFT) 130 | elif region == AutomationTool.RIGHT: 131 | logging.info("截取屏幕的右半部分") 132 | return AutomationTool.capture_screenshot_half(AutomationTool.RIGHT) 133 | elif region == AutomationTool.FULL: 134 | logging.info("截取整个屏幕") 135 | return pyautogui.screenshot() 136 | else: 137 | raise ValueError("截图区域无效。请使用'left'、'right'或'full'") 138 | 139 | @staticmethod 140 | def 
capture_screenshot_half(side=LEFT): 141 | """ 142 | 截取屏幕的左半部分或右半部分 143 | :param side: 'left' 或 'right',默认为 'left' 144 | :return: 截取的图像 145 | """ 146 | # 获取屏幕的宽度和高度 147 | screen_width, screen_height = pyautogui.size() 148 | # 截取整个屏幕 149 | screenshot = pyautogui.screenshot() 150 | # 计算宽度的 73% 151 | width_73_percent = int(screen_width * 0.73) 152 | 153 | if side == AutomationTool.LEFT: 154 | # 裁剪出左半部分 155 | half = screenshot.crop((0, 0, width_73_percent, screen_height)) 156 | elif side == AutomationTool.RIGHT: 157 | # 裁剪出右半部分 158 | half = screenshot.crop((screen_width - width_73_percent, 0, screen_width, screen_height)) 159 | else: 160 | raise ValueError("无效的side参数。请使用'left'或'right'") 161 | 162 | # half.save(f"{side}_region.png") # 调试使用 163 | return half 164 | 165 | @staticmethod 166 | def convert_image_to_base64(pil_image) -> str: 167 | """ 168 | 将PIL格式图像转换为Base64编码 169 | :param pil_image: 170 | :return: 171 | """ 172 | buffered = BytesIO() 173 | pil_image.save(buffered, format="PNG") 174 | img_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8") 175 | return img_base64 176 | 177 | @staticmethod 178 | def convert_image_to_opencv(pil_image): 179 | """ 180 | 将PIL格式图像转换为OpenCV格式 181 | :param pil_image: 182 | :return: 183 | """ 184 | np_image = np.array(pil_image) 185 | return cv2.cvtColor(np_image, cv2.COLOR_RGB2BGR) 186 | 187 | @staticmethod 188 | def extract_text_in_box(ocr_data, x1, y1, x2, y2): 189 | """ 190 | 提取给定坐标框内的文字。 191 | :param ocr_data: OCR 结果数据 192 | :param x1, y1, x2, y2: 指定的坐标框 (左上角 x1, y1 和 右下角 x2, y2) 193 | :return: 识别到的文字 194 | """ 195 | for item in ocr_data['data']: 196 | box = item['box'] 197 | text = item['text'] 198 | x_min = min([point[0] for point in box]) 199 | y_min = min([point[1] for point in box]) 200 | x_max = max([point[0] for point in box]) 201 | y_max = max([point[1] for point in box]) 202 | # 判断 box 是否在指定范围内 203 | if x_min >= x1 and y_min >= y1 and x_max <= x2 and y_max <= y2: 204 | return text 205 | return None 206 | 207 | @staticmethod 208 | def click_on_text(ocr_data, target_text): 209 | """ 210 | 根据识别到的文字,移动鼠标并点击目标文字的位置 211 | :param ocr_data: 212 | :param target_text: 213 | :return: 214 | """ 215 | for item in ocr_data['data']: 216 | text = item['text'] 217 | if target_text in text: 218 | box = item['box'] 219 | x_min = min([point[0] for point in box]) 220 | y_min = min([point[1] for point in box]) 221 | x_max = max([point[0] for point in box]) 222 | y_max = max([point[1] for point in box]) 223 | # 计算中心位置并添加随机偏移 224 | center_x = (x_min + x_max) // 2 + AutomationTool.human_like_offset() 225 | center_y = (y_min + y_max) // 2 + AutomationTool.human_like_offset() 226 | # 获取当前鼠标位置 227 | current_x, current_y = pyautogui.position() 228 | # 模拟人类鼠标移动 229 | AutomationTool.move_mouse_smoothly((current_x, current_y), (center_x, center_y)) 230 | # 等待随机时间 231 | time.sleep(AutomationTool.human_like_delay()) 232 | # 点击 233 | pyautogui.click() 234 | logging.info(f"点击了文字: {text}, 位置: {center_x}, {center_y}") 235 | return True 236 | logging.warning(f"未找到目标文字: {target_text}") 237 | return False 238 | 239 | @staticmethod 240 | def type_text(input_text): 241 | """ 242 | 像粘贴一样在当前焦点输入框中快速输入指定文字 243 | :param input_text: 244 | :return: 245 | """ 246 | try: 247 | # 将文本复制到剪贴板 248 | pyperclip.copy(str(input_text)) 249 | # 模拟 Ctrl + V 粘贴(Windows/Linux),或者 Command + V(macOS) 250 | pyautogui.hotkey('ctrl', 'v') 251 | except Exception as e: 252 | logging.error(f"输入文字失败: {e}") 253 | 254 | @staticmethod 255 | @retry_on_failure(retries=3, delay=1) 256 | def 
screenshot_and_click(target_text): 257 | """ 258 | 截图并点击指定文字 259 | :param target_text: 260 | :return: 261 | """ 262 | ocr_data = AutomationTool.process_screenshot_for_ocr() 263 | if ocr_data: 264 | # 根据目标文字进行点击 265 | clicked = AutomationTool.click_on_text(ocr_data, target_text) 266 | if clicked: 267 | time.sleep(1) 268 | logging.info(f"成功点击目标文字: {target_text}") 269 | return True 270 | else: 271 | logging.warning(f"未找到目标文字: {target_text}") 272 | return False 273 | 274 | @staticmethod 275 | def find_text_in_screen(target_text: str) -> bool: 276 | """ 277 | 截图并判断是否存在某个文字 278 | :param target_text: 279 | :return: 280 | """ 281 | ocr_data = AutomationTool.process_screenshot_for_ocr() 282 | if ocr_data: 283 | # 遍历所有识别到的文字,判断是否已经包含了发送的消息 284 | for item in ocr_data['data']: 285 | if target_text in item['text']: 286 | logging.info(f"找到相应目标文字:{target_text}") 287 | return True 288 | logging.warning(f"未找到相应目标文字:{target_text}") 289 | return False 290 | 291 | @staticmethod 292 | def find_image_in_screenshot(template_path, threshold=0.8): 293 | """ 294 | 在屏幕截图中查找给定图片模板(使用灰度图) 295 | :param template_path: 296 | :param threshold: 匹配阈值 297 | :return: 298 | """ 299 | screenshot = AutomationTool.capture_screenshot() 300 | screenshot_cv = AutomationTool.convert_image_to_opencv(screenshot) 301 | # 转换为灰度图 302 | screenshot_gray = cv2.cvtColor(screenshot_cv, cv2.COLOR_BGR2GRAY) 303 | # 读取模板图片并转换为灰度图 304 | template = cv2.imread(template_path, cv2.IMREAD_GRAYSCALE) 305 | if template is None: 306 | logging.error(f"无法读取模板图片:{template_path}") 307 | return None 308 | # 获取模板的宽高 309 | h, w = template.shape[:2] 310 | # 使用模板匹配查找模板 311 | res = cv2.matchTemplate(screenshot_gray, template, cv2.TM_CCOEFF_NORMED) 312 | # 获取最佳匹配位置 313 | min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res) 314 | if max_val > threshold: 315 | top_left = max_loc 316 | center_x = top_left[0] + w // 2 317 | center_y = top_left[1] + h // 2 318 | return center_x, center_y 319 | else: 320 | logging.info("未找到匹配的图片") 321 | return None 322 | 323 | @staticmethod 324 | @retry_on_failure(retries=3, delay=2) 325 | def click_image(template_path): 326 | """ 327 | 在屏幕上查找图片并点击 328 | :param template_path: 329 | :return: 330 | """ 331 | position = AutomationTool.find_image_in_screenshot(template_path) 332 | logging.info(f"图片位置: {position}") 333 | if position: 334 | # 获取当前鼠标位置 335 | current_x, current_y = pyautogui.position() 336 | # 移动鼠标到目标位置 337 | AutomationTool.move_mouse_smoothly((current_x, current_y), position, duration=0.3) 338 | # 等待随机时间 339 | time.sleep(AutomationTool.human_like_delay()) 340 | # 点击 341 | pyautogui.click() 342 | logging.info(f"点击了图片位置: {position}") 343 | return True 344 | else: 345 | logging.warning(f"未找到图片: {template_path}") 346 | return False 347 | 348 | @staticmethod 349 | def click_image_until_another_appears(click_image_path, stop_image_path, max_attempts=10, delay_between_clicks=1): 350 | """ 351 | 持续点击一个图片,直到另一个图片出现为止。 352 | 353 | :param click_image_path: 要点击的图片路径 354 | :param stop_image_path: 出现后停止点击的图片路径 355 | :param max_attempts: 最大尝试次数 356 | :param delay_between_clicks: 每次点击之间的延迟(秒) 357 | :return: 如果成功找到停止图片返回True,否则返回False 358 | """ 359 | for attempt in range(max_attempts): 360 | # 检查停止图片是否出现 361 | if AutomationTool.find_image_in_screenshot(stop_image_path): 362 | logging.info(f"找到停止图片: {stop_image_path}") 363 | return True 364 | 365 | # 点击指定图片 366 | AutomationTool.click_image(click_image_path) 367 | logging.info(f"点击图片: {click_image_path},尝试次数: {attempt + 1}") 368 | 369 | # 等待指定时间 370 | time.sleep(delay_between_clicks) 371 | 372 
| logging.warning(f"达到最大尝试次数 {max_attempts},未找到停止图片: {stop_image_path}") 373 | return False 374 | 375 | @staticmethod 376 | def process_screenshot_for_ocr(): 377 | """ 378 | 截取屏幕并进行 OCR 处理,使用缓存优化 379 | :return: OCR 数据 380 | """ 381 | current_time = time.time() 382 | # 检查缓存是否有效 383 | if AutomationTool._last_screenshot is not None: 384 | if current_time - AutomationTool._last_screenshot_time < AutomationTool._screenshot_cache_duration: 385 | logging.info("使用缓存的 OCR 结果") 386 | return AutomationTool._last_ocr_result 387 | 388 | # 截取屏幕 389 | image = AutomationTool.capture_screenshot() 390 | # 计算截图的哈希值 391 | image_hash = AutomationTool._calculate_image_hash(image) 392 | 393 | # 如果截图内容未变化,直接返回缓存的 OCR 结果 394 | if AutomationTool._last_screenshot_hash == image_hash: 395 | logging.info("截图内容未变化,使用缓存的 OCR 结果") 396 | AutomationTool._last_screenshot_time = current_time 397 | return AutomationTool._last_ocr_result 398 | 399 | # 更新缓存 400 | AutomationTool._last_screenshot = image 401 | AutomationTool._last_screenshot_hash = image_hash 402 | AutomationTool._last_screenshot_time = current_time 403 | 404 | # 进行 OCR 识别 405 | image_base64 = AutomationTool.convert_image_to_base64(image) 406 | ocr_result = AutomationTool.ocr_image(image_base64) 407 | 408 | # 缓存 OCR 结果 409 | AutomationTool._last_ocr_result = ocr_result 410 | 411 | return ocr_result 412 | 413 | @staticmethod 414 | def _calculate_image_hash(image): 415 | """ 416 | 计算图像的哈希值 417 | :param image: PIL 图像 418 | :return: 哈希值字符串 419 | """ 420 | buffered = BytesIO() 421 | image.save(buffered, format="PNG") 422 | image_bytes = buffered.getvalue() 423 | return hashlib.md5(image_bytes).hexdigest() 424 | 425 | @staticmethod 426 | def click_image_sequence(image_paths, delay_between=1, max_wait=10): 427 | """ 428 | 按顺序识别并点击一系列图片。 429 | :param image_paths: 图片路径列表 430 | :param delay_between: 每次尝试之间的延迟 431 | :param max_wait: 等待第二张图片出现的最大时间(秒) 432 | :return: 如果成功点击所有图片返回True,否则返回False 433 | """ 434 | for image_path in image_paths: 435 | start_time = time.time() 436 | while True: 437 | if AutomationTool.click_image(image_path): 438 | break 439 | if time.time() - start_time > max_wait: 440 | logging.warning(f"未能在规定时间内找到图片: {image_path}") 441 | return False 442 | time.sleep(delay_between) 443 | logging.info("成功点击所有图片") 444 | return True 445 | 446 | @staticmethod 447 | def move_and_swipe_with_hold(image_path, swipe_distance=200, direction='right', duration=0.5, button='left'): 448 | """ 449 | 将鼠标移动到图片的位置,然后向右滑动指定的距离。 450 | 451 | :param image_path: 图片的位置 452 | :param swipe_distance: 向右滑动的距离(像素) 453 | :param direction: 滑动的方向,可以是 'right', 'left', 'top', 'bottom' 454 | :param duration: 移动和滑动的持续时间(秒), 455 | :param button: 按住的鼠标按钮,可以是 'left', 'right', 'middle' 456 | """ 457 | # 移动到目标位置 458 | position = AutomationTool.find_image_in_screenshot(image_path, 0.7) 459 | if position is None: 460 | logging.error(f"未找到图片:{image_path}") 461 | return False 462 | x, y = position 463 | 464 | # 获取当前鼠标位置 465 | current_x, current_y = pyautogui.position() 466 | # 移动鼠标到图片位置 467 | AutomationTool.move_mouse_smoothly((current_x, current_y), (x, y), duration=0.3) 468 | 469 | # 等待一段时间,确保鼠标已经移动到目标位置 470 | time.sleep(AutomationTool.human_like_delay()) 471 | 472 | # 按下鼠标按钮 473 | pyautogui.mouseDown(button=button) 474 | 475 | # 根据方向参数计算目标位置 476 | if direction == 'right': 477 | target_x = x + swipe_distance 478 | target_y = y 479 | elif direction == 'left': 480 | target_x = x - swipe_distance 481 | target_y = y 482 | elif direction == 'top': 483 | target_x = x 484 | target_y = y - swipe_distance 485 | 
elif direction == 'bottom': 486 | target_x = x 487 | target_y = y + swipe_distance 488 | else: 489 | raise ValueError("Invalid direction. Use 'right', 'left', 'top', or 'bottom'.") 490 | 491 | # 按住鼠标并滑动到目标位置 492 | AutomationTool.move_mouse_smoothly((x, y), (target_x, target_y), duration=duration, hold_button=button) 493 | # 等待随机时间 494 | time.sleep(AutomationTool.human_like_delay()) 495 | 496 | logging.info(f"从位置 ({x}, {y}) 滑动到 ({target_x}, {target_y}),方向:{direction}") 497 | return True 498 | 499 | @staticmethod 500 | def press_enter(): 501 | """ 502 | 按下回车键 503 | """ 504 | pyautogui.press('enter') 505 | 506 | @staticmethod 507 | def press_esc(): 508 | """ 509 | 按下esc键 510 | """ 511 | pyautogui.press('esc') 512 | 513 | @staticmethod 514 | def human_like_delay(min_delay=0.1, max_delay=0.3): 515 | """ 516 | 返回一个介于 min_delay 和 max_delay 之间的随机等待时间 517 | """ 518 | return random.uniform(min_delay, max_delay) 519 | 520 | @staticmethod 521 | def human_like_offset(offset_range=2): 522 | """ 523 | 返回一个在 -offset_range 到 offset_range 之间的随机偏移 524 | """ 525 | return random.randint(-offset_range, offset_range) 526 | 527 | @staticmethod 528 | def move_mouse_smoothly(start_pos, end_pos, duration=0.5, hold_button=None): 529 | """ 530 | 模拟人类的鼠标移动,使用 pyautogui 的 tween 函数 531 | :param start_pos: 起始位置 (x, y) 532 | :param end_pos: 结束位置 (x, y) 533 | :param duration: 总持续时间(秒) 534 | :param hold_button: 如果需要在移动过程中按住鼠标按钮,可以指定 'left', 'right', 'middle' 535 | """ 536 | # 添加随机偏移到结束位置 537 | offset_x = AutomationTool.human_like_offset() 538 | offset_y = AutomationTool.human_like_offset() 539 | end_pos = (end_pos[0] + offset_x, end_pos[1] + offset_y) 540 | 541 | # 随机选择一个缓动函数 542 | tween_funcs = [ 543 | pyautogui.easeInQuad, 544 | pyautogui.easeOutQuad, 545 | pyautogui.easeInOutQuad, 546 | pyautogui.easeInBounce, 547 | pyautogui.easeOutBounce, 548 | pyautogui.easeInElastic, 549 | pyautogui.easeOutElastic 550 | ] 551 | tween_func = random.choice(tween_funcs) 552 | 553 | # 按下鼠标按钮(如果需要) 554 | if hold_button: 555 | pyautogui.mouseDown(button=hold_button) 556 | 557 | # 使用 pyautogui 的 moveTo 函数,指定持续时间和缓动函数 558 | pyautogui.moveTo(end_pos[0], end_pos[1], duration=duration, tween=tween_func) 559 | 560 | # 释放鼠标按钮(如果需要) 561 | if hold_button: 562 | pyautogui.mouseUp(button=hold_button) 563 | 564 | def custom_tween(x): 565 | """ 566 | 自定义缓动函数,可以调整 x 的幂次来控制速度曲线 567 | """ 568 | return x ** 2 # 或者其他数学函数 569 | 570 | @staticmethod 571 | def _bezier_curve(points, n=50): 572 | """ 573 | 生成贝塞尔曲线的点集 574 | :param points: 控制点列表 575 | :param n: 点的数量 576 | :return: 点的列表 577 | """ 578 | result = [] 579 | for i in range(n + 1): 580 | t = i / n 581 | x = 0 582 | y = 0 583 | n_points = len(points) 584 | for j, (px, py) in enumerate(points): 585 | bernstein = AutomationTool._bernstein_poly(j, n_points - 1, t) 586 | x += px * bernstein 587 | y += py * bernstein 588 | result.append((x, y)) 589 | return result 590 | 591 | @staticmethod 592 | def _bernstein_poly(i, n, t): 593 | """ 594 | 计算伯恩斯坦多项式值 595 | """ 596 | return math.comb(n, i) * (t ** i) * ((1 - t) ** (n - i)) 597 | 598 | ``` 599 | -------------------------------------------------------------------------------- /docs/scripts/feige_export.md: -------------------------------------------------------------------------------- 1 | # 飞鸽知识库导出 2 | 3 | 这个需求是帮助我的好朋友导出飞鸽(来自于抖店机器人)的知识库数据,界面相当复杂,每天0点数据变化还会导致 xpath 发生变化,如果下面的脚本失效更改 xpath 即可解决 4 | 5 | ## 技术栈 6 | - pandas 7 | - selenium 8 | 9 | ## 大致逻辑介绍 10 | 11 | ### 文件系统 12 | 13 | 启动&主要逻辑 -- main.py 14 | Excel操作 -- excel_io.py 15 | 实例化对象 -- knowledge.py 16 | 
63 | ## Result
64 | 
65 | It is up and running; for commercial reasons, none of the data is shown publicly.
66 | 
67 | ![](./images/ref-3-1.png)
68 | 
69 | [View the code](https://github.com/zhiyu1998/feige_knowledge_export)
70 | 
--------------------------------------------------------------------------------
/docs/scripts/images/ref-1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/scripts/images/ref-1-1.png
--------------------------------------------------------------------------------
/docs/scripts/images/ref-1-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/scripts/images/ref-1-2.png
--------------------------------------------------------------------------------
/docs/scripts/images/ref-3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/docs/scripts/images/ref-3-1.png
--------------------------------------------------------------------------------
/docs/scripts/letpub.md:
--------------------------------------------------------------------------------
1 | # Extracting Journal/Conference Info from a Paper's References
2 | > The academic scripts here mostly come from my advisor -- since I'd rather not do the grunt work, let Python do it for me!
3 | 
4 | **Usage notes**
5 | 
6 | Requirements:
7 | 
8 | ● the journal of each referenced paper
9 | 
10 | ● screening for journals with short review cycles
11 | 
12 | Example references from Overleaf:
13 | 
14 | > keep a blank line between entries
15 | 
16 | 
17 | 
18 | ![1](images/ref-1-1.png)
19 | 
20 | ```
21 | @article{yu2019review,
22 |   title={A review of recurrent neural networks: LSTM cells and network architectures},
23 |   author={Yu, Yong and Si, Xiaosheng and Hu, Changhua and Zhang, Jianxun},
24 |   journal={Neural computation},
25 |   volume={31},
26 |   number={7},
27 |   pages={1235--1270},
28 |   year={2019},
29 |   publisher={MIT Press One Rogers Street, Cambridge, MA 02142-1209, USA journals-info~…}
30 | }
31 | 
32 | @article{huang2015bidirectional,
33 |   title={Bidirectional LSTM-CRF models for sequence tagging},
34 |   author={Huang, Zhiheng and Xu, Wei and Yu, Kai},
35 |   journal={arXiv preprint arXiv:1508.01991},
36 |   year={2015}
37 | }
38 | 
39 | @inproceedings{sundermeyer2012lstm,
40 |   title={LSTM neural networks for language modeling},
41 |   author={Sundermeyer, Martin and Schl{\"u}ter, Ralf and Ney, Hermann},
42 |   booktitle={Thirteenth annual conference of the international speech communication association},
43 |   year={2012}
44 | }
45 | ```
46 | 
47 | Required packages:
48 | 
49 | ```properties
50 | pandas~=1.5.1
51 | requests~=2.28.1
52 | beautifulsoup4~=4.11.1
53 | lxml~=4.9.1
54 | openpyxl~=3.0.10
55 | ```
56 | 
57 | Create a Python script:
58 | 
59 | > Put the script in the same directory as ref.bib
60 | 
61 | [View the full code](1-extra_letpub.py)
62 | 
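Before diving into the full script, a minimal, hedged sketch of the first half of the idea: pull each entry's `journal`/`booktitle` field out of `ref.bib` and tabulate it. The LetPub lookup and the review-cycle screening are omitted; this is an editorial simplification, not the actual `1-extra_letpub.py`:

```python
import re
import pandas as pd

def extract_venues(bib_path="ref.bib"):
    """Collect each BibTeX entry's journal/booktitle into a DataFrame."""
    with open(bib_path, encoding="utf-8") as f:
        bib = f.read()

    rows = []
    # One match per entry: entry type, citation key, entry body
    for m in re.finditer(r"@(\w+)\{([^,]+),(.*?)\n\}", bib, flags=re.S):
        entry_type, key, body = m.groups()
        venue = re.search(r"(journal|booktitle)\s*=\s*\{(.+?)\}", body, flags=re.S)
        rows.append({
            "key": key.strip(),
            "type": entry_type,
            "venue": venue.group(2).strip() if venue else "",
        })
    return pd.DataFrame(rows)

# e.g. extract_venues().to_excel("venues.xlsx", index=False)
```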
63 | Example output:
64 | 
65 | ![1](images/ref-1-2.png)
66 | 
67 | #### Changelog
68 | 
69 | 1.0
70 | 
71 | - 2022-11-22 12:22 Added logging; fixed a few issues
72 | - 2022-11-22 00:17 Initial version of the script
--------------------------------------------------------------------------------
/docs/scripts/syntaogf.md:
--------------------------------------------------------------------------------
1 | # Collecting ESG Ratings of Chinese Companies
2 | This was a request from a good friend of mine: collect ESG rating data for Chinese companies from [http://www.esgchina.org/](http://www.esgchina.org/).
3 | 
4 | ## Tech stack
5 | - pandas
6 | - selenium
7 | 
8 | ## Logic
9 | The overall flow: open the site, search for each company, write the results to Excel, and support resuming from where it left off after an interruption.
10 | 
11 | 1. First find out which companies your source Excel file contains
12 | ```python
13 | # Read the data with pandas
14 | my_excel = pd.read_excel("./data.xls")
15 | company_names = my_excel.iloc[:, 2]
16 | ```
17 | 2. The position to resume from after an interruption
18 | ```python
19 | # TODO start crawling from the n-th record
20 | # Scrape the data with selenium
21 | n = 0
22 | ```
23 | 3. Download the driver, then point the script at it
24 |    - Driver (must match your browser version): https://developer.microsoft.com/en-us/microsoft-edge/tools/webdriver/?form=MA13LH
25 | ```python
26 | options = webdriver.EdgeOptions()
27 | options.add_argument('--headless')
28 | options.add_argument('--disable-animations')
29 | # TODO set the driver path, e.g. C:\\Users\\Administrator\\Documents\\PythonWorkSpace\\Test\\msedgedriver.exe
30 | s = Service(r"")
31 | ```
32 | 4. Start collecting the data (a resumable-loop sketch follows below)
33 | 
34 | [View the full code](2-extra_syntaogf.py)
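As a hedged illustration of step 4, the loop below shows how resuming from record `n` and incremental Excel writes fit together. The element selectors and the result parsing are hypothetical placeholders (the real page structure must be inspected); treat this as a sketch, not the actual `2-extra_syntaogf.py`:

```python
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.edge.service import Service

def collect_ratings(n=0):
    my_excel = pd.read_excel("./data.xls")
    company_names = my_excel.iloc[:, 2]

    options = webdriver.EdgeOptions()
    options.add_argument('--headless')
    driver = webdriver.Edge(service=Service(r"./msedgedriver.exe"), options=options)

    results = []
    # Resume from record n so an interrupted run can pick up where it stopped
    for i, name in enumerate(company_names[n:], start=n):
        driver.get("http://www.esgchina.org/")
        # Hypothetical selectors -- inspect the page for the real ones
        driver.find_element(By.ID, "searchInput").send_keys(name)
        driver.find_element(By.ID, "searchButton").click()
        rating = driver.find_element(By.CLASS_NAME, "rating").text
        results.append({"company": name, "rating": rating})
        # Persist after every record so a crash loses at most one row
        pd.DataFrame(results).to_excel(f"esg_from_{n}.xlsx", index=False)
    driver.quit()
```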
--------------------------------------------------------------------------------
/docusaurus.config.ts:
--------------------------------------------------------------------------------
1 | import {themes as prismThemes} from 'prism-react-renderer';
2 | import type {Config} from '@docusaurus/types';
3 | import type * as Preset from '@docusaurus/preset-classic';
4 | 
5 | const config: Config = {
6 |   title: '🐍 Python-Basis-Notes',
7 |   tagline: 'Your helper for getting started with Python: the knowledge framework for learning Python basics 🐍 + web-scraping basics 🕷️ + NumPy basics 📊 + pandas basics 🐼 + deep learning 🍥 + a script library 📚',
8 |   favicon: 'img/favicon.ico',
9 | 
10 |   // Set the production url of your site here
11 |   url: 'https://zhiyu1998.github.io',
12 |   // Set the /<baseUrl>/ pathname under which your site is served
13 |   // For GitHub pages deployment, it is often '/<projectName>/'
14 |   baseUrl: '/Python-Basis-Notes/',
15 | 
16 |   // GitHub pages deployment config.
17 |   // If you aren't using GitHub pages, you don't need these.
18 |   organizationName: 'zhiyu1998', // Usually your GitHub org/user name.
19 |   projectName: 'Python-Basis-Notes', // Usually your repo name.
20 | 
21 |   onBrokenLinks: 'throw',
22 |   onBrokenMarkdownLinks: 'warn',
23 | 
24 |   // Even if you don't use internationalization, you can use this field to set
25 |   // useful metadata like html lang. For example, if your site is Chinese, you
26 |   // may want to replace "en" with "zh-Hans".
27 |   i18n: {
28 |     defaultLocale: 'en',
29 |     locales: ['en'],
30 |   },
31 | 
32 |   presets: [
33 |     [
34 |       'classic',
35 |       {
36 |         docs: {
37 |           sidebarPath: './sidebars.ts',
38 |           // Please change this to your repo.
39 |           // Remove this to remove the "edit this page" links.
40 |           editUrl:
41 |             'https://github.com/facebook/docusaurus/tree/main/packages/create-docusaurus/templates/shared/',
42 |         },
43 |         blog: {
44 |           showReadingTime: true,
45 |           // Please change this to your repo.
46 |           // Remove this to remove the "edit this page" links.
47 |           editUrl:
48 |             'https://github.com/facebook/docusaurus/tree/main/packages/create-docusaurus/templates/shared/',
49 |         },
50 |         theme: {
51 |           customCss: './src/css/custom.css',
52 |         },
53 |       } satisfies Preset.Options,
54 |     ],
55 |   ],
56 | 
57 |   themeConfig: {
58 |     // Replace with your project's social card
59 |     image: 'img/docusaurus-social-card.jpg',
60 |     navbar: {
61 |       title: 'Python-Basis-Notes',
62 |       logo: {
63 |         alt: 'My Site Logo',
64 |         src: 'img/logo.svg',
65 |       },
66 |       items: [
67 |         {
68 |           type: 'docSidebar',
69 |           sidebarId: 'tutorialSidebar',
70 |           position: 'left',
71 |           label: 'Docs',
72 |         },
73 |         // {to: '/blog', label: 'Blog', position: 'left'},
74 |         {
75 |           href: 'https://github.com/zhiyu1998/Python-Basis-Notes',
76 |           label: 'GitHub',
77 |           position: 'right',
78 |         },
79 |       ],
80 |     },
81 |     footer: {
82 |       style: 'dark',
83 |       links: [
84 |         {
85 |           title: 'Docs',
86 |           items: [
87 |             {
88 |               label: 'Enter the docs',
89 |               to: '/docs/intro',
90 |             },
91 |           ],
92 |         },
93 |         {
94 |           title: 'Companion docs',
95 |           items: [
96 |             {
97 |               label: 'Java Basics',
98 |               href: 'https://zhiyu1998.github.io/Computer-Science-Learn-Notes/Java/basic/basic.html',
99 |             },
100 |             {
101 |               label: 'Java Big-Tech Interviews',
102 |               href: 'https://zhiyu1998.github.io/Computer-Science-Learn-Notes/Java/eightpart/giant.html',
103 |             },
104 |           ],
105 |         },
106 |         {
107 |           title: 'More',
108 |           items: [
109 |             // {
110 |             //   label: 'Blog (planned)',
111 |             //   to: '/blog',
112 |             // },
113 |             {
114 |               label: 'GitHub',
115 |               href: 'https://github.com/zhiyu1998/Python-Basis-Notes',
116 |             },
117 |           ],
118 |         },
119 |       ],
120 |       copyright: `Copyright © ${new Date().getFullYear()} Python-Basis-Notes. Built by zhiyu1998.`,
121 |     },
122 |     prism: {
123 |       theme: prismThemes.github,
124 |       darkTheme: prismThemes.dracula,
125 |     },
126 |   } satisfies Preset.ThemeConfig,
127 | };
128 | 
129 | export default config;
130 | 
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 |   "name": "mine",
3 |   "version": "0.0.0",
4 |   "private": true,
5 |   "scripts": {
6 |     "docusaurus": "docusaurus",
7 |     "start": "docusaurus start",
8 |     "build": "docusaurus build",
9 |     "swizzle": "docusaurus swizzle",
10 |     "deploy": "docusaurus deploy",
11 |     "clear": "docusaurus clear",
12 |     "serve": "docusaurus serve",
13 |     "write-translations": "docusaurus write-translations",
14 |     "write-heading-ids": "docusaurus write-heading-ids",
15 |     "typecheck": "tsc"
16 |   },
17 |   "dependencies": {
18 |     "@docusaurus/core": "3.1.1",
19 |     "@docusaurus/preset-classic": "3.1.1",
20 |     "@mdx-js/react": "^3.0.0",
21 |     "clsx": "^2.0.0",
22 |     "prism-react-renderer": "^2.3.0",
23 |     "react": "^18.0.0",
24 |     "react-dom": "^18.0.0"
25 |   },
26 |   "devDependencies": {
27 |     "@docusaurus/module-type-aliases": "3.1.1",
28 |     "@docusaurus/tsconfig": "3.1.1",
29 |     "@docusaurus/types": "3.1.1",
30 |     "typescript": "~5.2.2"
31 |   },
32 |   "browserslist": {
33 |     "production": [
34 |       ">0.5%",
35 |       "not dead",
36 |       "not op_mini all"
37 |     ],
38 |     "development": [
39 |       "last 3 chrome version",
40 |       "last 3 firefox version",
41 |       "last 5 safari version"
42 |     ]
43 |   },
44 |   "engines": {
45 |     "node": ">=18.0"
46 |   }
47 | }
48 | 
--------------------------------------------------------------------------------
/sidebars.ts:
--------------------------------------------------------------------------------
1 | import type {SidebarsConfig} from '@docusaurus/plugin-content-docs';
2 | 
3 | /**
4 |  * Creating a sidebar enables you to:
5 |  - create an ordered group of docs
6 |  - render a sidebar for each doc of that group
7 |  - provide next/previous navigation
8 | 
9 |  The sidebars can be generated from the filesystem, or explicitly defined here.
10 | 
11 |  Create as many sidebars as you want.
12 |  */
13 | const sidebars: SidebarsConfig = {
14 |   // By default, Docusaurus generates a sidebar from the docs folder structure
15 |   tutorialSidebar: [{type: 'autogenerated', dirName: '.'}],
16 | 
17 |   // But you can create a sidebar manually
18 |   /*
19 |   tutorialSidebar: [
20 |     'intro',
21 |     'hello',
22 |     {
23 |       type: 'category',
24 |       label: 'Tutorial',
25 |       items: ['basics/create-a-document'],
26 |     },
27 |   ],
28 |    */
29 | };
30 | 
31 | export default sidebars;
32 | 
--------------------------------------------------------------------------------
/src/components/HomepageFeatures/index.tsx:
--------------------------------------------------------------------------------
1 | import clsx from 'clsx';
2 | import Heading from '@theme/Heading';
3 | import styles from './styles.module.css';
4 | 
5 | type FeatureItem = {
6 |   title: string;
7 |   Svg: React.ComponentType<React.ComponentProps<'svg'>>;
8 |   description: JSX.Element;
9 | };
10 | 
11 | const FeatureList: FeatureItem[] = [
12 |   {
13 |     title: 'Python Basics & Everyday Scripts',
14 |     Svg: require('@site/static/img/undraw_docusaurus_mountain.svg').default,
15 |     description: (
16 |       <>
17 |         Detailed Python mind maps and years of accumulated Python scripts, ready to use and enjoy.
18 |       </>
19 |     ),
20 |   },
21 |   {
22 |     title: 'Data Analysis',
23 |     Svg: require('@site/static/img/undraw_docusaurus_tree.svg').default,
24 |     description: (
25 |       <>
26 |         Learn NumPy and pandas until you take off 🛫
27 |       </>
28 |     ),
29 |   },
30 |   {
31 |     title: 'Deep Learning',
32 |     Svg: require('@site/static/img/undraw_docusaurus_react.svg').default,
33 |     description: (
34 |       <>
35 |         How a beginner can quickly get into deep learning, image recognition, time-series forecasting and more, plus my research area (graph convolutional networks).
36 |       </>
37 |     ),
38 |   },
39 | ];
40 | 
41 | function Feature({title, Svg, description}: FeatureItem) {
42 |   return (
43 |     <div className={clsx('col col--4')}>
44 |       <div className="text--center">
45 |         <Svg className={styles.featureSvg} role="img" />
46 |       </div>
47 |       <div className="text--center padding-horiz--md">
48 |         <Heading as="h3">{title}</Heading>
49 |         <p>{description}</p>
50 |       </div>
51 |     </div>
52 |   );
53 | }
54 | 
55 | export default function HomepageFeatures(): JSX.Element {
56 |   return (
57 |     <section className={styles.features}>
58 |       <div className="container">
59 |         <div className="row">
60 |           {FeatureList.map((props, idx) => (
61 |             <Feature key={idx} {...props} />
62 |           ))}
63 |         </div>
64 |       </div>
65 |     </section>
66 |   );
67 | }
68 | 
--------------------------------------------------------------------------------
/src/components/HomepageFeatures/styles.module.css:
--------------------------------------------------------------------------------
1 | .features {
2 |   display: flex;
3 |   align-items: center;
4 |   padding: 2rem 0;
5 |   width: 100%;
6 | }
7 | 
8 | .featureSvg {
9 |   height: 200px;
10 |   width: 200px;
11 | }
12 | 
--------------------------------------------------------------------------------
/src/css/custom.css:
--------------------------------------------------------------------------------
1 | /**
2 |  * Any CSS included here will be global. The classic template
3 |  * bundles Infima by default. Infima is a CSS framework designed to
4 |  * work well for content-centric websites.
5 |  */
6 | 
7 | /* You can override the default Infima variables here. */
8 | :root {
9 |   --ifm-color-primary: #2e8555;
10 |   --ifm-color-primary-dark: #29784c;
11 |   --ifm-color-primary-darker: #277148;
12 |   --ifm-color-primary-darkest: #205d3b;
13 |   --ifm-color-primary-light: #33925d;
14 |   --ifm-color-primary-lighter: #359962;
15 |   --ifm-color-primary-lightest: #3cad6e;
16 |   --ifm-code-font-size: 95%;
17 |   --docusaurus-highlighted-code-line-bg: rgba(0, 0, 0, 0.1);
18 | }
19 | 
20 | /* For readability concerns, you should choose a lighter palette in dark mode. */
21 | [data-theme='dark'] {
22 |   --ifm-color-primary: #25c2a0;
23 |   --ifm-color-primary-dark: #21af90;
24 |   --ifm-color-primary-darker: #1fa588;
25 |   --ifm-color-primary-darkest: #1a8870;
26 |   --ifm-color-primary-light: #29d5b0;
27 |   --ifm-color-primary-lighter: #32d8b4;
28 |   --ifm-color-primary-lightest: #4fddbf;
29 |   --docusaurus-highlighted-code-line-bg: rgba(0, 0, 0, 0.3);
30 | }
31 | 
--------------------------------------------------------------------------------
/src/pages/index.module.css:
--------------------------------------------------------------------------------
1 | /**
2 |  * CSS files with the .module.css suffix will be treated as CSS modules
3 |  * and scoped locally.
4 |  */
5 | 
6 | .heroBanner {
7 |   padding: 4rem 0;
8 |   text-align: center;
9 |   position: relative;
10 |   overflow: hidden;
11 | }
12 | 
13 | @media screen and (max-width: 996px) {
14 |   .heroBanner {
15 |     padding: 2rem;
16 |   }
17 | }
18 | 
19 | .buttons {
20 |   display: flex;
21 |   align-items: center;
22 |   justify-content: center;
23 | }
24 | 
--------------------------------------------------------------------------------
/src/pages/index.tsx:
--------------------------------------------------------------------------------
1 | import clsx from 'clsx';
2 | import Link from '@docusaurus/Link';
3 | import useDocusaurusContext from '@docusaurus/useDocusaurusContext';
4 | import Layout from '@theme/Layout';
5 | import HomepageFeatures from '@site/src/components/HomepageFeatures';
6 | import Heading from '@theme/Heading';
7 | 
8 | import styles from './index.module.css';
9 | 
10 | function HomepageHeader() {
11 |   const {siteConfig} = useDocusaurusContext();
12 |   return (
13 |     <header className={clsx('hero hero--primary', styles.heroBanner)}>
14 |       <div className="container">
15 |         <Heading as="h1" className="hero__title">
16 |           {siteConfig.title}
17 |         </Heading>
18 |         <p className="hero__subtitle">{siteConfig.tagline}</p>
19 |         <div className={styles.buttons}>
20 |           <Link
21 |             className="button button--secondary button--lg"
22 |             to="/docs/intro">
23 |             Start reading - 5min ⏱️
24 |           </Link>
25 |         </div>
26 |       </div>
27 |     </header>
28 |   );
29 | }
30 | 
31 | export default function Home(): JSX.Element {
32 |   const {siteConfig} = useDocusaurusContext();
33 |   return (
34 |     <Layout
35 |       title={`Hello from ${siteConfig.title}`}
36 |       description="Description will go into a meta tag in <head />">
37 |       <HomepageHeader />
38 |       <main>
39 |         <HomepageFeatures />
40 |       </main>
41 |     </Layout>
42 |   );
43 | }
44 | 
--------------------------------------------------------------------------------
/src/pages/markdown-page.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Markdown page example
3 | ---
4 | 
5 | # Markdown page example
6 | 
7 | You don't need React to write simple standalone pages.
8 | 
--------------------------------------------------------------------------------
/static/.nojekyll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/static/.nojekyll
--------------------------------------------------------------------------------
/static/img/docusaurus-social-card.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/static/img/docusaurus-social-card.jpg
--------------------------------------------------------------------------------
/static/img/docusaurus.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/static/img/docusaurus.png
--------------------------------------------------------------------------------
/static/img/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/static/img/favicon.ico
--------------------------------------------------------------------------------
/static/img/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhiyu1998/Python-Basis-Notes/2be71810a0eaa7624451ab00fe1c4dec73e6dc58/static/img/logo.png
--------------------------------------------------------------------------------
/static/img/logo.svg:
--------------------------------------------------------------------------------
[SVG markup not recoverable from this dump]
--------------------------------------------------------------------------------
/static/img/undraw_docusaurus_mountain.svg:
--------------------------------------------------------------------------------
[SVG markup not recoverable from this dump; accessible title: "Easy to Use"]
--------------------------------------------------------------------------------
/static/img/undraw_docusaurus_tree.svg:
--------------------------------------------------------------------------------
[SVG markup not recoverable from this dump; accessible title: "Focus on What Matters"]
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 |   // This file is not used in compilation. It is here just for a nice editor experience.
3 |   "extends": "@docusaurus/tsconfig",
4 |   "compilerOptions": {
5 |     "baseUrl": "."
6 |   }
7 | }
8 | 