├── Automate the Boring Stuff with Python
    ├── README.md
    ├── 发送电子邮件.ipynb
    ├── 处理CSV文件.ipynb
    ├── 处理Excel 电子表格.ipynb
    ├── 模式匹配与正则表达式.ipynb
    ├── 用GUI 自动化控制键盘和鼠标.ipynb
    ├── 组织文件.ipynb
    └── 读写文件.ipynb
├── Deep learning
    ├── NLP
    │   ├── [自然语言处理] 自然语言处理库spaCy使用指北
    │   │   ├── [自然语言处理] spaCy使用指北.ipynb
    │   │   └── image
    │   │   │   ├── img1.png
    │   │   │   ├── img2.png
    │   │   │   ├── img3.svg
    │   │   │   ├── img4.svg
    │   │   │   └── img5.svg
    │   └── [语音识别] 基于Python构建简易的音频录制与语音识别应用
    │   │   ├── asr.py
    │   │   ├── record.py
    │   │   └── run.py
    ├── face detection
    │   ├── [python] 向量检索库Faiss使用指北
    │   │   └── [python] 向量检索库Faiss使用指北.ipynb
    │   ├── [深度学习] Python人脸识别库Deepface使用教程
    │   │   ├── [深度学习] Python人脸识别库Deepface使用教程.ipynb
    │   │   └── images
    │   │   │   ├── baijingting
    │   │   │       ├── 0000.jpg
    │   │   │       ├── 0001.jpg
    │   │   │       └── 0002.jpg
    │   │   │   ├── jiangwen
    │   │   │       ├── 0000.jpg
    │   │   │       ├── 0001.jpg
    │   │   │       └── 0002.jpg
    │   │   │   ├── pengyuyan
    │   │   │       ├── 0000.jpg
    │   │   │       ├── 0001.jpg
    │   │   │       └── 0002.jpg
    │   │   │   ├── zhangziyi
    │   │   │       ├── 0000.jpg
    │   │   │       ├── 0001.jpg
    │   │   │       └── 0002.jpg
    │   │   │   └── zhaoliying
    │   │   │       ├── 0000.jpg
    │   │   │       ├── 0001.jpg
    │   │   │       └── 0002.jpg
    │   └── [深度学习] Python人脸识别库face_recognition使用教程
    │   │   ├── [深度学习] Python人脸识别库face_recognition使用教程.ipynb
    │   │   └── test_img
    │   │       ├── README.md
    │   │       ├── eye_closed.jpg
    │   │       └── eye_opened.jpg
    ├── fast-reid
    │   └── fast-reid_tutorial
    │   │   ├── configs
    │   │       ├── Base-AGW.yml
    │   │       ├── Base-MGN.yml
    │   │       ├── Base-SBS.yml
    │   │       ├── Base-bagtricks.yml
    │   │       ├── DukeMTMC
    │   │       │   ├── AGW_R101-ibn.yml
    │   │       │   ├── AGW_R50-ibn.yml
    │   │       │   ├── AGW_R50.yml
    │   │       │   ├── AGW_S50.yml
    │   │       │   ├── bagtricks_R101-ibn.yml
    │   │       │   ├── bagtricks_R50-ibn.yml
    │   │       │   ├── bagtricks_R50.yml
    │   │       │   ├── bagtricks_S50.yml
    │   │       │   ├── mgn_R50-ibn.yml
    │   │       │   ├── sbs_R101-ibn.yml
    │   │       │   ├── sbs_R50-ibn.yml
    │   │       │   ├── sbs_R50.yml
    │   │       │   └── sbs_S50.yml
    │   │       ├── MSMT17
    │   │       │   ├── AGW_R101-ibn.yml
    │   │       │   ├── AGW_R50-ibn.yml
    │   │       │   ├── AGW_R50.yml
    │   │       │   ├── AGW_S50.yml
    │   │       │   ├── bagtricks_R101-ibn.yml
    │   │       │   ├── bagtricks_R50-ibn.yml
    │   │       │   ├── bagtricks_R50.yml
    │   │       │   ├── bagtricks_S50.yml
    │   │       │   ├── mgn_R50-ibn.yml
    │   │       │   ├── sbs_R101-ibn.yml
    │   │       │   ├── sbs_R50-ibn.yml
    │   │       │   ├── sbs_R50.yml
    │   │       │   └── sbs_S50.yml
    │   │       ├── Market1501
    │   │       │   ├── AGW_R101-ibn.yml
    │   │       │   ├── AGW_R50-ibn.yml
    │   │       │   ├── AGW_R50.yml
    │   │       │   ├── AGW_S50.yml
    │   │       │   ├── bagtricks_R101-ibn.yml
    │   │       │   ├── bagtricks_R50-ibn.yml
    │   │       │   ├── bagtricks_R50.yml
    │   │       │   ├── bagtricks_S50.yml
    │   │       │   ├── bagtricks_vit.yml
    │   │       │   ├── mgn_R50-ibn.yml
    │   │       │   ├── sbs_R101-ibn.yml
    │   │       │   ├── sbs_R50-ibn.yml
    │   │       │   ├── sbs_R50.yml
    │   │       │   └── sbs_S50.yml
    │   │       ├── VERIWild
    │   │       │   └── bagtricks_R50-ibn.yml
    │   │       ├── VeRi
    │   │       │   └── sbs_R50-ibn.yml
    │   │       └── VehicleID
    │   │       │   └── bagtricks_R50-ibn.yml
    │   │   ├── datasets
    │   │       └── Market-1501-v15.09.15
    │   │       │   ├── README.md
    │   │       │   └── img.png
    │   │   ├── demo.py
    │   │   ├── fastreid
    │   │       ├── README.md
    │   │       └── img.png
    │   │   ├── model
    │   │       ├── README.md
    │   │       └── img.png
    │   │   ├── predictor.py
    │   │   ├── train_net.py
    │   │   └── visualize_result.py
    └── 基于切片辅助超推理库SAHI优化小目标识别
    │   ├── [深度学习] 基于切片辅助超推理库SAHI优化小目标识别.ipynb
    │   └── image
    │       ├── sliced_inference.gif
    │       ├── small-vehicles1.jpeg
    │       ├── terrain2.png
    │       └── terrain2_coco.json
├── Documents
    ├── Yellowbrick使用笔记
    │   ├── Yellowbrick使用笔记1-快速入门.ipynb
    │   ├── Yellowbrick使用笔记2-模型选择.ipynb
    │   ├── Yellowbrick使用笔记3-特征分析可视化.ipynb
    │   ├── Yellowbrick使用笔记4-目标可视化.ipynb
    │   ├── Yellowbrick使用笔记5-回归可视化 .ipynb
    │   ├── Yellowbrick使用笔记6-分类可视化.ipynb
    │   ├── Yellowbrick使用笔记7-聚类可视化.ipynb
    │   └── Yellowbrick使用笔记8-模型选择可视化.ipynb
    ├── [python] CairoSVG使用教程.ipynb
    ├── [python] Python二维码生成器qrcode库入门.md
    ├── [python] 启发式算法库scikit-opt使用指北.ipynb
    ├── [python] 基于Dataset库操作数据库.ipynb
    ├── [深度学习] Pytorch模型转换为onnx模型笔记.ipynb
    ├── [深度学习] caffe分类模型训练、结果可视化、部署及量化笔记 .md
    ├── [深度学习] 基于切片辅助超推理库SAHI优化小目标识别.ipynb
    └── 特征选择笔记
    │   ├── 使用SelectFromModel特征选择.ipynb
    │   ├── 删除低方差的特征.ipynb
    │   ├── 单变量特征选择.ipynb
    │   └── 递归式特征消除.ipynb
├── LICENSE
├── Plot Items
    ├── Mapping
    │   ├── Python绘制数据地图1-GeoPandas入门指北
    │   │   ├── Python绘制数据地图1-GeoPandas入门指北.ipynb
    │   │   └── image
    │   │   │   ├── 1.png
    │   │   │   ├── 10.gif
    │   │   │   ├── 11.png
    │   │   │   ├── 12.png
    │   │   │   ├── 13.png
    │   │   │   ├── 14.png
    │   │   │   ├── 2.png
    │   │   │   ├── 3.png
    │   │   │   ├── 4.png
    │   │   │   ├── 5.png
    │   │   │   ├── 6.png
    │   │   │   ├── 7.png
    │   │   │   ├── 8.png
    │   │   │   └── 9.png
    │   ├── Python绘制数据地图2-GeoPandas地图可视化
    │   │   ├── 2022江苏省各市GDP.csv
    │   │   └── Python绘制数据地图2-GeoPandas地图可视化.ipynb
    │   ├── Python绘制数据地图3-GeoPandas使用要点.ipynb
    │   ├── Python绘制数据地图4-MovingPandas入门指北
    │   │   ├── Python绘制数据地图4-MovingPandas入门指北.ipynb
    │   │   └── data.zip
    │   └── Python绘制数据地图5-MovingPandas绘图实例
    │   │   ├── Python绘制数据地图5-MovingPandas绘图实例.ipynb
    │   │   └── data.zip
    ├── Matplotlib-related
    │   ├── 基于matplotlib-scalebar库绘制比例尺
    │   │   ├── image
    │   │   │   ├── green.png
    │   │   │   ├── img1.png
    │   │   │   ├── img2.png
    │   │   │   ├── img3.png
    │   │   │   ├── orange.png
    │   │   │   ├── purple.png
    │   │   │   ├── red.png
    │   │   │   ├── yellow.png
    │   │   │   ├── zoom1.png
    │   │   │   ├── zoom2.png
    │   │   │   ├── zoom3.png
    │   │   │   └── zoom4.png
    │   │   └── 基于matplotlib-scalebar库绘制比例尺.ipynb
    │   └── 基于matplotlib和plottable库绘制精美表格
    │   │   ├── data
    │   │       ├── bundesliga_crests_22_23
    │   │       │   ├── 1. FC Union Berlin.png
    │   │       │   ├── Bayer Leverkusen.png
    │   │       │   ├── Bayern Munich.png
    │   │       │   ├── Borussia Dortmund.png
    │   │       │   ├── Borussia Monchengladbach.png
    │   │       │   ├── Eintracht Frankfurt.png
    │   │       │   ├── FC Augsburg.png
    │   │       │   ├── FC Cologne.png
    │   │       │   ├── Hertha Berlin.png
    │   │       │   ├── Mainz.png
    │   │       │   ├── RB Leipzig.png
    │   │       │   ├── SC Freiburg.png
    │   │       │   ├── Schalke 04.png
    │   │       │   ├── TSG Hoffenheim.png
    │   │       │   ├── VfB Stuttgart.png
    │   │       │   ├── VfL Bochum.png
    │   │       │   ├── VfL Wolfsburg.png
    │   │       │   └── Werder Bremen.png
    │   │       ├── country_flags
    │   │       │   ├── Argentina.png
    │   │       │   ├── Australia.png
    │   │       │   ├── Brazil.png
    │   │       │   ├── Cameroon.png
    │   │       │   ├── Canada.png
    │   │       │   ├── Chile.png
    │   │       │   ├── China.png
    │   │       │   ├── England.png
    │   │       │   ├── France.png
    │   │       │   ├── Germany.png
    │   │       │   ├── Italy.png
    │   │       │   ├── Jamaica.png
    │   │       │   ├── Japan.png
    │   │       │   ├── Netherlands.png
    │   │       │   ├── New Zealand.png
    │   │       │   ├── Nigeria.png
    │   │       │   ├── Norway.png
    │   │       │   ├── Scotland.png
    │   │       │   ├── South Africa.png
    │   │       │   ├── South Korea.png
    │   │       │   ├── Spain.png
    │   │       │   ├── Sweden.png
    │   │       │   ├── Thailand.png
    │   │       │   └── USA.png
    │   │       ├── spi_matches.csv
    │   │       ├── spi_matches_latest.csv
    │   │       └── wwc_forecasts.csv
    │   │   └── 基于matplotlib和plottable库绘制精美表格.ipynb
    ├── NetworkX实例.ipynb
    ├── 圆环图Donut.ipynb
    ├── 基于PyWaffle库绘制华夫饼图.ipynb
    ├── 树形图Tree diagram.ipynb
    ├── 维恩图VENN DIAGRAM.ipynb
    ├── 网络图Network chart.ipynb
    └── 雷达图radarchart.ipynb
├── README.md
├── Seaborn Study
    ├── README.md
    └── sources
    │   ├── 1 箱形图BOXPLOT.ipynb
    │   ├── 10 绘图实例(2) Drawing example(2).ipynb
    │   ├── 11 绘图实例(3) Drawing example(3).ipynb
    │   ├── 12 绘图实例(4) Drawing example(4).ipynb
    │   ├── 2 散点图SCATTERPLOT.ipynb
    │   ├── 3 直方图HISTOGRAMPLOT.ipynb
    │   ├── 4 核密度图DENSITYPLOT.ipynb
    │   ├── 5 小提琴图VIOLINPLOT.ipynb
    │   ├── 6 热图HEATMAPPLOT.ipynb
    │   ├── 7 常用参数调整Adjustment of Common Parameters.ipynb
    │   ├── 8 避免过度绘图Avoid Overplotting.ipynb
    │   └── 9 绘图实例(1) Drawing example(1).ipynb
├── Spider Study
    ├── 0x01maoyanTop100_spider
    │   ├── maoyanTop100_spider.py
    │   ├── ref.md
    │   └── result.csv
    ├── 0x02weiboSingleUser_spider
    │   ├── ref.md
    │   ├── result.txt
    │   └── weiboSingleUser_spider.py
    ├── 0x03toutiaoArticle_spider
    │   ├── ref.md
    │   └── toutiaoArticle_spider.py
    ├── 0x04doubanMovieExplore_spider
    │   ├── doubanMovieExplore_spider.py
    │   ├── ref.md
    │   └── result.csv
    ├── 0x05baotuVideo_spider
    │   ├── baotuVideo_spider.py
    │   └── ref.md
    ├── 0x06quanjingCategoryImg_spider
    │   ├── quanjingCategoryImg_spider.py
    │   └── ref.md
    ├── 0x07doubanMusicChart_spider
    │   ├── doubanMusicChart_spider.py
    │   ├── ref.md
    │   └── result.csv
    ├── 0x08doubanSingleMovieComment_spider
    │   ├── doubanSingleMovieComment_spider.py
    │   └── ref.md
    ├── 0x09xiaomiShopCategoryApp_spider
    │   ├── 15.csv
    │   ├── ref.md
    │   └── xiaomiShopCategoryApp_spider.py
    ├── 0x10kuanShopApp_spider
    │   ├── game.csv
    │   ├── kuanShopApp_spider.py
    │   └── ref.md
    ├── 0x11bingioliu_spider
    │   ├── bingioliu_spider.py
    │   └── ref.md
    ├── 0x12shicimingjuwang_spider
    │   ├── ref.md
    │   └── shicimingjuwang_spider.py
    └── README.md
└── mxnet
    └── A 60-minute Gluon Crash Course.ipynb


/Automate the Boring Stuff with Python/README.md:
--------------------------------------------------------------------------------
1 | # 《Python编程快速上手-让繁琐工作自动化》学习笔记  
2 | ###  Automate the Boring Stuff with Python: Practical Programming for Total Beginners  
3 | My Csdn Blog: https://blog.csdn.net/LuohenYJ
4 | 


--------------------------------------------------------------------------------
/Automate the Boring Stuff with Python/处理CSV文件.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "## 1. 处理CSV文件笔记（第14章）  \n",
  8 |     "本文主要在python下介绍CSV文件，CSV 表示“Comma-Separated Values（逗号分隔的值）”，CSV文件是简化的电子表格，保存为纯文本文件。CSV 文件中的每行代表电子表格中的一行，逗号分割了该行中的单元格。Python 的csv模块让解析CSV 文件变得容易。CSV模块为Python自带库。常用函数如下:\n",
  9 |     "\n",
 10 |     "|函数|用途|备注|\n",
 11 |     "|:-:|:-:|:-:|\n",
 12 |     "|exampleFile = open(path)|打开文件，返回file文件|非csv模块中的函数，但可以用于打开csv文件|\n",
 13 |     "|csv.reader(exampleFile)|将file文件转换为一个Reader对象|不能直接将文件名字符串传递给csv.reader()函数|\n",
 14 |     "|exampleData = list(exampleReader)|在Reader 对象上应用list()函数，将返回一个csv文件内容列表|非csv模块中的函数|\n",
 15 |     "|outputFile = open('output.csv', 'w', newline='')|open()并传入'w'，以写模式打开一个文件|如果忘记设置newline关键字参数为空字符，output.csv中的行距将有两倍|\n",
 16 |     "|outputWriter.writerow[lists]|将lists写入csv文件中||\n",
 17 |     "|csv.writer(csvFile, delimiter='\\t')|将csv文件中的分隔符改为'\\t'||\n",
 18 |     "|csv.writer(csvFile, lineterminator='\\n\\n')|将csv文件中的行终止字符改为'\\n\\n'||"
 19 |    ]
 20 |   },
 21 |   {
 22 |    "cell_type": "markdown",
 23 |    "metadata": {},
 24 |    "source": [
 25 |     "## 2. 项目练习"
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "markdown",
 30 |    "metadata": {},
 31 |    "source": [
 32 |     "### 2.1 项目：从CSV 文件中删除表头  \n",
 33 |     "读取当前工作目录中所有扩展名为.csv 的文件，除掉第一行的内容重新写入同名的文件。用新的、无表头的内容替换CSV 文件的旧内容。"
 34 |    ]
 35 |   },
 36 |   {
 37 |    "cell_type": "code",
 38 |    "execution_count": 1,
 39 |    "metadata": {},
 40 |    "outputs": [
 41 |     {
 42 |      "name": "stdout",
 43 |      "output_type": "stream",
 44 |      "text": [
 45 |       "Removing header from example.csv...\n"
 46 |      ]
 47 |     }
 48 |    ],
 49 |    "source": [
 50 |     "import csv\n",
 51 |     "import os\n",
 52 |     "\n",
 53 |     "\n",
 54 |     "# 创建文件夹，exist_ok=True表示文件夹如果存在则不报错\n",
 55 |     "os.makedirs('headerRemoved', exist_ok=True)\n",
 56 |     "# Loop through every file in the current working directory.\n",
 57 |     "# 查找本地所有文件\n",
 58 |     "for csvFilename in os.listdir('.'):\n",
 59 |     "    if not csvFilename.endswith('.csv'):\n",
 60 |     "         # skip non-csv files 跳过不是csv文件\n",
 61 |     "        continue\n",
 62 |     "    print('Removing header from ' + csvFilename + '...')\n",
 63 |     "\n",
 64 |     "    # Read the CSV file in (skipping first row). 读取文件跳过第一行\n",
 65 |     "    csvRows = []\n",
 66 |     "    csvFileObj = open(csvFilename)\n",
 67 |     "    readerObj = csv.reader(csvFileObj)\n",
 68 |     "    # 读取每一行\n",
 69 |     "    for row in readerObj:\n",
 70 |     "        # 跳过第一行\n",
 71 |     "        # readerObj.line_num 表示行号从1开始\n",
 72 |     "        if readerObj.line_num == 1:\n",
 73 |     "                 # skip first row\n",
 74 |     "            continue\n",
 75 |     "        # 保存数据\n",
 76 |     "        csvRows.append(row)\n",
 77 |     "    csvFileObj.close()\n",
 78 |     "\n",
 79 |     "    # Write out the CSV file. 写文件\n",
 80 |     "    csvFileObj = open(os.path.join(\n",
 81 |     "        'headerRemoved', csvFilename), 'w', newline='')\n",
 82 |     "    csvWriter = csv.writer(csvFileObj)\n",
 83 |     "    for row in csvRows:\n",
 84 |     "        csvWriter.writerow(row)\n",
 85 |     "    csvFileObj.close()"
 86 |    ]
 87 |   },
 88 |   {
 89 |    "cell_type": "markdown",
 90 |    "metadata": {},
 91 |    "source": [
 92 |     "### 2.2 Excel 到CSV 的转换程序  \n",
 93 |     "将多个excel文件保存csv文件。一个Excel 文件可能包含多个工作表，必须为每个表创建一个CSV 文件。CSV文件的文件名应该是<Excel 文件名>_<表标题>.csv，其中<Excel 文件名>是没有扩展名的Excel 文件名（例如'spam_data'，而不是'spam_data.xlsx'），<表标题>是Worksheet 对象的title 变量中的字符串。"
 94 |    ]
 95 |   },
 96 |   {
 97 |    "cell_type": "code",
 98 |    "execution_count": 2,
 99 |    "metadata": {},
100 |    "outputs": [
101 |     {
102 |      "name": "stdout",
103 |      "output_type": "stream",
104 |      "text": [
105 |       "current file is: spreadsheet-A_Sheet.csv\n",
106 |       "current file is: spreadsheet-B_Sheet.csv\n",
107 |       "current file is: spreadsheet-C_Sheet.csv\n",
108 |       "current file is: spreadsheet-D_Sheet.csv\n",
109 |       "current file is: spreadsheet-E_Sheet.csv\n",
110 |       "current file is: spreadsheet-F_Sheet.csv\n",
111 |       "current file is: spreadsheet-G_Sheet.csv\n",
112 |       "current file is: spreadsheet-H_Sheet.csv\n",
113 |       "current file is: spreadsheet-I_Sheet.csv\n",
114 |       "current file is: spreadsheet-J_Sheet.csv\n",
115 |       "current file is: spreadsheet-K_Sheet.csv\n",
116 |       "current file is: spreadsheet-L_Sheet.csv\n",
117 |       "current file is: spreadsheet-M_Sheet.csv\n",
118 |       "current file is: spreadsheet-N_Sheet.csv\n",
119 |       "current file is: spreadsheet-O_Sheet.csv\n",
120 |       "current file is: spreadsheet-P_Sheet.csv\n",
121 |       "current file is: spreadsheet-Q_Sheet.csv\n",
122 |       "current file is: spreadsheet-R_Sheet.csv\n",
123 |       "current file is: spreadsheet-S_Sheet.csv\n",
124 |       "current file is: spreadsheet-T_Sheet.csv\n",
125 |       "current file is: spreadsheet-U_Sheet.csv\n",
126 |       "current file is: spreadsheet-V_Sheet.csv\n",
127 |       "current file is: spreadsheet-W_Sheet.csv\n",
128 |       "current file is: spreadsheet-X_Sheet.csv\n",
129 |       "current file is: spreadsheet-Y_Sheet.csv\n",
130 |       "current file is: spreadsheet-Z_Sheet.csv\n"
131 |      ]
132 |     }
133 |    ],
134 |    "source": [
135 |     "import openpyxl\n",
136 |     "import os\n",
137 |     "import csv\n",
138 |     "\n",
139 |     "inputPath = './excelSpreadsheets'\n",
140 |     "outputPath = './outputSheets'\n",
141 |     "\n",
142 |     "# 创建文件夹\n",
143 |     "os.makedirs(outputPath, exist_ok=True)\n",
144 |     "for excelFile in os.listdir(inputPath):\n",
145 |     "    # Skip non-xlsx files, load the workbook object.\n",
146 |     "    # 跳过不是xlsx的文件\n",
147 |     "    if not excelFile.endswith('xlsx'):\n",
148 |     "        continue\n",
149 |     "    # 输入文件\n",
150 |     "    inputFilePath = os.path.join(inputPath, excelFile)\n",
151 |     "    # 打开xlsx文件\n",
152 |     "    wb = openpyxl.load_workbook(inputFilePath)\n",
153 |     "    # 获得当前文件sheetName\n",
154 |     "    for sheetName in wb.sheetnames:\n",
155 |     "        # 设置文件\n",
156 |     "        csvFileName = excelFile.split('.')[0]+'_'+sheetName+'.csv'\n",
157 |     "        csvFile = open(os.path.join(outputPath, csvFileName), 'w', newline='')\n",
158 |     "        print(\"current file is: {}\".format(csvFileName))\n",
159 |     "        # 写csv文件\n",
160 |     "        outputWriter = csv.writer(csvFile)\n",
161 |     "        sheet = wb[sheetName]\n",
162 |     "\n",
163 |     "        # 遍历每一行数据\n",
164 |     "        for rowNum in range(1, sheet.max_row+1):\n",
165 |     "            # 保存每一行数据\n",
166 |     "            rowData = []\n",
167 |     "            for colNum in range(1, sheet.max_column+1):\n",
168 |     "                # 保存每一列数据\n",
169 |     "                rowData.append(sheet.cell(row=rowNum, column=colNum).value)\n",
170 |     "            # 写入一行数据\n",
171 |     "            outputWriter.writerow(rowData)\n",
172 |     "        csvFile.close()"
173 |    ]
174 |   }
175 |  ],
176 |  "metadata": {
177 |   "kernelspec": {
178 |    "display_name": "Python 3",
179 |    "language": "python",
180 |    "name": "python3"
181 |   },
182 |   "language_info": {
183 |    "codemirror_mode": {
184 |     "name": "ipython",
185 |     "version": 3
186 |    },
187 |    "file_extension": ".py",
188 |    "mimetype": "text/x-python",
189 |    "name": "python",
190 |    "nbconvert_exporter": "python",
191 |    "pygments_lexer": "ipython3",
192 |    "version": "3.7.1"
193 |   }
194 |  },
195 |  "nbformat": 4,
196 |  "nbformat_minor": 2
197 | }
198 | 


--------------------------------------------------------------------------------
/Automate the Boring Stuff with Python/用GUI 自动化控制键盘和鼠标.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "## 1. 用GUI 自动化控制键盘和鼠标  \n",
  8 |     "pyautogui模块可以向Windows、OS X 和Linux 发送虚拟按键和鼠标点击。根据使用的操作系统，在安装pyautogui之前，可能需要安装一些其他模块。\n",
  9 |     "+ Windows: 不需要安装其他模块。\n",
 10 |     "+ OS X:\n",
 11 |     "```\n",
 12 |     "sudo pip3 install pyobjc-framework-Quartz\n",
 13 |     "sudo pip3 install pyobjc-core\n",
 14 |     "sudo pip3 install pyobjc\n",
 15 |     "```\n",
 16 |     "+ Linux:\n",
 17 |     "```\n",
 18 |     "sudo pip3 install python3-xlib\n",
 19 |     "sudo apt-get install scrot\n",
 20 |     "sudo apt-get install python3-tk\n",
 21 |     "sudo apt-get install python3-dev\n",
 22 |     "```\n",
 23 |     "依赖项安装后安装pyautogui：\n",
 24 |     "```\n",
 25 |     "pip install pyautogui\n",
 26 |     "```"
 27 |    ]
 28 |   },
 29 |   {
 30 |    "cell_type": "markdown",
 31 |    "metadata": {},
 32 |    "source": [
 33 |     "使用pyautogui需要注意防止或恢复GUI。通过ctrl+cg关闭程序或者。设定暂停和自动防故障，如下：\n",
 34 |     "```\n",
 35 |     "# 在每次函数调用后等一会儿，将pyautogui.PAUSE 变量设置为要暂停的秒数\n",
 36 |     "pyautogui.PAUSE = 1\n",
 37 |     "# 设定自动防故障\n",
 38 |     "pyautogui.FAILSAFE = True\n",
 39 |     "```"
 40 |    ]
 41 |   },
 42 |   {
 43 |    "cell_type": "markdown",
 44 |    "metadata": {},
 45 |    "source": [
 46 |     "常用函数如下：\n",
 47 |     "\n",
 48 |     "|函数|用途|\n",
 49 |     "|-|-|\n",
 50 |     "|moveTo(x，y,duration)|将鼠标移动到指定的x、y 坐标，duration每次移动耗时多少秒，如果没有数调用duration，鼠标就会马上从一个点移到另一个点|\n",
 51 |     "|moveRel(xOffset，yOffset,duration)|相对于当前位置移动鼠标|\n",
 52 |     "|pyautogui.position()|确定鼠标当前位置|\n",
 53 |     "|dragTo(x，y,duration)|按下左键移动鼠标|\n",
 54 |     "|dragRel(xOffset，yOffset,duration)|按下左键，相对于当前位置移动鼠标|\n",
 55 |     "|click（x，y，button）|模拟点击(默认是左键)|\n",
 56 |     "|rightClick() |模拟右键点击|\n",
 57 |     "|middleClick()|模拟中键点击|\n",
 58 |     "|doubleClick()|模拟左键双击|\n",
 59 |     "|pyautogui.scroll(units)|模拟滚动滚轮。正参数表示向上滚动，负参数表示向下滚动|\n",
 60 |     "|pyautogui.screenshot()|返回屏幕快照的Image 对象|\n",
 61 |     "|pyautogui.pixelMatchesColor(x,y,tuple)|判断x、y 坐标处的像素与指定的颜色是否匹配。第一和第二个参数是xy整数坐标。第三个参数是包含3个整数元组，表示RGB 颜色|\n",
 62 |     "|pyautogui.locateOnScreen(image)|将返回图像iamge所在处当前屏幕图像的坐标，如果无法匹配返回None|\n",
 63 |     "|pyautogui.center(x,y,w,h)|返回区域的中心坐标|\n",
 64 |     "|typewrite(message)|键入给定消息字符串中的字符|\n",
 65 |     "|typewrite([key1，key2，key3])|键入给定键字符串，pyautogui.KEYBOARD_KEYS查看键字符串的列表|\n",
 66 |     "|press(key)|按下并释放给定键|\n",
 67 |     "|keyDown(key)|模拟按下给定键|\n",
 68 |     "|keyUp(key)|模拟释放给定键|\n",
 69 |     "|mouseDown(x，y，button)|模拟在x、y 处按下指定鼠标按键|\n",
 70 |     "|mouseUp(x，y，button)|模拟在x、y 处释放指定键|\n",
 71 |     "|hotkey([key1，key2，key3])|模拟按顺序按下给定键字符串，然后以相反的顺序释放|\n"
 72 |    ]
 73 |   },
 74 |   {
 75 |    "cell_type": "markdown",
 76 |    "metadata": {},
 77 |    "source": [
 78 |     "## 2 项目练习  "
 79 |    ]
 80 |   },
 81 |   {
 82 |    "cell_type": "markdown",
 83 |    "metadata": {},
 84 |    "source": [
 85 |     "### 2.1 项目：“现在鼠标在哪里？”  \n",
 86 |     "在移动鼠标时随时显示 x y 坐标和改点RGB值\n",
 87 |     "```\n",
 88 |     "# 显示鼠标实时位置\n",
 89 |     "import pyautogui\n",
 90 |     "\n",
 91 |     "# 程序终止crtl+c\n",
 92 |     "print('Press Ctrl-C to quit.')\n",
 93 |     "\n",
 94 |     "try:\n",
 95 |     "    while True:\n",
 96 |     "        x, y = pyautogui.position()\n",
 97 |     "        positionStr = 'X: ' + str(x).rjust(4) + ' Y: ' + str(y).rjust(4)\n",
 98 |     "        # 获得当前点的颜色\n",
 99 |     "        pixelColor = pyautogui.screenshot().getpixel((x, y))\n",
100 |     "        positionStr += ' RGB: (' + str(pixelColor[0]).rjust(3)\n",
101 |     "        positionStr += ', ' + str(pixelColor[1]).rjust(3)\n",
102 |     "        positionStr += ', ' + str(pixelColor[2]).rjust(3) + ')'\n",
103 |     "        # end结尾添加的字符，默认换行符\n",
104 |     "        print(positionStr, end='')\n",
105 |     "        # https://blog.csdn.net/lucosax/article/details/34963593\n",
106 |     "        # 得到了许多\\b 字符构成的字符串，长度与positionStr中保存的字符串长度一样，效果就是擦除了前面打印的字符串\\\n",
107 |     "        # flush把文件从内存buffer（缓冲区）中强制刷新到硬盘中，同时清空缓冲区。实现动态效果\n",
108 |     "        print('\\b' * len(positionStr), end='', flush=True)\n",
109 |     "\n",
110 |     "# 退出程序\n",
111 |     "except KeyboardInterrupt:\n",
112 |     "    print('\\nDone.')\n",
113 |     "```"
114 |    ]
115 |   },
116 |   {
117 |    "cell_type": "markdown",
118 |    "metadata": {},
119 |    "source": [
120 |     "### 2.2 项目：自动填表程序  \n",
121 |     "实现自动填表，表格网站，地址http://autbor.com/form\n",
122 |     "```python\n",
123 |     "import pyautogui, time\n",
124 |     "# Set these to the correct coordinates for your computer\n",
125 |     "# name输入位置\n",
126 |     "nameField = (622, 199)\n",
127 |     "submitButton = (601, 978)\n",
128 |     "submitButtonColor = (31, 115, 230)\n",
129 |     "submitAnotherLink = (760, 224)\n",
130 |     "\n",
131 |     "# 表单数据\n",
132 |     "formData = [{'name': 'Alice', 'fear': 'eavesdroppers', 'source': 'wand','robocop': 4, 'comments': 'Tell Bob I said hi.'},\n",
133 |     "            {'name': 'Bob', 'fear': 'bees', 'source': 'amulet', 'robocop': 4,'comments': 'Please take the puppets out of the break room.'},\n",
134 |     "            {'name': 'Carol', 'fear': 'puppets', 'source': 'crystal ball','robocop': 1, 'comments': 'Please take the puppets out of the break room.'},\n",
135 |     "            {'name': 'Alex Murphy', 'fear': 'ED-209', 'source': 'money','robocop': 5, 'comments': 'Protect the innocent. Serve the publictrust. Uphold the law.'},]\n",
136 |     "\n",
137 |     "# 暂停0.5s\n",
138 |     "pyautogui.PAUSE = 0.5\n",
139 |     "for person in formData:\n",
140 |     "    # Give the user a chance to kill the script.\n",
141 |     "    print('>>> 5 SECOND PAUSE TO LET USER PRESS CTRL-C <<<')\n",
142 |     "    time.sleep(5)\n",
143 |     "    # Wait until the form page has loaded.\n",
144 |     "    # 判断点submitButton和submitButtonColor的颜色是否匹配\n",
145 |     "    while not pyautogui.pixelMatchesColor(submitButton[0], submitButton[1],submitButtonColor):\n",
146 |     "        time.sleep(0.5)\n",
147 |     "    print('Entering %s info...' % (person['name']))\n",
148 |     "        \n",
149 |     "    # 从名字开始填写\n",
150 |     "    pyautogui.click(nameField[0], nameField[1])\n",
151 |     "    # Fill out the Name field.\n",
152 |     "    pyautogui.typewrite(person['name'] + '\\t')\n",
153 |     "    # Fill out the Greatest Fear(s) field.\n",
154 |     "    pyautogui.typewrite(person['fear'] + '\\t')\n",
155 |     "    \n",
156 |     "    # Fill out the Source of Wizard Powers field.\n",
157 |     "    if person['source'] == 'wand':\n",
158 |     "        # 按下键\n",
159 |     "        pyautogui.typewrite(['down'])\n",
160 |     "        # 回车\n",
161 |     "        pyautogui.typewrite(['enter'])\n",
162 |     "        # tab\n",
163 |     "        pyautogui.typewrite(['\\t'])\n",
164 |     "    elif person['source'] == 'amulet':\n",
165 |     "        pyautogui.typewrite(['down', 'down'])\n",
166 |     "        pyautogui.typewrite(['enter'])\n",
167 |     "        pyautogui.typewrite(['\\t'])\n",
168 |     "    elif person['source'] == 'crystal ball':\n",
169 |     "        pyautogui.typewrite(['down', 'down', 'down'])\n",
170 |     "        pyautogui.typewrite(['enter'])\n",
171 |     "        pyautogui.typewrite(['\\t'])\n",
172 |     "    elif person['source'] == 'money':\n",
173 |     "        pyautogui.typewrite(['down', 'down', 'down', 'down'])\n",
174 |     "        pyautogui.typewrite(['enter'])\n",
175 |     "        pyautogui.typewrite(['\\t'])\n",
176 |     "    # Fill out the RoboCop field.\n",
177 |     "    if person['robocop'] == 1:\n",
178 |     "        pyautogui.typewrite([' ', '\\t'])\n",
179 |     "    elif person['robocop'] == 2:\n",
180 |     "        pyautogui.typewrite(['right', '\\t'])\n",
181 |     "    elif person['robocop'] == 3:\n",
182 |     "        pyautogui.typewrite(['right', 'right', '\\t'])\n",
183 |     "    elif person['robocop'] == 4:\n",
184 |     "        pyautogui.typewrite(['right', 'right', 'right', '\\t'])\n",
185 |     "    elif person['robocop'] == 5:\n",
186 |     "        pyautogui.typewrite(['right', 'right', 'right', 'right', '\\t'])\n",
187 |     "        \n",
188 |     "    # Fill out the Additional Comments field.\n",
189 |     "    # 输入文字\n",
190 |     "    pyautogui.typewrite(person['comments'] + '\\t')\n",
191 |     "    # Click Submit.\n",
192 |     "    #pyautogui.press('enter')\n",
193 |     "    # Wait until form page has loaded.\n",
194 |     "    print('Clicked Submit.')\n",
195 |     "    time.sleep(5)\n",
196 |     "    # Click the Submit another response link.\n",
197 |     "    # 确认提交\n",
198 |     "    pyautogui.click(submitAnotherLink[0], submitAnotherLink[1])\n",
199 |     "```"
200 |    ]
201 |   },
202 |   {
203 |    "cell_type": "markdown",
204 |    "metadata": {},
205 |    "source": [
206 |     "## 3 参考链接\n",
207 |     "+ 完整pyautogui文档 https://pyautogui.readthedocs.io/en/latest/\n",
208 |     "+ https://www.jianshu.com/p/41463c82ec8f\n",
209 |     "+ https://blog.csdn.net/yzy_1996/article/details/85244714"
210 |    ]
211 |   }
212 |  ],
213 |  "metadata": {
214 |   "kernelspec": {
215 |    "display_name": "Python 3",
216 |    "language": "python",
217 |    "name": "python3"
218 |   },
219 |   "language_info": {
220 |    "codemirror_mode": {
221 |     "name": "ipython",
222 |     "version": 3
223 |    },
224 |    "file_extension": ".py",
225 |    "mimetype": "text/x-python",
226 |    "name": "python",
227 |    "nbconvert_exporter": "python",
228 |    "pygments_lexer": "ipython3",
229 |    "version": "3.7.1"
230 |   }
231 |  },
232 |  "nbformat": 4,
233 |  "nbformat_minor": 2
234 | }
235 | 


--------------------------------------------------------------------------------
/Automate the Boring Stuff with Python/读写文件.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "## 1. 读写文件（第8章）"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "**1.1 文件与文件路径**  \n",
 15 |     "通过import os调用os模块操作目录，常用函数如下："
 16 |    ]
 17 |   },
 18 |   {
 19 |    "cell_type": "markdown",
 20 |    "metadata": {},
 21 |    "source": [
 22 |     "|函数|用途|\n",
 23 |     "|-|:-|\n",
 24 |     "|os.getcwd()|取得当前工作路径|\n",
 25 |     "|os.chdir()|改变当前工作路径|\n",
 26 |     "|os.makedirs(path)|创建新文件夹|\n",
 27 |     "|os.listdir(path)|返回文件名字符串的列表|\n",
 28 |     "|os.path.join(path1,path2,...)|连接路径|\n",
 29 |     "|os.path.abspath(path)|返回绝对路径|\n",
 30 |     "|os.path.isabs(path)|检查是否绝对路径|\n",
 31 |     "|os.path.relpath(path, start)|返回从start路径到path的相对路径|\n",
 32 |     "|os.path.dirname(path)|返回目录名|\n",
 33 |     "|os.path.basename(path)|返回文件名|\n",
 34 |     "|os.path.split(path)|返回目录名和文件名的元组|\n",
 35 |     "|os.path.getsize(path)|返回path参数中文件的字节数|\n",
 36 |     "|os.path.exists(path)|检查路径是否存在|\n",
 37 |     "|os.path.isfile(path)|检查路径是否文件|\n",
 38 |     "|os.path.isdir(path)|检查路径是否文件夹|"
 39 |    ]
 40 |   },
 41 |   {
 42 |    "cell_type": "markdown",
 43 |    "metadata": {},
 44 |    "source": [
 45 |     "**1.2 文件读写**"
 46 |    ]
 47 |   },
 48 |   {
 49 |    "cell_type": "markdown",
 50 |    "metadata": {},
 51 |    "source": [
 52 |     "在Python 中，读写文件有3 个步骤：\n",
 53 |     "1. 调用open()函数，返回一个File 对象。\n",
 54 |     "2. 调用File 对象的read()或write()方法。\n",
 55 |     "3. 调用File 对象的close()方法，关闭该文件"
 56 |    ]
 57 |   },
 58 |   {
 59 |    "cell_type": "code",
 60 |    "execution_count": 1,
 61 |    "metadata": {},
 62 |    "outputs": [
 63 |     {
 64 |      "name": "stdout",
 65 |      "output_type": "stream",
 66 |      "text": [
 67 |       "Hello world!\n",
 68 |       "Bacon is not a vegetable.\n"
 69 |      ]
 70 |     }
 71 |    ],
 72 |    "source": [
 73 |     "# 将'w'作为第二个参数传递给open(),以写模式打开该文件\n",
 74 |     "# 写模式会清空源文件\n",
 75 |     "baconFile = open('bacon.txt', 'w')\n",
 76 |     "baconFile.write('Hello world!\\n')\n",
 77 |     "baconFile.close()\n",
 78 |     "\n",
 79 |     "# 将'a'作为第二个参数传递给open()，以添加模式打开该文件\n",
 80 |     "# 添加模式将在已有文件的末尾添加文本\n",
 81 |     "baconFile = open('bacon.txt', 'a')\n",
 82 |     "baconFile.write('Bacon is not a vegetable.')\n",
 83 |     "baconFile.close()\n",
 84 |     "\n",
 85 |     "# 读文件\n",
 86 |     "baconFile = open('bacon.txt')\n",
 87 |     "# read将文件的内容看成是单个大字符串\n",
 88 |     "content = baconFile.read()\n",
 89 |     "# readlines从该文件取得一个字符串的列表。列表中的每个字符串就是文本中的每一行\n",
 90 |     "# content = baconFile.readlines()\n",
 91 |     "baconFile.close()\n",
 92 |     "print(content)"
 93 |    ]
 94 |   },
 95 |   {
 96 |    "cell_type": "markdown",
 97 |    "metadata": {},
 98 |    "source": [
 99 |     "## 2. 项目练习"
100 |    ]
101 |   },
102 |   {
103 |    "cell_type": "markdown",
104 |    "metadata": {},
105 |    "source": [
106 |     "### 2.1 生成随机的测验试卷文件"
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "code",
111 |    "execution_count": 2,
112 |    "metadata": {},
113 |    "outputs": [],
114 |    "source": [
115 |     "# -*- coding: utf-8 -*-\n",
116 |     "import random\n",
117 |     "\n",
118 |     "\n",
119 |     "# 测验数据\n",
120 |     "# The quiz data. Keys are states and values are their capitals.\n",
121 |     "capitals = {'Alabama': 'Montgomery', 'Alaska': 'Juneau', 'Arizona': 'Phoenix',\n",
122 |     "'Arkansas': 'Little Rock', 'California': 'Sacramento', 'Colorado': 'Denver',\n",
123 |     "'Connecticut': 'Hartford', 'Delaware': 'Dover', 'Florida': 'Tallahassee',\n",
124 |     "'Georgia': 'Atlanta', 'Hawaii': 'Honolulu', 'Idaho': 'Boise', 'Illinois':\n",
125 |     "'Springfield', 'Indiana': 'Indianapolis', 'Iowa': 'Des Moines', 'Kansas':\n",
126 |     "'Topeka', 'Kentucky': 'Frankfort', 'Louisiana': 'Baton Rouge', 'Maine':\n",
127 |     "'Augusta', 'Maryland': 'Annapolis', 'Massachusetts': 'Boston', 'Michigan':\n",
128 |     "'Lansing', 'Minnesota': 'Saint Paul', 'Mississippi': 'Jackson', 'Missouri':\n",
129 |     "'Jefferson City', 'Montana': 'Helena', 'Nebraska': 'Lincoln', 'Nevada':\n",
130 |     "'Carson City', 'New Hampshire': 'Concord', 'New Jersey': 'Trenton', 'New Mexico': 'Santa Fe', 'New York': 'Albany', 'North Carolina': 'Raleigh',\n",
131 |     "'North Dakota': 'Bismarck', 'Ohio': 'Columbus', 'Oklahoma': 'Oklahoma City',\n",
132 |     "'Oregon': 'Salem', 'Pennsylvania': 'Harrisburg', 'Rhode Island': 'Providence',\n",
133 |     "'South Carolina': 'Columbia', 'South Dakota': 'Pierre', 'Tennessee':\n",
134 |     "'Nashville', 'Texas': 'Austin', 'Utah': 'Salt Lake City', 'Vermont':\n",
135 |     "'Montpelier', 'Virginia': 'Richmond', 'Washington': 'Olympia', \n",
136 |     "'West Virginia': 'Charleston', 'Wisconsin': 'Madison', 'Wyoming': 'Cheyenne'}\n",
137 |     "    \n",
138 |     "    \n",
139 |     "# 生成试卷\n",
140 |     "# 试卷份数    \n",
141 |     "n_quiz=2\n",
142 |     "for quizNum in range(n_quiz):\n",
143 |     "    # Create the quiz and answer key files.\n",
144 |     "    # 试卷\n",
145 |     "    quizFile = open('capitalsquiz%s.txt' % (quizNum + 1), 'w')\n",
146 |     "    # 答案\n",
147 |     "    answerKeyFile = open('capitalsquiz_answers%s.txt' % (quizNum + 1), 'w')\n",
148 |     "    # Write out the header for the quiz.\n",
149 |     "    # 头文件\n",
150 |     "    quizFile.write('Name:\\n\\nDate:\\n\\nPeriod:\\n\\n')\n",
151 |     "    quizFile.write((' ' * 20) + 'State Capitals Quiz (Form %s)' % (quizNum + 1))\n",
152 |     "    quizFile.write('\\n\\n')\n",
153 |     "    # Shuffle the order of the states.\n",
154 |     "    states = list(capitals.keys())\n",
155 |     "    # 创建了美国州名的随机列表\n",
156 |     "    random.shuffle(states)    \n",
157 |     "    \n",
158 |     "    \n",
159 |     "    # 创建答案    \n",
160 |     "    # Loop through all 50 states, making a question for each.\n",
161 |     "    for questionNum in range(50):\n",
162 |     "        # Get right and wrong answers.\n",
163 |     "        # 正确答案\n",
164 |     "        correctAnswer = capitals[states[questionNum]]\n",
165 |     "        # 错误答案\n",
166 |     "        wrongAnswers = list(capitals.values())\n",
167 |     "        # 删除正确的答案\n",
168 |     "        del wrongAnswers[wrongAnswers.index(correctAnswer)]\n",
169 |     "        # 随机取出三个答案\n",
170 |     "        wrongAnswers = random.sample(wrongAnswers, 3)\n",
171 |     "        answerOptions = wrongAnswers + [correctAnswer]\n",
172 |     "        random.shuffle(answerOptions)    \n",
173 |     "    \n",
174 |     "        # 将内容写入测验试卷和答案文件\n",
175 |     "        # Write the question and the answer options to the quiz file.\n",
176 |     "        quizFile.write('%s. What is the capital of %s?\\n' % (questionNum + 1,states[questionNum]))\n",
177 |     "        \n",
178 |     "    \n",
179 |     "        for i in range(4):\n",
180 |     "            quizFile.write(' %s. %s\\n' % ('ABCD'[i], answerOptions[i]))\n",
181 |     "        quizFile.write('\\n')\n",
182 |     "        \n",
183 |     "        # Write the answer key to a file.   \n",
184 |     "        answerKeyFile.write('%s. %s\\n' % (questionNum + 1, 'ABCD'[answerOptions.index(correctAnswer)]))\n",
185 |     "    quizFile.close()\n",
186 |     "    answerKeyFile.close()    "
187 |    ]
188 |   },
189 |   {
190 |    "cell_type": "markdown",
191 |    "metadata": {},
192 |    "source": [
193 |     "### 2.2 疯狂填词\n",
194 |     "创建一个疯狂填词（Mad Libs）程序，它将读入文本文件，并让用户在该文本文件中出现ADJECTIVE、NOUN、ADVERB 或VERB 等单词的地方，加上他们自己的文本。"
195 |    ]
196 |   },
197 |   {
198 |    "cell_type": "code",
199 |    "execution_count": 3,
200 |    "metadata": {},
201 |    "outputs": [
202 |     {
203 |      "name": "stdout",
204 |      "output_type": "stream",
205 |      "text": [
206 |       "Enter an ADJECTIVE:\n",
207 |       "silly\n",
208 |       "Enter a NOUN:\n",
209 |       "chandelier\n",
210 |       "Enter a VERB:\n",
211 |       "screamed\n",
212 |       "Enter a NOUN:\n",
213 |       "pickup truck\n",
214 |       "The silly panda walked to the chandelier and then screamed. A nearby pickup truck was unaffected by these events.\n"
215 |      ]
216 |     }
217 |    ],
218 |    "source": [
219 |     "import re\n",
220 |     "\n",
221 |     "# text:\n",
222 |     "'''\n",
223 |     "The ADJECTIVE panda walked to the NOUN and then VERB. A nearby NOUN was unaffected by these events\n",
224 |     "'''\n",
225 |     "# 输入文件\n",
226 |     "input_file = open('./input_text.txt')\n",
227 |     "input_text = input_file.read()\n",
228 |     "input_file.close()\n",
229 |     "\n",
230 |     "# 输出文本\n",
231 |     "output_text = input_text\n",
232 |     "\n",
233 |     "# 输入文本寻找关键词\n",
234 |     "keywords = re.compile('adjective|noun|verb', re.IGNORECASE)\n",
235 |     "mo = keywords.findall(input_text)\n",
236 |     "\n",
237 |     "# 一个一个词进行替换\n",
238 |     "for word in mo:\n",
239 |     "    if word[0].lower() == 'a':\n",
240 |     "        replacewords = input('Enter an {}:\\n'.format(word))\n",
241 |     "    else:\n",
242 |     "        replacewords = input('Enter a {}:\\n'.format(word))\n",
243 |     "    regex = re.compile(word)\n",
244 |     "    # 每一次替换一个词\n",
245 |     "    output_text = regex.sub(replacewords, output_text, 1)\n",
246 |     "\n",
247 |     "# 写入新文件\n",
248 |     "output_file = open('output_text.txt', 'w')\n",
249 |     "output_file.write(output_text)\n",
250 |     "output_file.close()\n",
251 |     "print(output_text)\n"
252 |    ]
253 |   },
254 |   {
255 |    "cell_type": "markdown",
256 |    "metadata": {},
257 |    "source": [
258 |     "### 2.3 正则表达式查找\n",
259 |     "编写一个程序，打开文件夹中所有的.txt 文件，查找匹配用户提供的正则表达式的所有行。结果应该打印到屏幕上。"
260 |    ]
261 |   },
262 |   {
263 |    "cell_type": "code",
264 |    "execution_count": 4,
265 |    "metadata": {},
266 |    "outputs": [
267 |     {
268 |      "name": "stdout",
269 |      "output_type": "stream",
270 |      "text": [
271 |       "The ADJECTIVE panda walked to the NOUN and then VERB. A nearby NOUN was unaffected by these events.\n"
272 |      ]
273 |     }
274 |    ],
275 |    "source": [
276 |     "\n",
277 |     "import os\n",
278 |     "import re\n",
279 |     "\n",
280 |     "path = \"./\"\n",
281 |     "\n",
282 |     "# 判断路径目录是否存在\n",
283 |     "folder = os.path.exists(path)\n",
284 |     "if folder == False:\n",
285 |     "    print(\"文件目录不存在！\")\n",
286 |     "\n",
287 |     "\n",
288 |     "#返回指定目录下所有的文件名和文件夹名列表\n",
289 |     "file_Name = os.listdir(path)\n",
290 |     "\n",
291 |     "#创建正则表达式\n",
292 |     "keywords = re.compile(r'ADJECTIVE')\n",
293 |     "\n",
294 |     "# 找出txt文件\n",
295 |     "for i in range(len(file_Name)):\n",
296 |     "    #判断是否为txt文件\n",
297 |     "    if os.path.isfile(os.path.join(path, file_Name[i])) and file_Name[i].split('.')[-1] == 'txt':\n",
298 |     "        # 打开文件\n",
299 |     "        file = open(os.path.join(path, file_Name[i]))\n",
300 |     "        file_texts = file.readlines()\n",
301 |     "        for texts in file_texts:\n",
302 |     "            #找出匹配正则表达式的内容\n",
303 |     "            text_put = keywords.search(texts)\n",
304 |     "            #输出结果\n",
305 |     "            if text_put != None:\n",
306 |     "                #打印当前行\n",
307 |     "                print('{}'.format(texts))"
308 |    ]
309 |   }
310 |  ],
311 |  "metadata": {
312 |   "kernelspec": {
313 |    "display_name": "Python 3",
314 |    "language": "python",
315 |    "name": "python3"
316 |   },
317 |   "language_info": {
318 |    "codemirror_mode": {
319 |     "name": "ipython",
320 |     "version": 3
321 |    },
322 |    "file_extension": ".py",
323 |    "mimetype": "text/x-python",
324 |    "name": "python",
325 |    "nbconvert_exporter": "python",
326 |    "pygments_lexer": "ipython3",
327 |    "version": "3.7.1"
328 |   }
329 |  },
330 |  "nbformat": 4,
331 |  "nbformat_minor": 2
332 | }
333 | 


--------------------------------------------------------------------------------
/Deep learning/NLP/[自然语言处理] 自然语言处理库spaCy使用指北/image/img1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Deep learning/NLP/[自然语言处理] 自然语言处理库spaCy使用指北/image/img1.png


--------------------------------------------------------------------------------
/Deep learning/NLP/[自然语言处理] 自然语言处理库spaCy使用指北/image/img2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Deep learning/NLP/[自然语言处理] 自然语言处理库spaCy使用指北/image/img2.png


--------------------------------------------------------------------------------
/Deep learning/NLP/[语音识别] 基于Python构建简易的音频录制与语音识别应用/asr.py:
--------------------------------------------------------------------------------
 1 | # asr.py
 2 | import whisper
 3 | import numpy as np
 4 | 
 5 | 
 6 | class ASR:
 7 |     """
 8 |     语音识别模型类
 9 |     """
10 | 
11 |     def __init__(self, modelType: str = "small", prompt: str = ""):
12 |         """
13 |         :param modelType: whisper模型类型
14 |         :param prompt: 提示词
15 |         """
16 |         # 模型默认使用cuda运行，没gpu跑模型很慢。
17 |         # 使用device="cpu"即可改为cpu运行
18 |         self.model = whisper.load_model(modelType, device="cuda")
19 |         # prompt作用就是提示模型输出指定类型的文字
20 |         # 这里使用简体中文就是告诉模型尽可能输出简体中文的识别结果
21 |         self.prompt = "简体中文" + prompt
22 | 
23 |     def predict(self, audio: np.ndarray) -> str:
24 |         """
25 |         语音识别
26 |         :param audio: 输入的numpy音频数组
27 |         :return: 输出识别的字符串结果
28 |         """
29 | 
30 |         # prompt在whisper中用法是作为transformer模型交叉注意力模块的初始值。transformer为自回归模型，会逐个生成识别文字，
31 |         # 如果输入的语音为空，initial_prompt的设置可能会导致语音识别输出结果为initial_prompt
32 |         result = self.model.transcribe(audio.astype(np.float32), initial_prompt=self.prompt)
33 |         return result["text"]


--------------------------------------------------------------------------------
/Deep learning/NLP/[语音识别] 基于Python构建简易的音频录制与语音识别应用/record.py:
--------------------------------------------------------------------------------
  1 | # record.py
  2 | import speech_recognition as sr
  3 | from PyQt5.QtCore import QThread, pyqtSignal
  4 | import time, os
  5 | import numpy as np
  6 | from asr import ASR
  7 | 
  8 | 
  9 | class AudioHandle(QThread):
 10 |     """
 11 |     录音控制类
 12 |     """
 13 | 
 14 |     # 用于展示信息的pyqt信号
 15 |     infoSignal = pyqtSignal(str)
 16 | 
 17 |     def __init__(self, sampleRate: int = 16000, adjustTime: int = 1, phraseLimitTime: int = 5,
 18 |                  saveAudio: bool = False, hotWord: str = ""):
 19 |         """
 20 |         :param sampleRate: 采样率
 21 |         :param adjustTime: 适应环境时长/s
 22 |         :param phraseLimitTime: 录音最长时长/s
 23 |         :param saveAudio: 是否保存音频
 24 |         :param hotWord: 热词数据
 25 |         """
 26 |         super(AudioHandle, self).__init__()
 27 |         self.sampleRate = sampleRate
 28 |         self.duration = adjustTime
 29 |         self.phraseTime = phraseLimitTime
 30 |         # 用于设置运行状态
 31 |         self.running = False
 32 |         self.rec = sr.Recognizer()
 33 |         # 麦克风对象
 34 |         self.mic = sr.Microphone(sample_rate=self.sampleRate)
 35 |         # 语音识别模型对象
 36 |         # hotWord为需要优先识别的热词
 37 |         # 输入"秦剑 无憾"表示优先匹配该字符串中的字符
 38 |         self.asr = ASR(prompt=hotWord)
 39 |         self.saveAudio = saveAudio
 40 |         self.savePath = "output"
 41 | 
 42 |     def run(self) -> None:
 43 |         self.listen()
 44 | 
 45 |     def stop(self) -> None:
 46 |         self.running = False
 47 | 
 48 |     def setInfo(self, text: str, type: str = "info") -> None:
 49 |         """
 50 |         展示信息
 51 |         :param text: 文本
 52 |         :param type: 文本类型
 53 |         """
 54 |         nowTime = time.strftime("%H:%M:%S", time.localtime())
 55 |         if type == "info":
 56 |             self.infoSignal.emit("<font color='blue'>{} {}</font>".format(nowTime, text))
 57 |         elif type == "text":
 58 |             self.infoSignal.emit("<font color='green'>{} {}</font>".format(nowTime, text))
 59 |         else:
 60 |             self.infoSignal.emit("<font color='red'>{} {}</font>".format(nowTime, text))
 61 | 
 62 |     def listen(self) -> None:
 63 |         """
 64 |         语音监听函数
 65 |         """
 66 |         try:
 67 |             with self.mic as source:
 68 |                 self.setInfo("录音开始")
 69 |                 self.running = True
 70 |                 while self.running:
 71 |                     # 设备监控
 72 |                     audioIndex = self.mic.audio.get_default_input_device_info()['index']
 73 |                     workAudio = self.mic.list_working_microphones()
 74 |                     if len(workAudio) == 0 or audioIndex not in workAudio:
 75 |                         self.setInfo("未检测到有效音频输入设备！！！", type='warning')
 76 |                         break
 77 |                     self.rec.adjust_for_ambient_noise(source, duration=self.duration)
 78 |                     self.setInfo("正在录音")
 79 |                     # self.running为否无法立即退出该函数，如果想立即退出则需要重写该函数
 80 |                     audio = self.rec.listen(source, phrase_time_limit=self.phraseTime)
 81 |                     # 将音频二进制数据转换为numpy类型
 82 |                     audionp = self.bytes2np(audio.frame_data)
 83 |                     if self.saveAudio:
 84 |                         self.saveWav(audio)
 85 |                     # 判断音频rms值是否超过经验阈值，如果没超过表明为环境噪声
 86 |                     if np.sqrt(np.mean(audionp ** 2)) < 0.02:
 87 |                         continue
 88 |                     self.setInfo("音频正在识别")
 89 |                     # 识别语音
 90 |                     result = self.asr.predict(audionp)
 91 |                     self.setInfo(f"识别结果为：{result}", "text")
 92 |         except Exception as e:
 93 |             self.setInfo(e, "warning")
 94 |         finally:
 95 |             self.setInfo("录音停止")
 96 |             self.running = False
 97 | 
 98 |     def bytes2np(self, inp: bytes, sampleWidth: int = 2) -> np.ndarray:
 99 |         """
100 |         将音频二进制数据转换为numpy类型
101 |         :param inp: 输入音频二进制流
102 |         :param sampleWidth: 音频采样宽度
103 |         :return: 音频numpy数组
104 |         """
105 | 
106 |         # 使用np.frombuffer函数将字节序列转换为numpy数组
107 |         tmp = np.frombuffer(inp, dtype=np.int16 if sampleWidth == 2 else np.int8)
108 |         # 确保tmp为numpy数组
109 |         tmp = np.asarray(tmp)
110 | 
111 |         # 获取tmp数组元素的数据类型信息
112 |         i = np.iinfo(tmp.dtype)
113 |         # 计算tmp元素的绝对最大值
114 |         absmax = 2 ** (i.bits - 1)
115 |         # 计算tmp元素的偏移量
116 |         offset = i.min + absmax
117 | 
118 |         # 将tmp数组元素转换为浮点型，并进行归一化
119 |         array = np.frombuffer((tmp.astype(np.float32) - offset) / absmax, dtype=np.float32)
120 | 
121 |         # 返回转换后的numpy数组
122 |         return array
123 | 
124 |     def saveWav(self, audio: sr.AudioData) -> None:
125 |         """
126 |         保存语音结果
127 |         :param audio: AudioData音频对象
128 |         """
129 |         nowTime = time.strftime("%H_%M_%S", time.localtime())
130 |         os.makedirs(self.savePath, exist_ok=True)
131 |         with open("{}/{}.wav".format(self.savePath, nowTime), 'wb') as f:
132 |             f.write(audio.get_wav_data())


--------------------------------------------------------------------------------
/Deep learning/NLP/[语音识别] 基于Python构建简易的音频录制与语音识别应用/run.py:
--------------------------------------------------------------------------------
  1 | # run.py
  2 | from PyQt5 import QtGui
  3 | from PyQt5.QtWidgets import *
  4 | from PyQt5.QtCore import QSize, Qt
  5 | import sys
  6 | from record import AudioHandle
  7 | 
  8 | 
  9 | class Window(QMainWindow):
 10 |     """
 11 |     界面类
 12 |     """
 13 | 
 14 |     def __init__(self):
 15 |         super().__init__()
 16 |         # --- 设置标题
 17 |         self.setWindowTitle('语音识别demo')
 18 |         # --- 设置窗口尺寸
 19 |         # 获取系统桌面尺寸
 20 |         desktop = app.desktop()
 21 |         # 设置界面初始尺寸
 22 |         self.width = int(desktop.screenGeometry().width() * 0.3)
 23 |         self.height = int(0.5 * self.width)
 24 |         self.resize(self.width, self.height)
 25 |         # 设置窗口最小值
 26 |         self.minWidth = 300
 27 |         self.setMinimumSize(QSize(self.minWidth, int(0.5 * self.minWidth)))
 28 | 
 29 |         # --- 创建组件
 30 |         self.showBox = QTextEdit()
 31 |         self.showBox.setReadOnly(True)
 32 |         self.startBtn = QPushButton("开始录音")
 33 |         self.stopBtn = QPushButton("停止录音")
 34 |         self.stopBtn.setEnabled(False)
 35 | 
 36 |         # --- 组件初始化
 37 |         self.initUI()
 38 | 
 39 |         # --- 初始化音频类
 40 |         self.ahl = AudioHandle()
 41 |         # 连接用于传递信息的信号
 42 |         self.ahl.infoSignal.connect(self.showInfo)
 43 |         self.showInfo("<font color='blue'>{}</font>".format("程序已初始化"))
 44 | 
 45 |     def initUI(self) -> None:
 46 |         """
 47 |         界面初始化
 48 |         """
 49 |         # 设置整体布局
 50 |         mainLayout = QVBoxLayout()
 51 |         mainLayout.addWidget(self.showBox)
 52 |         # 设置底部水平布局
 53 |         blayout = QHBoxLayout()
 54 |         blayout.addWidget(self.startBtn)
 55 |         blayout.addWidget(self.stopBtn)
 56 |         mainLayout.addLayout(blayout)
 57 | 
 58 |         mainWidget = QWidget()
 59 |         mainWidget.setLayout(mainLayout)
 60 |         self.setCentralWidget(mainWidget)
 61 | 
 62 |         # 设置事件
 63 |         self.startBtn.clicked.connect(self.record)
 64 |         self.stopBtn.clicked.connect(self.record)
 65 | 
 66 |     def record(self) -> None:
 67 |         """
 68 |         录音控制
 69 |         """
 70 |         sender = self.sender()
 71 |         if sender.text() == "开始录音":
 72 |             self.stopBtn.setEnabled(True)
 73 |             self.startBtn.setEnabled(False)
 74 |             # 开启录音线程
 75 |             self.ahl.start()
 76 |         elif sender.text() == "停止录音":
 77 |             self.stopBtn.setEnabled(False)
 78 |             # waitDialog用于等待录音停止
 79 |             waitDialog = QProgressDialog("正在停止录音...", None, 0, 0)
 80 |             waitDialog.setWindowTitle("请等待")
 81 |             waitDialog.setWindowModality(Qt.ApplicationModal)
 82 |             waitDialog.setCancelButton(None)
 83 |             waitDialog.setRange(0, 0)
 84 | 
 85 |             # 设置 Marquee 模式
 86 |             waitDialog.setWindowFlag(Qt.WindowContextHelpButtonHint, False)
 87 |             waitDialog.setWindowFlag(Qt.WindowCloseButtonHint, False)
 88 |             waitDialog.setWindowFlag(Qt.WindowMaximizeButtonHint, False)
 89 |             waitDialog.setWindowFlag(Qt.WindowMinimizeButtonHint, False)
 90 |             waitDialog.setWindowFlag(Qt.WindowTitleHint, False)
 91 |             # 关闭对话框边框
 92 |             waitDialog.setWindowFlags(self.windowFlags() | Qt.FramelessWindowHint)
 93 | 
 94 |             # 连接关闭信号，即ahl线程结束则waitDialog关闭
 95 |             self.ahl.finished.connect(waitDialog.accept)
 96 |             # 结束录音线程
 97 |             self.ahl.stop()
 98 |             if self.ahl.isRunning():
 99 |                 # 显示对话框
100 |                 waitDialog.exec_()
101 | 
102 |             # 关闭对话框
103 |             self.ahl.finished.disconnect(waitDialog.accept)
104 |             waitDialog.close()
105 | 
106 |             self.startBtn.setEnabled(True)
107 | 
108 |     def showInfo(self, text: str) -> None:
109 |         """
110 |         信息展示函数
111 |         :param text: 输入文字，可支持html
112 |         """
113 |         self.showBox.append(text)
114 |         if not self.ahl.running:
115 |             self.stopBtn.click()
116 | 
117 |     def closeEvent(self, event: QtGui.QCloseEvent):
118 |         """
119 |         重写退出事件
120 |         :param event: 事件对象
121 |         """
122 |         # 点击停止按钮
123 |         if self.ahl.running:
124 |             self.stopBtn.click()
125 |         del self.ahl
126 |         event.accept()
127 | 
128 | 
129 | if __name__ == '__main__':
130 |     app = QApplication(sys.argv)
131 |     ex = Window()
132 |     # 获取默认图标
133 |     default_icon = app.style().standardIcon(QStyle.SP_MediaVolume)
134 | 
135 |     # 设置窗口图标为默认图标
136 |     ex.setWindowIcon(default_icon)
137 | 
138 |     ex.show()
139 |     sys.exit(app.exec_())


--------------------------------------------------------------------------------
/Deep learning/face detection/[深度学习] Python人脸识别库Deepface使用教程/images/baijingting/0000.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Deep learning/face detection/[深度学习] Python人脸识别库Deepface使用教程/images/baijingting/0000.jpg


--------------------------------------------------------------------------------
/Deep learning/face detection/[深度学习] Python人脸识别库Deepface使用教程/images/baijingting/0001.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Deep learning/face detection/[深度学习] Python人脸识别库Deepface使用教程/images/baijingting/0001.jpg


--------------------------------------------------------------------------------
/Deep learning/face detection/[深度学习] Python人脸识别库Deepface使用教程/images/baijingting/0002.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Deep learning/face detection/[深度学习] Python人脸识别库Deepface使用教程/images/baijingting/0002.jpg


--------------------------------------------------------------------------------
/Deep learning/face detection/[深度学习] Python人脸识别库Deepface使用教程/images/jiangwen/0000.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Deep learning/face detection/[深度学习] Python人脸识别库Deepface使用教程/images/jiangwen/0000.jpg


--------------------------------------------------------------------------------
/Deep learning/face detection/[深度学习] Python人脸识别库Deepface使用教程/images/jiangwen/0001.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Deep learning/face detection/[深度学习] Python人脸识别库Deepface使用教程/images/jiangwen/0001.jpg


--------------------------------------------------------------------------------
/Deep learning/face detection/[深度学习] Python人脸识别库Deepface使用教程/images/jiangwen/0002.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Deep learning/face detection/[深度学习] Python人脸识别库Deepface使用教程/images/jiangwen/0002.jpg


--------------------------------------------------------------------------------
/Deep learning/face detection/[深度学习] Python人脸识别库Deepface使用教程/images/pengyuyan/0000.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Deep learning/face detection/[深度学习] Python人脸识别库Deepface使用教程/images/pengyuyan/0000.jpg


--------------------------------------------------------------------------------
/Deep learning/face detection/[深度学习] Python人脸识别库Deepface使用教程/images/pengyuyan/0001.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Deep learning/face detection/[深度学习] Python人脸识别库Deepface使用教程/images/pengyuyan/0001.jpg


--------------------------------------------------------------------------------
/Deep learning/face detection/[深度学习] Python人脸识别库Deepface使用教程/images/pengyuyan/0002.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Deep learning/face detection/[深度学习] Python人脸识别库Deepface使用教程/images/pengyuyan/0002.jpg


--------------------------------------------------------------------------------
/Deep learning/face detection/[深度学习] Python人脸识别库Deepface使用教程/images/zhangziyi/0000.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Deep learning/face detection/[深度学习] Python人脸识别库Deepface使用教程/images/zhangziyi/0000.jpg


--------------------------------------------------------------------------------
/Deep learning/face detection/[深度学习] Python人脸识别库Deepface使用教程/images/zhangziyi/0001.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Deep learning/face detection/[深度学习] Python人脸识别库Deepface使用教程/images/zhangziyi/0001.jpg


--------------------------------------------------------------------------------
/Deep learning/face detection/[深度学习] Python人脸识别库Deepface使用教程/images/zhangziyi/0002.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Deep learning/face detection/[深度学习] Python人脸识别库Deepface使用教程/images/zhangziyi/0002.jpg


--------------------------------------------------------------------------------
/Deep learning/face detection/[深度学习] Python人脸识别库Deepface使用教程/images/zhaoliying/0000.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Deep learning/face detection/[深度学习] Python人脸识别库Deepface使用教程/images/zhaoliying/0000.jpg


--------------------------------------------------------------------------------
/Deep learning/face detection/[深度学习] Python人脸识别库Deepface使用教程/images/zhaoliying/0001.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Deep learning/face detection/[深度学习] Python人脸识别库Deepface使用教程/images/zhaoliying/0001.jpg


--------------------------------------------------------------------------------
/Deep learning/face detection/[深度学习] Python人脸识别库Deepface使用教程/images/zhaoliying/0002.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Deep learning/face detection/[深度学习] Python人脸识别库Deepface使用教程/images/zhaoliying/0002.jpg


--------------------------------------------------------------------------------
/Deep learning/face detection/[深度学习] Python人脸识别库face_recognition使用教程/test_img/README.md:
--------------------------------------------------------------------------------
1 | 测试图像来自于face_recognition官方代码仓库的examples文件夹，地址[face_recognition/examples](https://github.com/ageitgey/face_recognition/tree/master/examples)


--------------------------------------------------------------------------------
/Deep learning/face detection/[深度学习] Python人脸识别库face_recognition使用教程/test_img/eye_closed.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Deep learning/face detection/[深度学习] Python人脸识别库face_recognition使用教程/test_img/eye_closed.jpg


--------------------------------------------------------------------------------
/Deep learning/face detection/[深度学习] Python人脸识别库face_recognition使用教程/test_img/eye_opened.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Deep learning/face detection/[深度学习] Python人脸识别库face_recognition使用教程/test_img/eye_opened.jpg


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/Base-AGW.yml:
--------------------------------------------------------------------------------
 1 | _BASE_: Base-bagtricks.yml
 2 | 
 3 | MODEL:
 4 |   BACKBONE:
 5 |     WITH_NL: True
 6 | 
 7 |   HEADS:
 8 |     POOL_LAYER: GeneralizedMeanPooling
 9 | 
10 |   LOSSES:
11 |     NAME: ("CrossEntropyLoss", "TripletLoss")
12 |     CE:
13 |       EPSILON: 0.1
14 |       SCALE: 1.0
15 | 
16 |     TRI:
17 |       MARGIN: 0.0
18 |       HARD_MINING: False
19 |       SCALE: 1.0
20 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/Base-MGN.yml:
--------------------------------------------------------------------------------
 1 | _BASE_: Base-SBS.yml
 2 | 
 3 | MODEL:
 4 |   META_ARCHITECTURE: MGN
 5 | 
 6 |   FREEZE_LAYERS: [backbone, b1, b2, b3,]
 7 | 
 8 |   BACKBONE:
 9 |     WITH_NL: False
10 | 
11 |   HEADS:
12 |     EMBEDDING_DIM: 256
13 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/Base-SBS.yml:
--------------------------------------------------------------------------------
 1 | _BASE_: Base-bagtricks.yml
 2 | 
 3 | MODEL:
 4 |   FREEZE_LAYERS: [ backbone ]
 5 | 
 6 |   BACKBONE:
 7 |     WITH_NL: True
 8 | 
 9 |   HEADS:
10 |     NECK_FEAT: after
11 |     POOL_LAYER: GeneralizedMeanPoolingP
12 |     CLS_LAYER: CircleSoftmax
13 |     SCALE: 64
14 |     MARGIN: 0.35
15 | 
16 |   LOSSES:
17 |     NAME: ("CrossEntropyLoss", "TripletLoss",)
18 |     CE:
19 |       EPSILON: 0.1
20 |       SCALE: 1.0
21 | 
22 |     TRI:
23 |       MARGIN: 0.0
24 |       HARD_MINING: True
25 |       NORM_FEAT: False
26 |       SCALE: 1.0
27 | 
28 | INPUT:
29 |   SIZE_TRAIN: [ 384, 128 ]
30 |   SIZE_TEST: [ 384, 128 ]
31 | 
32 |   AUTOAUG:
33 |     ENABLED: True
34 |     PROB: 0.1
35 | 
36 | DATALOADER:
37 |   NUM_INSTANCE: 16
38 | 
39 | SOLVER:
40 |   AMP:
41 |     ENABLED: True
42 |   OPT: Adam
43 |   MAX_EPOCH: 60
44 |   BASE_LR: 0.00035
45 |   WEIGHT_DECAY: 0.0005
46 |   IMS_PER_BATCH: 64
47 | 
48 |   SCHED: CosineAnnealingLR
49 |   DELAY_EPOCHS: 30
50 |   ETA_MIN_LR: 0.0000007
51 | 
52 |   WARMUP_FACTOR: 0.1
53 |   WARMUP_ITERS: 2000
54 | 
55 |   FREEZE_ITERS: 1000
56 | 
57 |   CHECKPOINT_PERIOD: 20
58 | 
59 | TEST:
60 |   EVAL_PERIOD: 10
61 |   IMS_PER_BATCH: 128
62 | 
63 | CUDNN_BENCHMARK: True
64 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/Base-bagtricks.yml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: Baseline
 3 | 
 4 |   BACKBONE: # 模型骨干结构
 5 |     NAME: build_resnet_backbone
 6 |     NORM: BN
 7 |     DEPTH: 50x
 8 |     LAST_STRIDE: 1
 9 |     FEAT_DIM: 2048
10 |     WITH_IBN: False
11 |     PRETRAIN: True
12 | 
13 |   HEADS:  # 模型头
14 |     NAME: EmbeddingHead
15 |     NORM: BN
16 |     WITH_BNNECK: True
17 |     POOL_LAYER: GlobalAvgPool
18 |     NECK_FEAT: before
19 |     CLS_LAYER: Linear
20 | 
21 |   LOSSES: # 训练loss
22 |     NAME: ("CrossEntropyLoss", "TripletLoss",)
23 | 
24 |     CE:
25 |       EPSILON: 0.1
26 |       SCALE: 1.
27 | 
28 |     TRI:
29 |       MARGIN: 0.3
30 |       HARD_MINING: True
31 |       NORM_FEAT: False
32 |       SCALE: 1.
33 | 
34 | INPUT: # 模型输入图像处理方式
35 |   SIZE_TRAIN: [ 256, 128 ]
36 |   SIZE_TEST: [ 256, 128 ]
37 | 
38 |   REA:
39 |     ENABLED: True
40 |     PROB: 0.5
41 | 
42 |   FLIP:
43 |     ENABLED: True
44 | 
45 |   PADDING:
46 |     ENABLED: True
47 | 
48 | DATALOADER: # 模型读取图像方式
49 |   SAMPLER_TRAIN: NaiveIdentitySampler
50 |   NUM_INSTANCE: 4
51 |   NUM_WORKERS: 8
52 | 
53 | SOLVER: # 模型训练配置文件
54 |   AMP:
55 |     ENABLED: True
56 |   OPT: Adam
57 |   MAX_EPOCH: 120
58 |   BASE_LR: 0.00035
59 |   WEIGHT_DECAY: 0.0005
60 |   WEIGHT_DECAY_NORM: 0.0005
61 |   IMS_PER_BATCH: 64
62 | 
63 |   SCHED: MultiStepLR
64 |   STEPS: [ 40, 90 ]
65 |   GAMMA: 0.1
66 | 
67 |   WARMUP_FACTOR: 0.1
68 |   WARMUP_ITERS: 2000
69 | 
70 |   CHECKPOINT_PERIOD: 30
71 | 
72 | TEST: # 模型测试配置
73 |   EVAL_PERIOD: 30
74 |   IMS_PER_BATCH: 128
75 | 
76 | CUDNN_BENCHMARK: True
77 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/DukeMTMC/AGW_R101-ibn.yml:
--------------------------------------------------------------------------------
 1 | _BASE_: ../Base-AGW.yml
 2 | 
 3 | MODEL:
 4 |   BACKBONE:
 5 |     DEPTH: 101x
 6 |     WITH_IBN: True
 7 | 
 8 | DATASETS:
 9 |   NAMES: ("DukeMTMC",)
10 |   TESTS: ("DukeMTMC",)
11 | 
12 | OUTPUT_DIR: logs/dukemtmc/agw_R101-ibn
13 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/DukeMTMC/AGW_R50-ibn.yml:
--------------------------------------------------------------------------------
 1 | _BASE_: ../Base-AGW.yml
 2 | 
 3 | MODEL:
 4 |   BACKBONE:
 5 |     WITH_IBN: True
 6 | 
 7 | DATASETS:
 8 |   NAMES: ("DukeMTMC",)
 9 |   TESTS: ("DukeMTMC",)
10 | 
11 | OUTPUT_DIR: logs/dukemtmc/agw_R50-ibn
12 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/DukeMTMC/AGW_R50.yml:
--------------------------------------------------------------------------------
1 | _BASE_: ../Base-AGW.yml
2 | 
3 | DATASETS:
4 |   NAMES: ("DukeMTMC",)
5 |   TESTS: ("DukeMTMC",)
6 | 
7 | OUTPUT_DIR: logs/dukemtmc/agw_R50
8 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/DukeMTMC/AGW_S50.yml:
--------------------------------------------------------------------------------
 1 | _BASE_: ../Base-AGW.yml
 2 | 
 3 | MODEL:
 4 |   BACKBONE:
 5 |     NAME: build_resnest_backbone
 6 | 
 7 | DATASETS:
 8 |   NAMES: ("DukeMTMC",)
 9 |   TESTS: ("DukeMTMC",)
10 | 
11 | OUTPUT_DIR: logs/dukemtmc/agw_S50
12 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/DukeMTMC/bagtricks_R101-ibn.yml:
--------------------------------------------------------------------------------
 1 | _BASE_: ../Base-bagtricks.yml
 2 | 
 3 | MODEL:
 4 |   BACKBONE:
 5 |     DEPTH: 101x
 6 |     WITH_IBN: True
 7 | 
 8 | DATASETS:
 9 |   NAMES: ("DukeMTMC",)
10 |   TESTS: ("DukeMTMC",)
11 | 
12 | OUTPUT_DIR: logs/dukemtmc/bagtricks_R101-ibn
13 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/DukeMTMC/bagtricks_R50-ibn.yml:
--------------------------------------------------------------------------------
 1 | _BASE_: ../Base-bagtricks.yml
 2 | 
 3 | MODEL:
 4 |   BACKBONE:
 5 |     WITH_IBN: True
 6 | 
 7 | DATASETS:
 8 |   NAMES: ("DukeMTMC",)
 9 |   TESTS: ("DukeMTMC",)
10 | 
11 | OUTPUT_DIR: logs/dukemtmc/bagtricks_R50-ibn
12 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/DukeMTMC/bagtricks_R50.yml:
--------------------------------------------------------------------------------
1 | _BASE_: ../Base-bagtricks.yml
2 | 
3 | DATASETS:
4 |   NAMES: ("DukeMTMC",)
5 |   TESTS: ("DukeMTMC",)
6 | 
7 | OUTPUT_DIR: logs/dukemtmc/bagtricks_R50
8 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/DukeMTMC/bagtricks_S50.yml:
--------------------------------------------------------------------------------
 1 | _BASE_: ../Base-bagtricks.yml
 2 | 
 3 | MODEL:
 4 |   BACKBONE:
 5 |     NAME: build_resnest_backbone
 6 | 
 7 | DATASETS:
 8 |   NAMES: ("DukeMTMC",)
 9 |   TESTS: ("DukeMTMC",)
10 | 
11 | OUTPUT_DIR: logs/dukemtmc/bagtricks_S50
12 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/DukeMTMC/mgn_R50-ibn.yml:
--------------------------------------------------------------------------------
 1 | _BASE_: ../Base-MGN.yml
 2 | 
 3 | MODEL:
 4 |   BACKBONE:
 5 |     WITH_IBN: True
 6 | 
 7 | DATASETS:
 8 |   NAMES: ("DukeMTMC",)
 9 |   TESTS: ("DukeMTMC",)
10 | 
11 | OUTPUT_DIR: logs/dukemtmc/mgn_R50-ibn
12 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/DukeMTMC/sbs_R101-ibn.yml:
--------------------------------------------------------------------------------
 1 | _BASE_: ../Base-SBS.yml
 2 | 
 3 | MODEL:
 4 |   BACKBONE:
 5 |     DEPTH: 101x
 6 |     WITH_IBN: True
 7 | 
 8 | DATASETS:
 9 |   NAMES: ("DukeMTMC",)
10 |   TESTS: ("DukeMTMC",)
11 | 
12 | OUTPUT_DIR: logs/dukemtmc/sbs_R101-ibn
13 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/DukeMTMC/sbs_R50-ibn.yml:
--------------------------------------------------------------------------------
 1 | _BASE_: ../Base-SBS.yml
 2 | 
 3 | MODEL:
 4 |   BACKBONE:
 5 |     WITH_IBN: True
 6 | 
 7 | DATASETS:
 8 |   NAMES: ("DukeMTMC",)
 9 |   TESTS: ("DukeMTMC",)
10 | 
11 | OUTPUT_DIR: logs/dukemtmc/sbs_R50-ibn
12 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/DukeMTMC/sbs_R50.yml:
--------------------------------------------------------------------------------
1 | _BASE_: ../Base-SBS.yml
2 | 
3 | DATASETS:
4 |   NAMES: ("DukeMTMC",)
5 |   TESTS: ("DukeMTMC",)
6 | 
7 | OUTPUT_DIR: logs/dukemtmc/sbs_R50
8 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/DukeMTMC/sbs_S50.yml:
--------------------------------------------------------------------------------
 1 | _BASE_: ../Base-SBS.yml
 2 | 
 3 | MODEL:
 4 |   BACKBONE:
 5 |     NAME: build_resnest_backbone
 6 | 
 7 | DATASETS:
 8 |   NAMES: ("DukeMTMC",)
 9 |   TESTS: ("DukeMTMC",)
10 | 
11 | OUTPUT_DIR: logs/dukemtmc/sbs_S50
12 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/MSMT17/AGW_R101-ibn.yml:
--------------------------------------------------------------------------------
 1 | _BASE_: ../Base-AGW.yml
 2 | 
 3 | MODEL:
 4 |   BACKBONE:
 5 |     DEPTH: 101x
 6 |     WITH_IBN: True
 7 | 
 8 | DATASETS:
 9 |   NAMES: ("MSMT17",)
10 |   TESTS: ("MSMT17",)
11 | 
12 | OUTPUT_DIR: logs/msmt17/agw_R101-ibn
13 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/MSMT17/AGW_R50-ibn.yml:
--------------------------------------------------------------------------------
 1 | _BASE_: ../Base-AGW.yml
 2 | 
 3 | MODEL:
 4 |   BACKBONE:
 5 |     WITH_IBN: True
 6 | 
 7 | DATASETS:
 8 |   NAMES: ("MSMT17",)
 9 |   TESTS: ("MSMT17",)
10 | 
11 | OUTPUT_DIR: logs/msmt17/agw_R50-ibn
12 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/MSMT17/AGW_R50.yml:
--------------------------------------------------------------------------------
1 | _BASE_: ../Base-AGW.yml
2 | 
3 | DATASETS:
4 |   NAMES: ("MSMT17",)
5 |   TESTS: ("MSMT17",)
6 | 
7 | OUTPUT_DIR: logs/msmt17/agw_R50
8 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/MSMT17/AGW_S50.yml:
--------------------------------------------------------------------------------
 1 | _BASE_: ../Base-AGW.yml
 2 | 
 3 | MODEL:
 4 |   BACKBONE:
 5 |     NAME: build_resnest_backbone
 6 | 
 7 | DATASETS:
 8 |   NAMES: ("MSMT17",)
 9 |   TESTS: ("MSMT17",)
10 | 
11 | OUTPUT_DIR: logs/msmt17/agw_S50
12 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/MSMT17/bagtricks_R101-ibn.yml:
--------------------------------------------------------------------------------
 1 | _BASE_: ../Base-bagtricks.yml
 2 | 
 3 | MODEL:
 4 |   BACKBONE:
 5 |     DEPTH: 101x
 6 |     WITH_IBN: True
 7 | 
 8 | DATASETS:
 9 |   NAMES: ("MSMT17",)
10 |   TESTS: ("MSMT17",)
11 | 
12 | OUTPUT_DIR: logs/msmt17/bagtricks_R101-ibn
13 | 
14 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/MSMT17/bagtricks_R50-ibn.yml:
--------------------------------------------------------------------------------
 1 | _BASE_: ../Base-bagtricks.yml
 2 | 
 3 | MODEL:
 4 |   BACKBONE:
 5 |     WITH_IBN: True
 6 | 
 7 | DATASETS:
 8 |   NAMES: ("MSMT17",)
 9 |   TESTS: ("MSMT17",)
10 | 
11 | OUTPUT_DIR: logs/msmt17/bagtricks_R50-ibn
12 | 
13 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/MSMT17/bagtricks_R50.yml:
--------------------------------------------------------------------------------
1 | _BASE_: ../Base-bagtricks.yml
2 | 
3 | DATASETS:
4 |   NAMES: ("MSMT17",)
5 |   TESTS: ("MSMT17",)
6 | 
7 | OUTPUT_DIR: logs/msmt17/bagtricks_R50
8 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/MSMT17/bagtricks_S50.yml:
--------------------------------------------------------------------------------
 1 | _BASE_: ../Base-bagtricks.yml
 2 | 
 3 | MODEL:
 4 |   BACKBONE:
 5 |     NAME: build_resnest_backbone
 6 | 
 7 | DATASETS:
 8 |   NAMES: ("MSMT17",)
 9 |   TESTS: ("MSMT17",)
10 | 
11 | OUTPUT_DIR: logs/msmt17/bagtricks_S50
12 | 
13 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/MSMT17/mgn_R50-ibn.yml:
--------------------------------------------------------------------------------
 1 | _BASE_: ../Base-MGN.yml
 2 | 
 3 | MODEL:
 4 |   BACKBONE:
 5 |     WITH_IBN: True
 6 | 
 7 | DATASETS:
 8 |   NAMES: ("MSMT17",)
 9 |   TESTS: ("MSMT17",)
10 | 
11 | OUTPUT_DIR: logs/msmt17/mgn_R50-ibn
12 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/MSMT17/sbs_R101-ibn.yml:
--------------------------------------------------------------------------------
 1 | _BASE_: ../Base-SBS.yml
 2 | 
 3 | MODEL:
 4 |   BACKBONE:
 5 |     DEPTH: 101x
 6 |     WITH_IBN: True
 7 | 
 8 | DATASETS:
 9 |   NAMES: ("MSMT17",)
10 |   TESTS: ("MSMT17",)
11 | 
12 | OUTPUT_DIR: logs/msmt17/sbs_R101-ibn
13 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/MSMT17/sbs_R50-ibn.yml:
--------------------------------------------------------------------------------
 1 | _BASE_: ../Base-SBS.yml
 2 | 
 3 | MODEL:
 4 |   BACKBONE:
 5 |     WITH_IBN: True
 6 | 
 7 | DATASETS:
 8 |   NAMES: ("MSMT17",)
 9 |   TESTS: ("MSMT17",)
10 | 
11 | OUTPUT_DIR: logs/msmt17/sbs_R50-ibn
12 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/MSMT17/sbs_R50.yml:
--------------------------------------------------------------------------------
1 | _BASE_: ../Base-SBS.yml
2 | 
3 | DATASETS:
4 |   NAMES: ("MSMT17",)
5 |   TESTS: ("MSMT17",)
6 | 
7 | OUTPUT_DIR: logs/msmt17/sbs_R50
8 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/MSMT17/sbs_S50.yml:
--------------------------------------------------------------------------------
 1 | _BASE_: ../Base-SBS.yml
 2 | 
 3 | MODEL:
 4 |   BACKBONE:
 5 |     NAME: build_resnest_backbone
 6 | 
 7 | DATASETS:
 8 |   NAMES: ("MSMT17",)
 9 |   TESTS: ("MSMT17",)
10 | 
11 | OUTPUT_DIR: logs/msmt17/sbs_S50
12 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/Market1501/AGW_R101-ibn.yml:
--------------------------------------------------------------------------------
 1 | _BASE_: ../Base-AGW.yml
 2 | 
 3 | MODEL:
 4 |   BACKBONE:
 5 |     DEPTH: 101x
 6 |     WITH_IBN: True
 7 | 
 8 | DATASETS:
 9 |   NAMES: ("Market1501",)
10 |   TESTS: ("Market1501",)
11 | 
12 | OUTPUT_DIR: logs/market1501/agw_R101-ibn
13 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/Market1501/AGW_R50-ibn.yml:
--------------------------------------------------------------------------------
 1 | _BASE_: ../Base-AGW.yml
 2 | 
 3 | MODEL:
 4 |   BACKBONE:
 5 |     WITH_IBN: True
 6 | 
 7 | DATASETS:
 8 |   NAMES: ("Market1501",)
 9 |   TESTS: ("Market1501",)
10 | 
11 | OUTPUT_DIR: logs/market1501/agw_R50-ibn
12 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/Market1501/AGW_R50.yml:
--------------------------------------------------------------------------------
1 | _BASE_: ../Base-AGW.yml
2 | 
3 | DATASETS:
4 |   NAMES: ("Market1501",)
5 |   TESTS: ("Market1501",)
6 | 
7 | OUTPUT_DIR: logs/market1501/agw_R50
8 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/Market1501/AGW_S50.yml:
--------------------------------------------------------------------------------
 1 | _BASE_: ../Base-AGW.yml
 2 | 
 3 | MODEL:
 4 |   BACKBONE:
 5 |     NAME: build_resnest_backbone
 6 | 
 7 | DATASETS:
 8 |   NAMES: ("Market1501",)
 9 |   TESTS: ("Market1501",)
10 | 
11 | OUTPUT_DIR: logs/market1501/agw_S50
12 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/Market1501/bagtricks_R101-ibn.yml:
--------------------------------------------------------------------------------
 1 | _BASE_: ../Base-bagtricks.yml
 2 | 
 3 | MODEL:
 4 |   BACKBONE:
 5 |     DEPTH: 101x
 6 |     WITH_IBN: True
 7 | 
 8 | DATASETS:
 9 |   NAMES: ("Market1501",)
10 |   TESTS: ("Market1501",)
11 | 
12 | OUTPUT_DIR: logs/market1501/bagtricks_R101-ibn
13 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/Market1501/bagtricks_R50-ibn.yml:
--------------------------------------------------------------------------------
 1 | _BASE_: ../Base-bagtricks.yml
 2 | 
 3 | MODEL:
 4 |   BACKBONE:
 5 |     WITH_IBN: True
 6 | 
 7 | DATASETS:
 8 |   NAMES: ("Market1501",)
 9 |   TESTS: ("Market1501",)
10 | 
11 | OUTPUT_DIR: logs/market1501/bagtricks_R50-ibn
12 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/Market1501/bagtricks_R50.yml:
--------------------------------------------------------------------------------
 1 | _BASE_: ../Base-bagtricks.yml # 链接父目录下的Base-bagtricks.yml
 2 | 
 3 | DATASETS:
 4 |   NAMES: ("Market1501",) # 数据集路径
 5 |   TESTS: ("Market1501",) # 测试集路径
 6 | 
 7 | OUTPUT_DIR: logs/market1501/bagtricks_R50 # 输出结果路径
 8 | 
 9 | MODEL:
10 |   WEIGHTS: model/market_bot_R50.pth # 预训练模型路径
11 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/Market1501/bagtricks_S50.yml:
--------------------------------------------------------------------------------
 1 | _BASE_: ../Base-bagtricks.yml
 2 | 
 3 | MODEL:
 4 |   BACKBONE:
 5 |     NAME: build_resnest_backbone
 6 | 
 7 | DATASETS:
 8 |   NAMES: ("Market1501",)
 9 |   TESTS: ("Market1501",)
10 | 
11 | OUTPUT_DIR: logs/market1501/bagtricks_S50
12 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/Market1501/bagtricks_vit.yml:
--------------------------------------------------------------------------------
 1 | 
 2 | MODEL:
 3 |   META_ARCHITECTURE: Baseline
 4 |   PIXEL_MEAN: [127.5, 127.5, 127.5]
 5 |   PIXEL_STD: [127.5, 127.5, 127.5]
 6 | 
 7 |   BACKBONE:
 8 |     NAME: build_vit_backbone
 9 |     DEPTH: base
10 |     FEAT_DIM: 768
11 |     PRETRAIN: True
12 |     PRETRAIN_PATH: /export/home/lxy/.cache/torch/checkpoints/jx_vit_base_p16_224-80ecf9dd.pth
13 |     STRIDE_SIZE: (16, 16)
14 |     DROP_PATH_RATIO: 0.1
15 |     DROP_RATIO: 0.0
16 |     ATT_DROP_RATE: 0.0
17 | 
18 |   HEADS:
19 |     NAME: EmbeddingHead
20 |     NORM: BN
21 |     WITH_BNNECK: True
22 |     POOL_LAYER: Identity
23 |     NECK_FEAT: before
24 |     CLS_LAYER: Linear
25 | 
26 |   LOSSES:
27 |     NAME: ("CrossEntropyLoss", "TripletLoss",)
28 | 
29 |     CE:
30 |       EPSILON: 0. # no smooth
31 |       SCALE: 1.
32 | 
33 |     TRI:
34 |       MARGIN: 0.0
35 |       HARD_MINING: True
36 |       NORM_FEAT: False
37 |       SCALE: 1.
38 | 
39 | INPUT:
40 |   SIZE_TRAIN: [ 256, 128 ]
41 |   SIZE_TEST: [ 256, 128 ]
42 | 
43 |   REA:
44 |     ENABLED: True
45 |     PROB: 0.5
46 | 
47 |   FLIP:
48 |     ENABLED: True
49 | 
50 |   PADDING:
51 |     ENABLED: True
52 | 
53 | DATALOADER:
54 |   SAMPLER_TRAIN: NaiveIdentitySampler
55 |   NUM_INSTANCE: 4
56 |   NUM_WORKERS: 8
57 | 
58 | SOLVER:
59 |   AMP:
60 |     ENABLED: False
61 |   OPT: SGD
62 |   MAX_EPOCH: 120
63 |   BASE_LR: 0.008
64 |   WEIGHT_DECAY: 0.0001
65 |   IMS_PER_BATCH: 64
66 | 
67 |   SCHED: CosineAnnealingLR
68 |   ETA_MIN_LR: 0.000016
69 | 
70 |   WARMUP_FACTOR: 0.01
71 |   WARMUP_ITERS: 1000
72 | 
73 |   CLIP_GRADIENTS:
74 |     ENABLED: True
75 | 
76 |   CHECKPOINT_PERIOD: 30
77 | 
78 | TEST:
79 |   EVAL_PERIOD: 5
80 |   IMS_PER_BATCH: 128
81 | 
82 | CUDNN_BENCHMARK: True
83 | 
84 | DATASETS:
85 |   NAMES: ("Market1501",)
86 |   TESTS: ("Market1501",)
87 | 
88 | OUTPUT_DIR: logs/market1501/sbs_vit_base
89 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/Market1501/mgn_R50-ibn.yml:
--------------------------------------------------------------------------------
 1 | _BASE_: ../Base-MGN.yml
 2 | 
 3 | MODEL:
 4 |   BACKBONE:
 5 |     WITH_IBN: True
 6 | 
 7 | DATASETS:
 8 |   NAMES: ("Market1501",)
 9 |   TESTS: ("Market1501",)
10 | 
11 | OUTPUT_DIR: logs/market1501/mgn_R50-ibn
12 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/Market1501/sbs_R101-ibn.yml:
--------------------------------------------------------------------------------
 1 | _BASE_: ../Base-SBS.yml
 2 | 
 3 | MODEL:
 4 |   BACKBONE:
 5 |     DEPTH: 101x
 6 |     WITH_IBN: True
 7 | 
 8 | DATASETS:
 9 |   NAMES: ("Market1501",)
10 |   TESTS: ("Market1501",)
11 | 
12 | OUTPUT_DIR: logs/market1501/sbs_R101-ibn
13 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/Market1501/sbs_R50-ibn.yml:
--------------------------------------------------------------------------------
 1 | _BASE_: ../Base-SBS.yml
 2 | 
 3 | MODEL:
 4 |   BACKBONE:
 5 |     WITH_IBN: True
 6 | 
 7 | DATASETS:
 8 |   NAMES: ("Market1501",)
 9 |   TESTS: ("Market1501",)
10 | 
11 | OUTPUT_DIR: logs/market1501/sbs_R50-ibn
12 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/Market1501/sbs_R50.yml:
--------------------------------------------------------------------------------
1 | _BASE_: ../Base-SBS.yml
2 | 
3 | DATASETS:
4 |   NAMES: ("Market1501",)
5 |   TESTS: ("Market1501",)
6 | 
7 | OUTPUT_DIR: logs/market1501/sbs_R50
8 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/Market1501/sbs_S50.yml:
--------------------------------------------------------------------------------
 1 | _BASE_: ../Base-SBS.yml
 2 | 
 3 | MODEL:
 4 |   BACKBONE:
 5 |     NAME: build_resnest_backbone
 6 | 
 7 | DATASETS:
 8 |   NAMES: ("Market1501",)
 9 |   TESTS: ("Market1501",)
10 | 
11 | OUTPUT_DIR: logs/market1501/sbs_S50
12 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/VERIWild/bagtricks_R50-ibn.yml:
--------------------------------------------------------------------------------
 1 | _BASE_: ../Base-bagtricks.yml
 2 | 
 3 | INPUT:
 4 |   SIZE_TRAIN: [256, 256]
 5 |   SIZE_TEST: [256, 256]
 6 | 
 7 | MODEL:
 8 |   BACKBONE:
 9 |     WITH_IBN: True
10 | 
11 |   HEADS:
12 |     POOL_LAYER: GeneralizedMeanPooling
13 | 
14 |   LOSSES:
15 |     TRI:
16 |       HARD_MINING: False
17 |       MARGIN: 0.0
18 | 
19 | DATASETS:
20 |   NAMES: ("VeRiWild",)
21 |   TESTS: ("SmallVeRiWild", "MediumVeRiWild", "LargeVeRiWild",)
22 | 
23 | SOLVER:
24 |   IMS_PER_BATCH: 512 # 512 For 4 GPUs
25 |   MAX_EPOCH: 120
26 |   STEPS: [30, 70, 90]
27 |   WARMUP_ITERS: 5000
28 | 
29 |   CHECKPOINT_PERIOD: 20
30 | 
31 | TEST:
32 |   EVAL_PERIOD: 10
33 |   IMS_PER_BATCH: 128
34 | 
35 | OUTPUT_DIR: logs/veriwild/bagtricks_R50-ibn_4gpu
36 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/VeRi/sbs_R50-ibn.yml:
--------------------------------------------------------------------------------
 1 | _BASE_: ../Base-SBS.yml
 2 | 
 3 | INPUT:
 4 |   SIZE_TRAIN: [256, 256]
 5 |   SIZE_TEST: [256, 256]
 6 | 
 7 | MODEL:
 8 |   BACKBONE:
 9 |     WITH_IBN: True
10 |     WITH_NL: True
11 | 
12 | SOLVER:
13 |   OPT: SGD
14 |   BASE_LR: 0.01
15 |   ETA_MIN_LR: 7.7e-5
16 | 
17 |   IMS_PER_BATCH: 64
18 |   MAX_EPOCH: 60
19 |   WARMUP_ITERS: 3000
20 |   FREEZE_ITERS: 3000
21 | 
22 |   CHECKPOINT_PERIOD: 10
23 | 
24 | DATASETS:
25 |   NAMES: ("VeRi",)
26 |   TESTS: ("VeRi",)
27 | 
28 | DATALOADER:
29 |   SAMPLER_TRAIN: BalancedIdentitySampler
30 | 
31 | TEST:
32 |   EVAL_PERIOD: 10
33 |   IMS_PER_BATCH: 256
34 | 
35 | OUTPUT_DIR: logs/veri/sbs_R50-ibn
36 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/configs/VehicleID/bagtricks_R50-ibn.yml:
--------------------------------------------------------------------------------
 1 | _BASE_: ../Base-bagtricks.yml
 2 | 
 3 | INPUT:
 4 |   SIZE_TRAIN: [256, 256]
 5 |   SIZE_TEST: [256, 256]
 6 | 
 7 | MODEL:
 8 |   BACKBONE:
 9 |     WITH_IBN: True
10 |   HEADS:
11 |     POOL_LAYER: GeneralizedMeanPooling
12 | 
13 |   LOSSES:
14 |     TRI:
15 |       HARD_MINING: False
16 |       MARGIN: 0.0
17 | 
18 | DATASETS:
19 |   NAMES: ("VehicleID",)
20 |   TESTS: ("SmallVehicleID", "MediumVehicleID", "LargeVehicleID",)
21 | 
22 | SOLVER:
23 |   BIAS_LR_FACTOR: 1.
24 | 
25 |   IMS_PER_BATCH: 512
26 |   MAX_EPOCH: 60
27 |   STEPS: [30, 50]
28 |   WARMUP_ITERS: 2000
29 | 
30 |   CHECKPOINT_PERIOD: 20
31 | 
32 | TEST:
33 |   EVAL_PERIOD: 20
34 |   IMS_PER_BATCH: 128
35 | 
36 | OUTPUT_DIR: logs/vehicleid/bagtricks_R50-ibn_4gpu
37 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/datasets/Market-1501-v15.09.15/README.md:
--------------------------------------------------------------------------------
1 | 本文件夹的文件来自于[ Market-1501-v15.09.15](https://pan.baidu.com/s/1ntIi2Op)
2 | 
3 | 文件存放样式如下图所示：
4 | 
5 | ![](img.png)


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/datasets/Market-1501-v15.09.15/img.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Deep learning/fast-reid/fast-reid_tutorial/datasets/Market-1501-v15.09.15/img.png


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/demo.py:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | """
  3 | @author:  liaoxingyu
  4 | @contact: sherlockliao01@gmail.com
  5 | 提取图像的特征，并保存
  6 | """
  7 | 
  8 | import argparse
  9 | import glob
 10 | import os
 11 | import sys
 12 | 
 13 | import torch.nn.functional as F
 14 | import cv2
 15 | import numpy as np
 16 | import tqdm
 17 | from torch.backends import cudnn
 18 | 
 19 | sys.path.append('.')
 20 | 
 21 | from fastreid.config import get_cfg
 22 | from fastreid.utils.logger import setup_logger
 23 | from fastreid.utils.file_io import PathManager
 24 | 
 25 | from predictor import FeatureExtractionDemo
 26 | 
 27 | # import some modules added in project like this below
 28 | # sys.path.append("projects/PartialReID")
 29 | # from partialreid import *
 30 | 
 31 | cudnn.benchmark = True
 32 | setup_logger(name="fastreid")
 33 | 
 34 | 
 35 | # 读取配置文件
 36 | def setup_cfg(args):
 37 |     # load config from file and command-line arguments
 38 |     cfg = get_cfg()
 39 |     # add_partialreid_config(cfg)
 40 |     cfg.merge_from_file(args.config_file)
 41 |     cfg.merge_from_list(args.opts)
 42 |     cfg.freeze()
 43 |     return cfg
 44 | 
 45 | 
 46 | def get_parser():
 47 |     parser = argparse.ArgumentParser(description="Feature extraction with reid models")
 48 |     parser.add_argument(
 49 |         "--config-file",  # config路径，通常包含模型配置文件
 50 |         metavar="FILE",
 51 |         help="path to config file",
 52 |     )
 53 |     parser.add_argument(
 54 |         "--parallel",  # 是否并行
 55 |         action='store_true',
 56 |         help='If use multiprocess for feature extraction.'
 57 |     )
 58 |     parser.add_argument(
 59 |         "--input",  # 输入图像路径
 60 |         nargs="+",
 61 |         help="A list of space separated input images; "
 62 |              "or a single glob pattern such as 'directory/*.jpg'",
 63 |     )
 64 |     parser.add_argument(
 65 |         "--output",  # 输出结果路径
 66 |         default='demo_output',
 67 |         help='path to save features'
 68 |     )
 69 |     parser.add_argument(
 70 |         "--opts",
 71 |         help="Modify config options using the command-line 'KEY VALUE' pairs",
 72 |         default=[],
 73 |         nargs=argparse.REMAINDER,
 74 |     )
 75 |     return parser
 76 | 
 77 | 
 78 | def postprocess(features):
 79 |     # Normalize feature to compute cosine distance
 80 |     features = F.normalize(features)  # 特征归一化
 81 |     features = features.cpu().data.numpy()
 82 |     return features
 83 | 
 84 | 
 85 | if __name__ == '__main__':
 86 |     args = get_parser().parse_args()  # 解析输入参数
 87 |     # 调试使用，使用的时候删除下面代码
 88 |     # ---
 89 |     args.config_file = "./configs/Market1501/bagtricks_R50.yml"  # config路径
 90 |     args.input = "./datasets/Market-1501-v15.09.15/query/*.jpg"  # 图像路径
 91 |     # ---
 92 | 
 93 |     cfg = setup_cfg(args)  # 读取cfg文件
 94 |     demo = FeatureExtractionDemo(cfg, parallel=args.parallel)  # 加载特征提取器，也就是加载模型
 95 | 
 96 |     PathManager.mkdirs(args.output)  # 创建输出路径
 97 |     if args.input:
 98 |         if PathManager.isdir(args.input[0]):  # 判断输入的是否为路径
 99 |             # args.input = glob.glob(os.path.expanduser(args.input[0])) # 原来的代码有问题
100 |             args.input = glob.glob(os.path.expanduser(args.input))  # 获取输入路径下所有的文件路径
101 |             assert args.input, "The input path(s) was not found"
102 |         for path in tqdm.tqdm(args.input):  # 逐张处理
103 |             img = cv2.imread(path)
104 |             feat = demo.run_on_image(img)  # 提取图像特征
105 |             feat = postprocess(feat)  # 后处理主要是特征归一化
106 |             np.save(os.path.join(args.output, os.path.basename(path).split('.')[0] + '.npy'), feat)  # 保存图像对应的特征，以便下次使用
107 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/fastreid/README.md:
--------------------------------------------------------------------------------
1 | 本文件夹的文件来自于[fast-reid/fastreid](https://github.com/JDAI-CV/fast-reid/tree/master/fastreid)
2 | 
3 | 文件存放样式如下图所示：
4 | 
5 | ![](img.png)


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/fastreid/img.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Deep learning/fast-reid/fast-reid_tutorial/fastreid/img.png


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/model/README.md:
--------------------------------------------------------------------------------
1 | 本文件夹的文件来自于[market_bot_R50.pth](https://github.com/JDAI-CV/fast-reid/releases/download/v0.1.1/market_bot_R50.pth)
2 | 
3 | 文件存放样式如下图所示：
4 | 
5 | ![](img.png)


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/model/img.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Deep learning/fast-reid/fast-reid_tutorial/model/img.png


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/predictor.py:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | """
  3 | @author:  xingyu liao
  4 | @contact: sherlockliao01@gmail.com
  5 | 模型加载文件
  6 | """
  7 | 
  8 | import atexit
  9 | import bisect
 10 | from collections import deque
 11 | 
 12 | import cv2
 13 | import torch
 14 | import torch.multiprocessing as mp
 15 | 
 16 | from fastreid.engine import DefaultPredictor
 17 | 
 18 | try:
 19 |     mp.set_start_method('spawn')
 20 | except RuntimeError:
 21 |     pass
 22 | 
 23 | # 加载特征提取器
 24 | class FeatureExtractionDemo(object):
 25 |     def __init__(self, cfg, parallel=False):
 26 |         """
 27 |         Args:
 28 |             cfg (CfgNode):
 29 |             parallel (bool) whether to run the model in different processes from visualization.:
 30 |                 Useful since the visualization logic can be slow.
 31 |         """
 32 |         self.cfg = cfg
 33 |         self.parallel = parallel
 34 | 
 35 |         if parallel:
 36 |             self.num_gpus = torch.cuda.device_count()
 37 |             self.predictor = AsyncPredictor(cfg, self.num_gpus)
 38 |         else:
 39 |             self.predictor = DefaultPredictor(cfg)
 40 | 
 41 |     def run_on_image(self, original_image):
 42 |         """
 43 | 
 44 |         Args:
 45 |             original_image (np.ndarray): an image of shape (H, W, C) (in BGR order).
 46 |                 This is the format used by OpenCV.
 47 | 
 48 |         Returns:
 49 |             predictions (np.ndarray): normalized feature of the model.
 50 |         """
 51 |         # the model expects RGB inputs opencv输入图像的顺序是bgr，这里更换为rgb顺序
 52 |         original_image = original_image[:, :, ::-1]
 53 |         # Apply pre-processing to image. 图像大小重置
 54 |         image = cv2.resize(original_image, tuple(self.cfg.INPUT.SIZE_TEST[::-1]), interpolation=cv2.INTER_CUBIC)
 55 |         # Make shape with a new batch dimension which is adapted for
 56 |         # network input 制作模型输入
 57 |         image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))[None]
 58 |         predictions = self.predictor(image) # 获取图像特征
 59 |         return predictions
 60 | 
 61 |     def run_on_loader(self, data_loader):
 62 |         if self.parallel:
 63 |             buffer_size = self.predictor.default_buffer_size
 64 | 
 65 |             batch_data = deque()
 66 | 
 67 |             for cnt, batch in enumerate(data_loader):
 68 |                 batch_data.append(batch)
 69 |                 self.predictor.put(batch["images"])
 70 | 
 71 |                 if cnt >= buffer_size:
 72 |                     batch = batch_data.popleft()
 73 |                     predictions = self.predictor.get()
 74 |                     yield predictions, batch["targets"].cpu().numpy(), batch["camids"].cpu().numpy()
 75 | 
 76 |             while len(batch_data):
 77 |                 batch = batch_data.popleft()
 78 |                 predictions = self.predictor.get()
 79 |                 yield predictions, batch["targets"].cpu().numpy(), batch["camids"].cpu().numpy()
 80 |         else:
 81 |             for batch in data_loader:
 82 |                 predictions = self.predictor(batch["images"])
 83 |                 yield predictions, batch["targets"].cpu().numpy(), batch["camids"].cpu().numpy()
 84 | 
 85 | 
 86 | class AsyncPredictor:
 87 |     """
 88 |     A predictor that runs the model asynchronously, possibly on >1 GPUs.
 89 |     Because when the amount of data is large.
 90 |     """
 91 | 
 92 |     class _StopToken:
 93 |         pass
 94 | 
 95 |     class _PredictWorker(mp.Process):
 96 |         def __init__(self, cfg, task_queue, result_queue):
 97 |             self.cfg = cfg
 98 |             self.task_queue = task_queue
 99 |             self.result_queue = result_queue
100 |             super().__init__()
101 | 
102 |         def run(self):
103 |             predictor = DefaultPredictor(self.cfg)
104 | 
105 |             while True:
106 |                 task = self.task_queue.get()
107 |                 if isinstance(task, AsyncPredictor._StopToken):
108 |                     break
109 |                 idx, data = task
110 |                 result = predictor(data)
111 |                 self.result_queue.put((idx, result))
112 | 
113 |     def __init__(self, cfg, num_gpus: int = 1):
114 |         """
115 | 
116 |         Args:
117 |             cfg (CfgNode):
118 |             num_gpus (int): if 0, will run on CPU
119 |         """
120 |         num_workers = max(num_gpus, 1)
121 |         self.task_queue = mp.Queue(maxsize=num_workers * 3)
122 |         self.result_queue = mp.Queue(maxsize=num_workers * 3)
123 |         self.procs = []
124 |         for gpuid in range(max(num_gpus, 1)):
125 |             cfg = cfg.clone()
126 |             cfg.defrost()
127 |             cfg.MODEL.DEVICE = "cuda:{}".format(gpuid) if num_gpus > 0 else "cpu"
128 |             self.procs.append(
129 |                 AsyncPredictor._PredictWorker(cfg, self.task_queue, self.result_queue)
130 |             )
131 | 
132 |         self.put_idx = 0
133 |         self.get_idx = 0
134 |         self.result_rank = []
135 |         self.result_data = []
136 | 
137 |         for p in self.procs:
138 |             p.start()
139 | 
140 |         atexit.register(self.shutdown)
141 | 
142 |     def put(self, image):
143 |         self.put_idx += 1
144 |         self.task_queue.put((self.put_idx, image))
145 | 
146 |     def get(self):
147 |         self.get_idx += 1
148 |         if len(self.result_rank) and self.result_rank[0] == self.get_idx:
149 |             res = self.result_data[0]
150 |             del self.result_data[0], self.result_rank[0]
151 |             return res
152 | 
153 |         while True:
154 |             # Make sure the results are returned in the correct order
155 |             idx, res = self.result_queue.get()
156 |             if idx == self.get_idx:
157 |                 return res
158 |             insert = bisect.bisect(self.result_rank, idx)
159 |             self.result_rank.insert(insert, idx)
160 |             self.result_data.insert(insert, res)
161 | 
162 |     def __len__(self):
163 |         return self.put_idx - self.get_idx
164 | 
165 |     def __call__(self, image):
166 |         self.put(image)
167 |         return self.get()
168 | 
169 |     def shutdown(self):
170 |         for _ in self.procs:
171 |             self.task_queue.put(AsyncPredictor._StopToken())
172 | 
173 |     @property
174 |     def default_buffer_size(self):
175 |         return len(self.procs) * 5
176 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/train_net.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # encoding: utf-8
 3 | """
 4 | @author:  sherlock
 5 | @contact: sherlockliao01@gmail.com
 6 | 模型训练与测试封装版代码
 7 | """
 8 | 
 9 | import sys
10 | 
11 | sys.path.append('.')
12 | 
13 | from fastreid.config import get_cfg
14 | from fastreid.engine import DefaultTrainer, default_argument_parser, default_setup, launch
15 | from fastreid.utils.checkpoint import Checkpointer
16 | 
17 | 
18 | # 读取配置文件
19 | def setup(args):
20 |     """
21 |     Create configs and perform basic setups.
22 |     """
23 |     cfg = get_cfg()
24 |     cfg.merge_from_file(args.config_file)
25 |     cfg.merge_from_list(args.opts)
26 |     cfg.freeze()
27 |     default_setup(cfg, args)
28 |     return cfg
29 | 
30 | 
31 | def main(args):
32 |     cfg = setup(args)
33 |     # 模型测试
34 |     if args.eval_only:
35 |         cfg.defrost()
36 |         cfg.MODEL.BACKBONE.PRETRAIN = False
37 |         model = DefaultTrainer.build_model(cfg)
38 |         # 加载预训练模型
39 |         Checkpointer(model).load(cfg.MODEL.WEIGHTS)  # load trained model
40 | 
41 |         res = DefaultTrainer.test(cfg, model)
42 |         return res
43 |     # 模型训练
44 |     trainer = DefaultTrainer(cfg)
45 | 
46 |     trainer.resume_or_load(resume=args.resume)
47 |     return trainer.train()
48 | 
49 | 
50 | if __name__ == "__main__":
51 |     args = default_argument_parser().parse_args()
52 |     # 调试使用，使用的时候删除下面代码
53 |     # ---
54 |     args.config_file = "./configs/Market1501/bagtricks_R50.yml"  # config路径
55 |     args.eval_only = True  # 是否测试模型,False表示训练模型，True表示测试模型
56 |     # ---
57 | 
58 |     print("Command Line Args:", args)
59 |     launch(
60 |         main,
61 |         args.num_gpus,
62 |         num_machines=args.num_machines,
63 |         machine_rank=args.machine_rank,
64 |         dist_url=args.dist_url,
65 |         args=(args,),
66 |     )
67 | 


--------------------------------------------------------------------------------
/Deep learning/fast-reid/fast-reid_tutorial/visualize_result.py:
--------------------------------------------------------------------------------
  1 | # encoding: utf-8
  2 | """
  3 | @author:  xingyu liao
  4 | @contact: sherlockliao01@gmail.com
  5 | 可视化特征提取结果
  6 | """
  7 | 
  8 | import argparse
  9 | import logging
 10 | import sys
 11 | 
 12 | import numpy as np
 13 | import torch
 14 | import tqdm
 15 | from torch.backends import cudnn
 16 | 
 17 | sys.path.append('.')
 18 | 
 19 | import torch.nn.functional as F
 20 | from fastreid.evaluation.rank import evaluate_rank
 21 | from fastreid.config import get_cfg
 22 | from fastreid.utils.logger import setup_logger
 23 | from fastreid.data import build_reid_test_loader
 24 | from predictor import FeatureExtractionDemo
 25 | from fastreid.utils.visualizer import Visualizer
 26 | 
 27 | # import some modules added in project
 28 | # for example, add partial reid like this below
 29 | # sys.path.append("projects/PartialReID")
 30 | # from partialreid import *
 31 | 
 32 | cudnn.benchmark = True
 33 | setup_logger(name="fastreid")
 34 | 
 35 | logger = logging.getLogger('fastreid.visualize_result')
 36 | 
 37 | 
 38 | # 读取配置文件
 39 | def setup_cfg(args):
 40 |     # load config from file and command-line arguments
 41 |     cfg = get_cfg()
 42 |     # add_partialreid_config(cfg)
 43 |     cfg.merge_from_file(args.config_file)
 44 |     cfg.merge_from_list(args.opts)
 45 |     cfg.freeze()
 46 |     return cfg
 47 | 
 48 | 
 49 | def get_parser():
 50 |     parser = argparse.ArgumentParser(description="Feature extraction with reid models")
 51 |     parser.add_argument(
 52 |         "--config-file",  # config路径，通常包含模型配置文件
 53 |         metavar="FILE",
 54 |         help="path to config file",
 55 |     )
 56 |     parser.add_argument(
 57 |         '--parallel',  # 是否并行
 58 |         action='store_true',
 59 |         help='if use multiprocess for feature extraction.'
 60 |     )
 61 |     parser.add_argument(
 62 |         "--dataset-name",  # 数据集名字
 63 |         help="a test dataset name for visualizing ranking list."
 64 |     )
 65 |     parser.add_argument(
 66 |         "--output",  # 输出结果路径
 67 |         default="./vis_rank_list",
 68 |         help="a file or directory to save rankling list result.",
 69 | 
 70 |     )
 71 |     parser.add_argument(
 72 |         "--vis-label",  # 输出结果是否查看
 73 |         action='store_true',
 74 |         help="if visualize label of query instance"
 75 |     )
 76 |     parser.add_argument(
 77 |         "--num-vis",  # 挑选多少张图像用于结果展示
 78 |         default=1000,
 79 |         help="number of query images to be visualized",
 80 |     )
 81 |     parser.add_argument(
 82 |         "--rank-sort",  # 结果展示是相似度排序方式，默认从小到大排序
 83 |         default="ascending",
 84 |         help="rank order of visualization images by AP metric",
 85 |     )
 86 |     parser.add_argument(
 87 |         "--label-sort",  # label结果展示是相似度排序方式，默认从小到大排序
 88 |         default="ascending",
 89 |         help="label order of visualization images by cosine similarity metric",
 90 |     )
 91 |     parser.add_argument(
 92 |         "--max-rank",  # 显示topk的结果，默认显示前10个结果
 93 |         default=5,
 94 |         help="maximum number of rank list to be visualized",
 95 |     )
 96 |     parser.add_argument(
 97 |         "--opts",
 98 |         help="Modify config options using the command-line 'KEY VALUE' pairs",
 99 |         default=[],
100 |         nargs=argparse.REMAINDER,
101 |     )
102 |     return parser
103 | 
104 | 
105 | if __name__ == '__main__':
106 |     args = get_parser().parse_args()
107 |     # 调试使用，使用的时候删除下面代码
108 |     # ---
109 |     args.config_file = "./configs/Market1501/bagtricks_R50.yml"  # config路径
110 |     args.dataset_name = 'Market1501'  # 数据集名字
111 |     args.vis_label = False  # 是否显示正确label结果
112 |     args.rank_sort = 'descending'  # 从大到小显示关联结果
113 |     args.label_sort = 'descending'  # 从大到小显示关联结果
114 |     # ---
115 | 
116 |     cfg = setup_cfg(args)
117 |     # 可以直接在代码中设置cfg中设置模型路径
118 |     # cfg["MODEL"]["WEIGHTS"] = './configs/Market1501/bagtricks_R50.yml'
119 |     test_loader, num_query = build_reid_test_loader(cfg, dataset_name=args.dataset_name)  # 创建测试数据集
120 |     demo = FeatureExtractionDemo(cfg, parallel=args.parallel)  # 加载特征提取器，也就是加载模型
121 | 
122 |     logger.info("Start extracting image features")
123 |     feats = []  # 图像特征，用于保存每个行人的图像特征
124 |     pids = []  # 行人id，用于保存每个行人的id
125 |     camids = []  # 拍摄的摄像头，行人出现的摄像头id
126 |     # 逐张保存读入行人图像，并保存相关信息
127 |     for (feat, pid, camid) in tqdm.tqdm(demo.run_on_loader(test_loader), total=len(test_loader)):
128 |         feats.append(feat)
129 |         pids.extend(pid)
130 |         camids.extend(camid)
131 | 
132 |     feats = torch.cat(feats, dim=0)  # 将feats转换为tensor的二维向量，向量维度为[图像数，特征维度]
133 |     # 这里把query和gallery数据放在一起了，需要切分query和gallery的数据
134 |     q_feat = feats[:num_query]
135 |     g_feat = feats[num_query:]
136 |     q_pids = np.asarray(pids[:num_query])
137 |     g_pids = np.asarray(pids[num_query:])
138 |     q_camids = np.asarray(camids[:num_query])
139 |     g_camids = np.asarray(camids[num_query:])
140 | 
141 |     # compute cosine distance 计算余弦距离
142 |     q_feat = F.normalize(q_feat, p=2, dim=1)
143 |     g_feat = F.normalize(g_feat, p=2, dim=1)
144 |     distmat = 1 - torch.mm(q_feat, g_feat.t())  # 这里distmat表示两张图像的距离，越小越接近
145 |     distmat = distmat.numpy()
146 | 
147 |     # 计算各种评价指标 cmc[0]就是top1精度，应该是93%左右，这里精度会有波动
148 |     logger.info("Computing APs for all query images ...")
149 |     cmc, all_ap, all_inp = evaluate_rank(distmat, q_pids, g_pids, q_camids, g_camids)
150 |     logger.info("Finish computing APs for all query images!")
151 | 
152 |     visualizer = Visualizer(test_loader.dataset)  # 创建Visualizer类
153 |     visualizer.get_model_output(all_ap, distmat, q_pids, g_pids, q_camids, g_camids)  # 保存结果
154 | 
155 |     logger.info("Start saving ROC curve ...")  # 保存ROC曲线
156 |     fpr, tpr, pos, neg = visualizer.vis_roc_curve(args.output)
157 |     visualizer.save_roc_info(args.output, fpr, tpr, pos, neg)
158 |     logger.info("Finish saving ROC curve!")
159 | 
160 |     logger.info("Saving rank list result ...")  # 保存部分查询图像的关联结果，按照顺序排列
161 |     query_indices = visualizer.vis_rank_list(args.output, args.vis_label, args.num_vis,
162 |                                              args.rank_sort, args.label_sort, args.max_rank)
163 |     logger.info("Finish saving rank list results!")
164 | 


--------------------------------------------------------------------------------
/Deep learning/基于切片辅助超推理库SAHI优化小目标识别/image/sliced_inference.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Deep learning/基于切片辅助超推理库SAHI优化小目标识别/image/sliced_inference.gif


--------------------------------------------------------------------------------
/Deep learning/基于切片辅助超推理库SAHI优化小目标识别/image/small-vehicles1.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Deep learning/基于切片辅助超推理库SAHI优化小目标识别/image/small-vehicles1.jpeg


--------------------------------------------------------------------------------
/Deep learning/基于切片辅助超推理库SAHI优化小目标识别/image/terrain2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Deep learning/基于切片辅助超推理库SAHI优化小目标识别/image/terrain2.png


--------------------------------------------------------------------------------
/Deep learning/基于切片辅助超推理库SAHI优化小目标识别/image/terrain2_coco.json:
--------------------------------------------------------------------------------
  1 | {
  2 |     "images": [
  3 |         {
  4 |             "height": 682,
  5 |             "width": 1024,
  6 |             "id": 1,
  7 |             "file_name": "terrain2.png"
  8 |         }
  9 |     ],
 10 |     "categories": [
 11 |         {
 12 |             "supercategory": "car",
 13 |             "id": 1,
 14 |             "name": "car"
 15 |         }
 16 |     ],
 17 |     "annotations": [
 18 |         {
 19 |             "iscrowd": 0,
 20 |             "image_id": 1,
 21 |             "bbox": [
 22 |                 218.0,
 23 |                 448.0,
 24 |                 222.0,
 25 |                 161.0
 26 |             ],
 27 |             "segmentation": [
 28 |                 [
 29 |                     218.0,
 30 |                     448.0,
 31 |                     440.0,
 32 |                     448.0,
 33 |                     440.0,
 34 |                     609.0,
 35 |                     218.0,
 36 |                     609.0
 37 |                 ]
 38 |             ],
 39 |             "category_id": 1,
 40 |             "id": 1,
 41 |             "area": 698368
 42 |         },
 43 |         {
 44 |             "iscrowd": 0,
 45 |             "image_id": 1,
 46 |             "bbox": [
 47 |                 501.0,
 48 |                 451.0,
 49 |                 121.0,
 50 |                 92.0
 51 |             ],
 52 |             "segmentation": [
 53 |                 [
 54 |                     501.0,
 55 |                     451.0,
 56 |                     622.0,
 57 |                     451.0,
 58 |                     622.0,
 59 |                     543.0,
 60 |                     501.0,
 61 |                     543.0
 62 |                 ]
 63 |             ],
 64 |             "category_id": 1,
 65 |             "id": 2,
 66 |             "area": 698368
 67 |         },
 68 |         {
 69 |             "iscrowd": 0,
 70 |             "image_id": 1,
 71 |             "bbox": [
 72 |                 634.0,
 73 |                 437.0,
 74 |                 81.0,
 75 |                 56.0
 76 |             ],
 77 |             "segmentation": [
 78 |                 [
 79 |                     634.0,
 80 |                     437.0,
 81 |                     715.0,
 82 |                     437.0,
 83 |                     715.0,
 84 |                     493.0,
 85 |                     634.0,
 86 |                     493.0
 87 |                 ]
 88 |             ],
 89 |             "category_id": 1,
 90 |             "id": 3,
 91 |             "area": 698368
 92 |         },
 93 |         {
 94 |             "iscrowd": 0,
 95 |             "image_id": 1,
 96 |             "bbox": [
 97 |                 725.0,
 98 |                 423.0,
 99 |                 70.0,
100 |                 51.0
101 |             ],
102 |             "segmentation": [
103 |                 [
104 |                     725.0,
105 |                     423.0,
106 |                     795.0,
107 |                     423.0,
108 |                     795.0,
109 |                     474.0,
110 |                     725.0,
111 |                     474.0
112 |                 ]
113 |             ],
114 |             "category_id": 1,
115 |             "id": 4,
116 |             "area": 698368
117 |         },
118 |         {
119 |             "iscrowd": 0,
120 |             "image_id": 1,
121 |             "bbox": [
122 |                 791.0,
123 |                 404.0,
124 |                 40.0,
125 |                 47.0
126 |             ],
127 |             "segmentation": [
128 |                 [
129 |                     791.0,
130 |                     404.0,
131 |                     831.0,
132 |                     404.0,
133 |                     831.0,
134 |                     451.0,
135 |                     791.0,
136 |                     451.0
137 |                 ]
138 |             ],
139 |             "category_id": 1,
140 |             "id": 5,
141 |             "area": 698368
142 |         }
143 |     ]
144 | }
145 | 


--------------------------------------------------------------------------------
/Documents/Yellowbrick使用笔记/Yellowbrick使用笔记2-模型选择.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "collapsed": false
  7 |    },
  8 |    "source": [
  9 |     "在本教程中，我们将查看各种Scikit Learn模型的分数，并使用Yellowbrick的可视化诊断工具对它们进行比较，以便为我们的数据选择最佳的模型。"
 10 |    ]
 11 |   },
 12 |   {
 13 |    "cell_type": "markdown",
 14 |    "metadata": {
 15 |     "collapsed": false
 16 |    },
 17 |    "source": [
 18 |     "# 1 使用说明\n",
 19 |     "## 1.1 模型选择三原则\n",
 20 |     "关于机器学习的讨论通常以单一的模型选择为特点。不管是logistic回归、随机森林、贝叶斯方法，还是人工神经网络，机器学习的实践者通常很快就能表达他们的偏好。原因主要是历史原因。尽管现代的第三方机器学习库使得多个模型的部署变得几乎微不足道，但传统上，即使是其中一个算法的应用和调整都需要多年的研究。因此，机器学习的实践者往往对特定的（可能更熟悉的）模型有强烈的偏好。\n",
 21 |     "然而，模型选择要比简单地选择“正确”或“错误”算法更为微妙。实际上，工作流程包括：\n",
 22 |     "+ 选择和/或工程最小和最具预测性的特征集\n",
 23 |     "+ 从模型族中选择一组算法\n",
 24 |     "+ 调整算法超参数以优化性能\n",
 25 |     "\n",
 26 |     "最近，通过网格搜索方法、标准化的api和基于GUI的应用程序，这个工作流的大部分已经实现了自动化。然而，在实践中，人类的直觉和指导比穷尽搜索更有效地磨练质量模型。通过可视化模型选择过程，数据科学家可以转向最终的、可解释的模型，并避免陷阱。\n",
 27 |     "\n",
 28 |     "Yellowbrick库是一种用于机器学习的诊断可视化平台，它使数据科学家可以控制模型的选择过程。Yellowbrick通过新的核心对象：Visualizer扩展了Scikit-Learn API。可视化工具允许在Scikit-Learn流水线过程中对可视模型进行拟合和转换，从而在整个高维数据转换过程中提供可视化诊断。\n",
 29 |     "\n"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "markdown",
 34 |    "metadata": {
 35 |     "collapsed": false
 36 |    },
 37 |    "source": [
 38 |     "## 1.2 关于数据\n",
 39 |     "本教程使用Yellowbrick 示例数据集模块中的蘑菇数据。我们的目标是根据蘑菇的特征来预测蘑菇是否有毒或可食用。蘑菇数据的YB版本不同于[UCI机器学习存储库](http://archive.ics.uci.edu/ml/)中的蘑菇数据集。Yellowbrick版本已被有意修改，使建模更具挑战性。这些数据包括对蘑菇和麻风菌科23种有鳃蘑菇的假设样本的描述。每一个物种都被确定为绝对可食用，绝对有毒，或未知的食用性和不推荐（后一类是与有毒的一类相结合）。\n",
 40 |     "\n",
 41 |     "Yellowbrick的数据包含3个属性的信息和8124个蘑菇实例的目标值（4208个可食，3916个有毒）。\n",
 42 |     "\n",
 43 |     "让我们加载数据：\n",
 44 |     "\n",
 45 |     "\n"
 46 |    ]
 47 |   },
 48 |   {
 49 |    "cell_type": "code",
 50 |    "execution_count": 7,
 51 |    "metadata": {
 52 |     "collapsed": false
 53 |    },
 54 |    "outputs": [
 55 |     {
 56 |      "name": "stdout",
 57 |      "output_type": "stream",
 58 |      "text": [
 59 |       "    shape surface   color\n",
 60 |       "0  convex  smooth  yellow\n",
 61 |       "1    bell  smooth   white\n",
 62 |       "2  convex   scaly   white\n",
 63 |       "3  convex  smooth    gray\n",
 64 |       "4  convex   scaly  yellow\n"
 65 |      ]
 66 |     }
 67 |    ],
 68 |    "source": [
 69 |     "from yellowbrick.datasets import load_mushroom\r\n",
 70 |     "\r\n",
 71 |     "X, y = load_mushroom()\r\n",
 72 |     "# inspect the first five rows\r\n",
 73 |     "# 查看前五行数据\r\n",
 74 |     "print(X[:5]) "
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "markdown",
 79 |    "metadata": {
 80 |     "collapsed": false
 81 |    },
 82 |    "source": [
 83 |     "## 1.3 特征提取\n",
 84 |     "我们的数据（包括目标）是分类的。我们将需要将这些值更改为数字值以进行机器学习。为了从数据集中提取数据，我们必须使用scikit-learn转换器将输入数据集转换为适合模型的数据。幸运的是，scikit-learn确实提供了将类别标签转换为数字整数的转换器：[sklearn.preprocessing.LabelEncoder](http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.LabelEncoder.html)和[sklearn.preprocessing.OneHotEncoder](http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.OneHotEncoder.html)。\n",
 85 |     "\n",
 86 |     "我们将结合使用scikit-learn的Pipeline对象。\n",
 87 |     "```\n",
 88 |     "from sklearn.pipeline import Pipeline\n",
 89 |     "from sklearn.preprocessing import OneHotEncoder, LabelEncoder\n",
 90 |     "\n",
 91 |     "# Label-encode targets before modeling\n",
 92 |     "# 标记编码目标\n",
 93 |     "y = LabelEncoder().fit_transform(y)\n",
 94 |     "\n",
 95 |     "# One-hot encode columns before modeling\n",
 96 |     "# 建立一个热编码建模器\n",
 97 |     "model = Pipeline([\n",
 98 |     " ('one_hot_encoder', OneHotEncoder()),\n",
 99 |     " ('estimator', estimator)\n",
100 |     "])\n",
101 |     "```"
102 |    ]
103 |   },
104 |   {
105 |    "cell_type": "markdown",
106 |    "metadata": {
107 |     "collapsed": false
108 |    },
109 |    "source": [
110 |     "## 1.4 建模与评估\n",
111 |     "### 1.4.1 评估分类器的通用指标\n",
112 |     "\n",
113 |     "精确度是正确阳性结果的数量除以所有阳性结果的数量（例如，我们预测的蘑菇中实际有多少可以食用？）。\n",
114 |     "召回率是正确的阳性结果数除以应该返回的阳性结果数（例如，我们准确预测了多少有毒的蘑菇是有毒的？）。\n",
115 |     "F1成绩是测试准确性的一个衡量标准。它同时考虑了测试的精确性和召回率来计算分数。F1分数可以解释为精确度和召回率的加权平均值，F1分数在1时达到最佳值，最差值在0时达到最差值。\n",
116 |     "\n",
117 |     "其计算公式如下：\n",
118 |     "```\n",
119 |     "precision = true positives / (true positives + false positives)\n",
120 |     "\n",
121 |     "recall = true positives / (false negatives + true positives)\n",
122 |     "\n",
123 |     "F1 score = 2 * ((precision * recall) / (precision + recall))\n",
124 |     "```\n",
125 |     "\n",
126 |     "现在我们准备做出一些预测！\n",
127 |     "\n",
128 |     "让我们建立一种评估多个估计量的方法-首先使用传统的数字评分（稍后将与Yellowbrick库中的某些视觉诊断进行比较）。"
129 |    ]
130 |   },
131 |   {
132 |    "cell_type": "code",
133 |    "execution_count": 8,
134 |    "metadata": {
135 |     "collapsed": false
136 |    },
137 |    "outputs": [
138 |     {
139 |      "name": "stdout",
140 |      "output_type": "stream",
141 |      "text": [
142 |       "SVC: 0.6624286455630514\n",
143 |       "NuSVC: 0.6726016476215785\n",
144 |       "LinearSVC: 0.6583804143126177\n",
145 |       "SGDClassifier: 0.6730980583709855\n",
146 |       "KNeighborsClassifier: 0.6581185045215279\n",
147 |       "LogisticRegression: 0.6580434509606933\n"
148 |      ]
149 |     },
150 |     {
151 |      "name": "stderr",
152 |      "output_type": "stream",
153 |      "text": [
154 |       "/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/sklearn/linear_model/logistic.py:757: ConvergenceWarning: lbfgs failed to converge. Increase the number of iterations.\n",
155 |       "  \"of iterations.\", ConvergenceWarning)\n"
156 |      ]
157 |     },
158 |     {
159 |      "name": "stdout",
160 |      "output_type": "stream",
161 |      "text": [
162 |       "LogisticRegressionCV: 0.6583804143126177\n",
163 |       "BaggingClassifier: 0.6871364804544838\n",
164 |       "ExtraTreesClassifier: 0.6871364804544838\n",
165 |       "RandomForestClassifier: 0.6873056644585642\n"
166 |      ]
167 |     }
168 |    ],
169 |    "source": [
170 |     "from sklearn.metrics import f1_score\r\n",
171 |     "from sklearn.pipeline import Pipeline\r\n",
172 |     "from sklearn.svm import LinearSVC, NuSVC, SVC\r\n",
173 |     "from sklearn.neighbors import KNeighborsClassifier\r\n",
174 |     "from sklearn.preprocessing import OneHotEncoder, LabelEncoder\r\n",
175 |     "from sklearn.linear_model import LogisticRegressionCV, LogisticRegression, SGDClassifier\r\n",
176 |     "from sklearn.ensemble import BaggingClassifier, ExtraTreesClassifier, RandomForestClassifier\r\n",
177 |     "\r\n",
178 |     "\r\n",
179 |     "models = [\r\n",
180 |     "    SVC(gamma='auto'), NuSVC(gamma='auto'), LinearSVC(),\r\n",
181 |     "    SGDClassifier(max_iter=100, tol=1e-3), KNeighborsClassifier(),\r\n",
182 |     "    LogisticRegression(solver='lbfgs'), LogisticRegressionCV(cv=3),\r\n",
183 |     "    BaggingClassifier(), ExtraTreesClassifier(n_estimators=300),\r\n",
184 |     "    RandomForestClassifier(n_estimators=300)\r\n",
185 |     "]\r\n",
186 |     "\r\n",
187 |     "\r\n",
188 |     "def score_model(X, y, estimator, **kwargs):\r\n",
189 |     "    \"\"\"\r\n",
190 |     "    Test various estimators.\r\n",
191 |     "    \"\"\"\r\n",
192 |     "    y = LabelEncoder().fit_transform(y)\r\n",
193 |     "    model = Pipeline([\r\n",
194 |     "        ('one_hot_encoder', OneHotEncoder()),\r\n",
195 |     "        ('estimator', estimator)\r\n",
196 |     "    ])\r\n",
197 |     "\r\n",
198 |     "    # Instantiate the classification model and visualizer\r\n",
199 |     "    # 初始化模型\r\n",
200 |     "    model.fit(X, y, **kwargs)\r\n",
201 |     "\r\n",
202 |     "    # 真实值\r\n",
203 |     "    expected  = y\r\n",
204 |     "    # 预测值\r\n",
205 |     "    predicted = model.predict(X)\r\n",
206 |     "\r\n",
207 |     "    # Compute and return F1 (harmonic mean of precision and recall)\r\n",
208 |     "    #   #计算并返回F1（精度和召回率的平均值）\r\n",
209 |     "    print(\"{}: {}\".format(estimator.__class__.__name__, f1_score(expected, predicted)))\r\n",
210 |     "\r\n",
211 |     "for model in models:\r\n",
212 |     "    score_model(X, y, model);"
213 |    ]
214 |   },
215 |   {
216 |    "cell_type": "markdown",
217 |    "metadata": {
218 |     "collapsed": false
219 |    },
220 |    "source": [
221 |     "初步模型评估看来，根据上述F1分数的结果，哪种模型表现最好？\n",
222 |     "\n",
223 |     "SGDClassifier\n"
224 |    ]
225 |   },
226 |   {
227 |    "cell_type": "markdown",
228 |    "metadata": {
229 |     "collapsed": false
230 |    },
231 |    "source": [
232 |     "### 1.4.2 视觉模型评估\n",
233 |     "现在，让我们重构我们的模型评估函数，以使用Yellowbrick的ClassificationReport类，一个显示精度、召回率和F1分数的模型可视化工具。这个可视化的模型分析工具集成了数字分数和彩色编码的热图，以支持简单的解释和检测，特别是I型和II型错误的细微差别，它们与我们的用例非常相关（甚至可以挽救生命）！\n",
234 |     "\n",
235 |     "I型错误（或“假阳性”）是指检测不存在的影响（例如，当蘑菇事实上可以食用时，但判断为蘑菇有毒）。  \n",
236 |     "II错误（或“假阴性”）是指未能检测到存在的影响（例如，蘑菇实际上有毒，但可判断为以食用）。\n",
237 |     "\n",
238 |     "因此我们通过一下代码显示了各个模型的混淆矩阵"
239 |    ]
240 |   },
241 |   {
242 |    "cell_type": "code",
243 |    "execution_count": 6,
244 |    "metadata": {
245 |     "collapsed": false
246 |    },
247 |    "outputs": [],
248 |    "source": [
249 |     "from sklearn.pipeline import Pipeline\r\n",
250 |     "from yellowbrick.classifier import ClassificationReport\r\n",
251 |     "\r\n",
252 |     "\r\n",
253 |     "def visualize_model(X, y, estimator, **kwargs):\r\n",
254 |     "    \"\"\"\r\n",
255 |     "    Test various estimators.\r\n",
256 |     "    \"\"\"\r\n",
257 |     "    y = LabelEncoder().fit_transform(y)\r\n",
258 |     "    model = Pipeline([\r\n",
259 |     "        ('one_hot_encoder', OneHotEncoder()),\r\n",
260 |     "        ('estimator', estimator)\r\n",
261 |     "    ])\r\n",
262 |     "\r\n",
263 |     "    # Instantiate the classification model and visualizer\r\n",
264 |     "    visualizer = ClassificationReport(\r\n",
265 |     "        model, classes=['edible', 'poisonous'],\r\n",
266 |     "        cmap=\"YlGn\", size=(600, 360), **kwargs\r\n",
267 |     "    )\r\n",
268 |     "    visualizer.fit(X, y)\r\n",
269 |     "    visualizer.score(X, y)\r\n",
270 |     "    visualizer.show()\r\n",
271 |     "\r\n",
272 |     "for model in models:\r\n",
273 |     "    visualize_model(X, y, model)"
274 |    ]
275 |   },
276 |   {
277 |    "cell_type": "markdown",
278 |    "metadata": {
279 |     "collapsed": false
280 |    },
281 |    "source": [
282 |     "# 2 参考\n",
283 |     "[https://www.scikit-yb.org/en/latest/tutorial.html#modeling-and-evaluation](https://www.scikit-yb.org/en/latest/tutorial.html#modeling-and-evaluation)"
284 |    ]
285 |   }
286 |  ],
287 |  "metadata": {
288 |   "kernelspec": {
289 |    "display_name": "PaddlePaddle 1.8.0 (Python 3.5)",
290 |    "language": "python",
291 |    "name": "py35-paddle1.2.0"
292 |   },
293 |   "language_info": {
294 |    "codemirror_mode": {
295 |     "name": "ipython",
296 |     "version": 3
297 |    },
298 |    "file_extension": ".py",
299 |    "mimetype": "text/x-python",
300 |    "name": "python",
301 |    "nbconvert_exporter": "python",
302 |    "pygments_lexer": "ipython3",
303 |    "version": "3.7.4"
304 |   }
305 |  },
306 |  "nbformat": 4,
307 |  "nbformat_minor": 1
308 | }
309 | 


--------------------------------------------------------------------------------
/Documents/[python] CairoSVG使用教程.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# 1 CairoSVG介绍 \n",
  8 |     "CairoSVG是一个将SVG1.1转为PNG，PDF, PS格式的转化。SVG算目前火热的图像文件格式了，它的英文全称为Scalable Vector Graphics，意思为可缩放的矢量图形，但是SVG要专用软件才能编辑打开,通过CairSVG我们就能将SVG格式转换为常用的格式。它为类Unix操作系统（至少Linux和macOS）和Windows提供了命令行界面和Python 3.5+库。它是一个开源软件，具有LGPLv3许可。  \n",
  9 |     "CairoSVG用Python编写，基于著名的2D图形库Cairo。它在来自W3C测试套件的 SVG样本上进行了测试。它还依赖tinycss2和 cssselect2来应用CSS，并依赖 defusedxml来检测不安全的SVG文件。嵌入式栅格图像由Pillow处理。 \n",
 10 |     "CarioSVG仅支持python3，你可以用pip命令安装，安装代码如下：\n",
 11 |     "> pip3 install cairosvg\n",
 12 |     "\n",
 13 |     "本文主要使用2.4.2版本，当前版本的CairoSVG至少需要Python 3.5，但不适用于Python2.x。较旧的CairoSVG（1.x）版本可在Python 2.x中使用，但不再受支持。CairoSVG及其依赖项在安装过程中可能需要其他工具，这些工具的名称取决于您使用的操作系统。具体如下：\n",
 14 |     "+ 在Windows上，您必须安装适用于Python和Cairo的Visual C ++编译器\n",
 15 |     "+ 在macOS上，您必须安装cairo和libffi \n",
 16 |     "+ 在Linux上，你必须安装cairo，python3-dev和libffi-dev（名称可能为你的系统版本有所不同)\n",
 17 |     "如果您不知道如何安装这些工具，则可以按照[WeasyPrint安装指南](https://weasyprint.readthedocs.io/en/latest/install.html)中的简单步骤进行操作：安装WeasyPrint还将安装CairoSVG。"
 18 |    ]
 19 |   },
 20 |   {
 21 |    "cell_type": "markdown",
 22 |    "metadata": {},
 23 |    "source": [
 24 |     "# 2 CairoSVG的使用 "
 25 |    ]
 26 |   },
 27 |   {
 28 |    "cell_type": "markdown",
 29 |    "metadata": {},
 30 |    "source": [
 31 |     "## 2.1 命令行使用 \n",
 32 |     "通过命令行你就可以使用CairoSVG，以下代码能够将当前目录下的image.svg文件转换为image.png文件：\n",
 33 |     "> cairosvg image.svg -o image.png\n",
 34 |     "\n",
 35 |     "具体CairoSVG命令行参数如下：\n",
 36 |     "```\n",
 37 |     "cairosvg --help\n",
 38 |     "usage: cairosvg [-h] [-v] [-f {pdf,png,ps,svg}] [-d DPI] [-W WIDTH]\n",
 39 |     "                [-H HEIGHT] [-s SCALE] [-u] [--output-width OUTPUT_WIDTH]\n",
 40 |     "                [--output-height OUTPUT_HEIGHT] [-o OUTPUT]\n",
 41 |     "                input\n",
 42 |     "\n",
 43 |     "Convert SVG files to other formats\n",
 44 |     "\n",
 45 |     "positional arguments:\n",
 46 |     "  input                 input filename or URL 文件名或者url链接名\n",
 47 |     "\n",
 48 |     "optional arguments:\n",
 49 |     "  -h, --help            show this help message and exit 帮助\n",
 50 |     "  -v, --version         show program's version number and exit 版本查看\n",
 51 |     "  -f {pdf,png,ps,svg}   --format {pdf,png,ps,svg} output format 输出格式                      \n",
 52 |     "  -d DPI, --dpi DPI     ratio between 1 inch and 1 pixel 输出图像dpi比率设置 DPI比率介于1英寸和1像素之间\n",
 53 |     "  -W WIDTH, --width WIDTH    width of the parent container in pixels 输入图像宽\n",
 54 |     "  -H HEIGHT, --height HEIGHT  height of the parent container in pixels 输入图像高\n",
 55 |     "  -s SCALE, --scale SCALE    output scaling factor 输出图像缩放比例\n",
 56 |     "  -u, --unsafe          resolve XML entities and allow very large files 解析XML实体\n",
 57 |     "                        (WARNING: vulnerable to XXE attacks and various DoS) 但是有安全问题\n",
 58 |     "  --output-width OUTPUT_WIDTH     desired output width in pixels 期望图像输出宽\n",
 59 |     "  --output-height OUTPUT_HEIGHT   desired output height in pixels 期望图像输出高\n",
 60 |     "  -o OUTPUT, --output OUTPUT     output filename 图像输出名\n",
 61 |     "```\n",
 62 |     "支持的输出格式是pdf，ps，png和svg（默认为 pdf）。默认output为标准输出。如果提供了输出文件名，则会根据扩展名自动选择格式。这些dpi选项设置像素与实际单位（例如，毫米和英寸）之间的比率（如[规范](https://www.w3.org/TR/SVG11/coords.html)中所述）。可以为SVG文件提供宽度和高度选项来设置容器大小。此外，如果-用作文件名，CairoSVG将从标准输入中读取SVG字符串。"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "markdown",
 67 |    "metadata": {},
 68 |    "source": [
 69 |     "## 2.2 python库使用\n",
 70 |     "CairoSVG为Python 3.5+提供了一个模块。该cairosvg模块提供4个功能：\n",
 71 |     "+ svg转pdf\n",
 72 |     "+ svg转png\n",
 73 |     "+ svg转ps\n",
 74 |     "+ svg2转svg(svg文件切割)\n",
 75 |     "\n",
 76 |     "这些函数需要以下命名参数之一：\n",
 77 |     "+ url，URL或文件名\n",
 78 |     "+ file_obj，类似文件的对象\n",
 79 |     "+ bytestring，一个包含SVG的字节字符串\n",
 80 |     "\n",
 81 |     "他们还可以接收与命令行选项相对应的这些可选参数：\n",
 82 |     "+ parent_width\n",
 83 |     "+ parent_height\n",
 84 |     "+ dpi\n",
 85 |     "+ scale\n",
 86 |     "+ unsafe\n",
 87 |     "\n",
 88 |     "如果write_to提供了参数（文件名或类似文件的对象），则将输出写入此处。否则，该函数将返回一个字节字符串。例如：\n",
 89 |     "> cairosvg.svg2png(url=\"/path/to/input.svg\", write_to=\"/tmp/output.png\")  \n",
 90 |     "cairosvg.svg2pdf(file_obj=open(\"/path/to/input.svg\", \"rb\"), write_to=\"/tmp/output.pdf\")  \n",
 91 |     "output = cairosvg.svg2ps(bytestring=open(\"/path/to/input.svg\").read().encode('utf-8'))\n",
 92 |     "\n",
 93 |     "使用实例：\n",
 94 |     "``` python\n",
 95 |     "# -*- coding: utf-8 -*-\n",
 96 |     "\n",
 97 |     "# 导入cairosvg库\n",
 98 |     "import cairosvg\n",
 99 |     "\n",
100 |     "# svg转pdf\n",
101 |     "# file_obj输入文件名 write_to输出文件名\n",
102 |     "cairosvg.svg2pdf(file_obj=open(\"image.svg\", \"rb\"), write_to=\"output.pdf\")  \n",
103 |     "\n",
104 |     "# svg转png\n",
105 |     "# file_obj输入文件名 write_to输出文件名 scale输出图像放大倍数\n",
106 |     "cairosvg.svg2png(file_obj=open(\"image.svg\", \"rb\"), write_to=\"d:/output.png\",scale=3.0)\n",
107 |     "```"
108 |    ]
109 |   },
110 |   {
111 |    "cell_type": "markdown",
112 |    "metadata": {},
113 |    "source": [
114 |     "# 3 参考\n",
115 |     "+ [CairoSVG官网](https://cairosvg.org/)"
116 |    ]
117 |   }
118 |  ],
119 |  "metadata": {
120 |   "kernelspec": {
121 |    "display_name": "Python 3",
122 |    "language": "python",
123 |    "name": "python3"
124 |   },
125 |   "language_info": {
126 |    "codemirror_mode": {
127 |     "name": "ipython",
128 |     "version": 3
129 |    },
130 |    "file_extension": ".py",
131 |    "mimetype": "text/x-python",
132 |    "name": "python",
133 |    "nbconvert_exporter": "python",
134 |    "pygments_lexer": "ipython3",
135 |    "version": "3.7.1"
136 |   }
137 |  },
138 |  "nbformat": 4,
139 |  "nbformat_minor": 2
140 | }
141 | 


--------------------------------------------------------------------------------
/Documents/[python] Python二维码生成器qrcode库入门.md:
--------------------------------------------------------------------------------
  1 | # Python二维码生成器qrcode库入门
  2 | 
  3 | qrcode是二维码生成的Python开源库，官方地址为[python-qrcode](https://github.com/lincolnloop/python-qrcode)
  4 | 
  5 | # 1 简介
  6 | 
  7 | qrcode依赖于pillow，安装代码如下：
  8 | 
  9 | > pip install qrcode[pil]
 10 | 
 11 | **什么是二维码QRCode？**
 12 | 
 13 | 快速响应码Quick Response Code（QRCode）是一种二维象形码，具有快速的可读性和较大的存储容量。 该码由在白色背景上以方形图案排列的黑色模块组成（可以更改颜色）。 编码的信息可以由任何类型的数据组成（例如，二进制、字母数字或汉字符号）。二维码能存储大量的数据，将所有数据存储为方形网格中的一系列像素。二维码详细的细节和原理见[二维码的生成细节和原理](https://blog.csdn.net/wangguchao/article/details/85328655)。
 14 | 
 15 | 
 16 | # 2 用法
 17 | ## 2.1 基础使用
 18 | 
 19 | ### 2.1.1 命令行使用
 20 | 
 21 | 从命令行，使用已安装的qr脚本：
 22 | 
 23 | 
 24 | ```python
 25 | ! qr "hello world!" > test1.png
 26 | ```
 27 | 
 28 | 然后我们可以在当前路径获得一个名为test1.png的二维码文件，图像宽高为290。图片显示代码如下：
 29 | 
 30 | 
 31 | ```python
 32 | from PIL import Image
 33 | from IPython.display import display
 34 |  
 35 | img = Image.open('test1.png', 'r')
 36 | print("img size is {}".format(img.size))
 37 | 
 38 | # 显示图片
 39 | display(img)
 40 | ```
 41 | 
 42 |     img size is (290, 290)
 43 | 
 44 | 
 45 | 
 46 | ![png](image/output_5_1.png)
 47 | 
 48 | 
 49 | ### 2.1.2 Python接口
 50 | 
 51 | 在 Python 中，使用make快捷功能，也可以输出二维码图像，代码如下：
 52 | 
 53 | 
 54 | ```python
 55 | import qrcode
 56 | # 构建二维码
 57 | data = 'hello world!'
 58 | img = qrcode.make(data)
 59 | # 显示图片格式，为qrcode.image.pil.PilImage
 60 | print(type(img))
 61 | # 保存图片 
 62 | img.save("test2.png")
 63 | ```
 64 | 
 65 |     <class 'qrcode.image.pil.PilImage'>
 66 | 
 67 | 
 68 | ### 2.1.3 二维码解析
 69 | 
 70 | 如果想查看生成的二维码信息，可以用手机扫描二维码，或者使用[草料二维码解析器](https://cli.im/deqr)在线解析图片。解析结果如下图所示：
 71 | 
 72 | 
 73 | ![](https://gitee.com/luohenyueji/article_picture_warehouse/raw/master/Python-Study-Notes/qrcode/image/result.png)
 74 | 
 75 | ## 2.2 高级使用
 76 | ### 2.2.1 二维码自定义
 77 | 我们还可以通过在之前使用该QRCode函数创建的qr对象中添加一些属性来自定义QR 码的设计和结构。基本参数如下：
 78 | 
 79 | + version：一个1 到40之间的整数，用于控制 QR 码的大小（最小的版本1是一个21x21矩阵）。默认为None，表示代码自动确认该参数。
 80 | + error_correction：用于二维码的纠错。qrcode 包中提供了以下四个常量：
 81 | 	1. ERROR_CORRECT_L 大约可以纠正 7% 或更少的错误。
 82 | 	2. ERROR_CORRECT_M （默认）大约 15% 或更少的错误可以被纠正。
 83 | 	3. ERROR_CORRECT_Q 大约 25% 或更少的错误可以被纠正。
 84 | 	4. ERROR_CORRECT_H 大约可以纠正 30% 或更少的错误。
 85 | + box_size：控制二维码的每个“盒子”有多少像素，默认为10。
 86 | + border：控制边框应该有多少个框厚（默认为 4，这是根据规范的最小值）。
 87 | 
 88 | 
 89 | ```python
 90 | import qrcode
 91 | qr = qrcode.QRCode(
 92 |     version=1,
 93 |     error_correction=qrcode.constants.ERROR_CORRECT_L,
 94 |     box_size=10,
 95 |     border=4,
 96 | )
 97 | data = "hello world!"
 98 | qr.add_data(data)
 99 | qr.make(fit=True)
100 | 
101 | # fill_color和back_color分别控制前景颜色和背景颜色，支持输入RGB色，注意颜色更改可能会导致二维码扫描识别失败
102 | img = qr.make_image(fill_color=( 213 , 143 , 1 ), back_color="lightblue")
103 | display(img)
104 | ```
105 | 
106 | 
107 | ![png](image/output_10_0.png)
108 | 
109 | 
110 | ### 2.2.2 二维码输出
111 | 我们还可以将二维码可以导出为SVG图片。
112 | 
113 | 
114 | ```python
115 | import qrcode
116 | import qrcode.image.svg
117 | method = 'fragment'
118 | if method == 'basic':
119 |     # Simple factory, just a set of rects.
120 |     # 简单模式
121 |     factory = qrcode.image.svg.SvgImage
122 | elif method == 'fragment':
123 |     # Fragment factory (also just a set of rects)
124 |     # 碎片模式
125 |     factory = qrcode.image.svg.SvgFragmentImage
126 | else:
127 |     # Combined path factory, fixes white space that may occur when zooming
128 |     # 组合模式，修复缩放时可能出现的空白
129 |     factory = qrcode.image.svg.SvgPathImage
130 | 
131 | img = qrcode.make('hello world!', image_factory=factory)
132 | 
133 | # 保存图片 
134 | img.save("test3.svg")
135 | ```
136 | 
137 | ### 2.2.3 二维码图像样式
138 | 
139 | 要将样式应用于QRCode，请使用StyledPilImage。这需要一个可选的module_drawers参数来控制二维码的形状，一个可选的color_mask参数来改变二维码的颜色，还有一个可选的embeded_image_path参数来嵌入图像。这些二维码并不能保证对所有的二维码识别器都有效，所以做一些实验并将纠错error_correction设置为高（尤其是嵌入图像时）。
140 | 
141 | 
142 | python-qrcode提供的二维码的形状列表如下：
143 | 
144 | ![](https://gitee.com/luohenyueji/article_picture_warehouse/raw/master/Python-Study-Notes/qrcode/image/module_drawers.png)
145 | 
146 | python-qrcode提供的二维码的颜色列表如下：
147 | 
148 | 
149 | ![](https://gitee.com/luohenyueji/article_picture_warehouse/raw/master/Python-Study-Notes/qrcode/image/color_masks.png)
150 | 
151 | 
152 | 具体使用代码如下：
153 | 
154 | 
155 | ```python
156 | import qrcode
157 | from qrcode.image.styledpil import StyledPilImage
158 | from qrcode.image.styles.moduledrawers import RoundedModuleDrawer,SquareModuleDrawer
159 | from qrcode.image.styles.colormasks import RadialGradiantColorMask,SquareGradiantColorMask
160 | 
161 | # 纠错设置为高
162 | qr = qrcode.QRCode(error_correction=qrcode.constants.ERROR_CORRECT_H)
163 | # 如果想扫描二维码后跳转到网页，需要添加https://
164 | qr.add_data('https://www.baidu.com')
165 | 
166 | # 修改二维码形状
167 | img_1 = qr.make_image(image_factory=StyledPilImage, module_drawer=RoundedModuleDrawer())
168 | # 修改二维码颜色
169 | img_2 = qr.make_image(image_factory=StyledPilImage, color_mask=SquareGradiantColorMask())
170 | # 嵌入图像
171 | img_3 = qr.make_image(image_factory=StyledPilImage, embeded_image_path="lena.jpg")
172 | # 嵌入图像
173 | img_4 = qr.make_image(image_factory=StyledPilImage, module_drawer=SquareModuleDrawer(), color_mask=RadialGradiantColorMask(), embeded_image_path="lena.jpg")
174 | ```
175 | 
176 | 
177 | ```python
178 | img_1
179 | ```
180 | 
181 | 
182 | 
183 | 
184 | ![png](image/output_15_0.png)
185 | 
186 | 
187 | 
188 | 
189 | ```python
190 | img_2
191 | ```
192 | 
193 | 
194 | 
195 | 
196 | ![png](image/output_16_0.png)
197 | 
198 | 
199 | 
200 | 
201 | ```python
202 | img_3
203 | ```
204 | 
205 | 
206 | 
207 | 
208 | ![png](image/output_17_0.png)
209 | 
210 | 
211 | 
212 | 
213 | ```python
214 | img_4
215 | ```
216 | 
217 | 
218 | 
219 | 
220 | ![png](image/output_18_0.png)
221 | 
222 | 
223 | 
224 | # 3 参考
225 | 
226 | + [python-qrcode](https://github.com/lincolnloop/python-qrcode)
227 | + [草料二维码解析器](https://cli.im/deqr)
228 | + [二维码的生成细节和原理](https://blog.csdn.net/wangguchao/article/details/85328655)
229 | 


--------------------------------------------------------------------------------
/Documents/特征选择笔记/删除低方差的特征.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {
  6 |     "collapsed": false
  7 |    },
  8 |    "source": [
  9 |     "\n",
 10 |     "本文主要介绍sklearn中进行特征选择的方法。[sklearn.feature_selection](https://scikit-learn.org/stable/modules/feature_selection.html)模块中的类可用于样本集的特征选择/降维，以提高估计量的准确性得分或提高其在超高维数据集上的性能。\n",
 11 |     "\n",
 12 |     "\n"
 13 |    ]
 14 |   },
 15 |   {
 16 |    "cell_type": "code",
 17 |    "execution_count": 1,
 18 |    "metadata": {
 19 |     "collapsed": false
 20 |    },
 21 |    "outputs": [],
 22 |    "source": [
 23 |     "# 多行输出\r\n",
 24 |     "from IPython.core.interactiveshell import InteractiveShell\r\n",
 25 |     "InteractiveShell.ast_node_interactivity = \"all\" "
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "markdown",
 30 |    "metadata": {
 31 |     "collapsed": false
 32 |    },
 33 |    "source": [
 34 |     "# 1  删除低方差的特征\n",
 35 |     "sklearn中[VarianceThreshold](https://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.VarianceThreshold.html#sklearn.feature_selection.VarianceThreshold)是特征选择的简单基准方法。它将删除方差未达到某个阈值的所有要素。默认情况下，它将删除所有零方差特征，即在所有样本中具有相同值的特征。\n",
 36 |     "其原理很简单，假设某个特征的特征值只有0和1，在所有输入样本中，某个特征百分之95的值都是取相同的数，那就可以认为这个特征作用不大。如果100%都是1，那这个特征就没意义了。当特征值都是离散型变量的时候这种方法才能用，如果是连续型变量，就需要将连续变量离散化之后才能用，在实际当中，很少出现95%以上都取某个值的特征存在，所以这种方法虽然简单但是不太好用。可以把它作为特征选择的预处理，先去掉那些取值变化小的特征，然后再从接下来提到的的特征选择方法中选择合适的进行进一步的特征选择。\n",
 37 |     "\n",
 38 |     "举个例子，假设我们有一个具有布尔特征的数据集，我们想要删除80%以上的样本中要么是1要么是0的所有特征。布尔特征是伯努利随机变量，这些变量的方差由下式给出：\n",
 39 |     "\n",
 40 |     "$\\mathrm{Var}[X] = p(1 - p)$\n",
 41 |     "\n",
 42 |     "因此我们可以使用阈值进行选择：0.8 * (1-0.8)\n",
 43 |     "\n"
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "code",
 48 |    "execution_count": 2,
 49 |    "metadata": {
 50 |     "collapsed": false
 51 |    },
 52 |    "outputs": [
 53 |     {
 54 |      "name": "stdout",
 55 |      "output_type": "stream",
 56 |      "text": [
 57 |       "原始值为： [[0, 0, 1], [0, 1, 0], [1, 0, 0], [0, 1, 1], [0, 1, 0], [0, 1, 1]]\n"
 58 |      ]
 59 |     },
 60 |     {
 61 |      "data": {
 62 |       "text/plain": [
 63 |        "array([[0, 1],\n",
 64 |        "       [1, 0],\n",
 65 |        "       [0, 0],\n",
 66 |        "       [1, 1],\n",
 67 |        "       [1, 0],\n",
 68 |        "       [1, 1]])"
 69 |       ]
 70 |      },
 71 |      "execution_count": 2,
 72 |      "metadata": {},
 73 |      "output_type": "execute_result"
 74 |     }
 75 |    ],
 76 |    "source": [
 77 |     "from sklearn.feature_selection import VarianceThreshold\r\n",
 78 |     "X = [[0, 0, 1],[0, 1, 0], [1, 0, 0], [0, 1, 1], [0, 1, 0], [0, 1, 1]]\r\n",
 79 |     "print(\"原始值为：\",X)\r\n",
 80 |     "selector = VarianceThreshold(threshold=(0.8 * (1 - 0.8)))\r\n",
 81 |     "# 方差过滤后的值\r\n",
 82 |     "# 这一步与selector.fit(X)和selector.transform(X)实现的功能一样\r\n",
 83 |     "selector.fit_transform(X)"
 84 |    ]
 85 |   },
 86 |   {
 87 |    "cell_type": "markdown",
 88 |    "metadata": {
 89 |     "collapsed": false
 90 |    },
 91 |    "source": [
 92 |     "该方法的原理为我们可以计算每列的总体方差，然后给定一个方差阈值，比如0.8 * (1 - 0.8)表示伯努利分布80%数为同一个数时样本的方差。如果某列方差小于0.8 * (1 - 0.8)，表示该列超过80%的数据为同一个数值。"
 93 |    ]
 94 |   },
 95 |   {
 96 |    "cell_type": "code",
 97 |    "execution_count": 3,
 98 |    "metadata": {
 99 |     "collapsed": false
100 |    },
101 |    "outputs": [
102 |     {
103 |      "name": "stdout",
104 |      "output_type": "stream",
105 |      "text": [
106 |       "原始数据每列总体方差为\n",
107 |       "[0.13888889 0.22222222 0.25      ]\n",
108 |       "保留的特征列号为\n",
109 |       "[1 2]\n",
110 |       "反推原始数据列为\n",
111 |       "[[0 0 1]\n",
112 |       " [0 1 0]\n",
113 |       " [0 0 0]\n",
114 |       " [0 1 1]\n",
115 |       " [0 1 0]\n",
116 |       " [0 1 1]]\n"
117 |      ]
118 |     }
119 |    ],
120 |    "source": [
121 |     "print(\"原始数据每列总体方差为\\n{}\".format(selector.variances_))\r\n",
122 |     "print(\"保留的特征列号为\\n{}\".format(selector.get_support(True)))\r\n",
123 |     "print(\"反推原始数据列为\\n{}\".format(selector.inverse_transform(selector.transform(X))))"
124 |    ]
125 |   },
126 |   {
127 |    "cell_type": "markdown",
128 |    "metadata": {
129 |     "collapsed": false
130 |    },
131 |    "source": [
132 |     "我们也可以不设置过滤方差，但是默认阈值为0，也就是100%的数为同一个值的特征才过滤"
133 |    ]
134 |   },
135 |   {
136 |    "cell_type": "code",
137 |    "execution_count": 4,
138 |    "metadata": {
139 |     "collapsed": false
140 |    },
141 |    "outputs": [
142 |     {
143 |      "data": {
144 |       "text/plain": [
145 |        "array([[2, 0],\n",
146 |        "       [1, 4],\n",
147 |        "       [1, 1]])"
148 |       ]
149 |      },
150 |      "execution_count": 4,
151 |      "metadata": {},
152 |      "output_type": "execute_result"
153 |     }
154 |    ],
155 |    "source": [
156 |     "X = [[0, 2, 0, 3], [0, 1, 4, 3], [0, 1, 1, 3]]\r\n",
157 |     "selector = VarianceThreshold()\r\n",
158 |     "selector.fit_transform(X)\r\n"
159 |    ]
160 |   },
161 |   {
162 |    "cell_type": "code",
163 |    "execution_count": 5,
164 |    "metadata": {
165 |     "collapsed": false
166 |    },
167 |    "outputs": [
168 |     {
169 |      "name": "stdout",
170 |      "output_type": "stream",
171 |      "text": [
172 |       "原始数据每列总体方差为\n",
173 |       "[0.         0.22222222 2.88888889 0.        ]\n",
174 |       "保留的特征列号为\n",
175 |       "[1 2]\n",
176 |       "反推原始数据列为\n",
177 |       "[[0 2 0 0]\n",
178 |       " [0 1 4 0]\n",
179 |       " [0 1 1 0]]\n"
180 |      ]
181 |     }
182 |    ],
183 |    "source": [
184 |     "print(\"原始数据每列总体方差为\\n{}\".format(selector.variances_))\r\n",
185 |     "# 第0列和第3列方差为0需要删除\r\n",
186 |     "print(\"保留的特征列号为\\n{}\".format(selector.get_support(True)))\r\n",
187 |     "print(\"反推原始数据列为\\n{}\".format(selector.inverse_transform(selector.transform(X))))"
188 |    ]
189 |   },
190 |   {
191 |    "cell_type": "markdown",
192 |    "metadata": {
193 |     "collapsed": false
194 |    },
195 |    "source": [
196 |     "# 2 参考\n",
197 |     "> [https://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.VarianceThreshold.html#sklearn.feature_selection.VarianceThreshold](https://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.VarianceThreshold.html#sklearn.feature_selection.VarianceThreshold)\n",
198 |     "\n",
199 |     "> [https://www.cnblogs.com/tszr/p/10802018.html](https://www.cnblogs.com/tszr/p/10802018.html)"
200 |    ]
201 |   },
202 |   {
203 |    "cell_type": "code",
204 |    "execution_count": null,
205 |    "metadata": {
206 |     "collapsed": false
207 |    },
208 |    "outputs": [],
209 |    "source": []
210 |   }
211 |  ],
212 |  "metadata": {
213 |   "kernelspec": {
214 |    "display_name": "PaddlePaddle 1.8.0 (Python 3.5)",
215 |    "language": "python",
216 |    "name": "py35-paddle1.2.0"
217 |   },
218 |   "language_info": {
219 |    "codemirror_mode": {
220 |     "name": "ipython",
221 |     "version": 3
222 |    },
223 |    "file_extension": ".py",
224 |    "mimetype": "text/x-python",
225 |    "name": "python",
226 |    "nbconvert_exporter": "python",
227 |    "pygments_lexer": "ipython3",
228 |    "version": "3.7.4"
229 |   }
230 |  },
231 |  "nbformat": 4,
232 |  "nbformat_minor": 1
233 | }
234 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 luohenyueji
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Plot Items/Mapping/Python绘制数据地图1-GeoPandas入门指北/image/1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Mapping/Python绘制数据地图1-GeoPandas入门指北/image/1.png


--------------------------------------------------------------------------------
/Plot Items/Mapping/Python绘制数据地图1-GeoPandas入门指北/image/10.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Mapping/Python绘制数据地图1-GeoPandas入门指北/image/10.gif


--------------------------------------------------------------------------------
/Plot Items/Mapping/Python绘制数据地图1-GeoPandas入门指北/image/11.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Mapping/Python绘制数据地图1-GeoPandas入门指北/image/11.png


--------------------------------------------------------------------------------
/Plot Items/Mapping/Python绘制数据地图1-GeoPandas入门指北/image/12.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Mapping/Python绘制数据地图1-GeoPandas入门指北/image/12.png


--------------------------------------------------------------------------------
/Plot Items/Mapping/Python绘制数据地图1-GeoPandas入门指北/image/13.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Mapping/Python绘制数据地图1-GeoPandas入门指北/image/13.png


--------------------------------------------------------------------------------
/Plot Items/Mapping/Python绘制数据地图1-GeoPandas入门指北/image/14.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Mapping/Python绘制数据地图1-GeoPandas入门指北/image/14.png


--------------------------------------------------------------------------------
/Plot Items/Mapping/Python绘制数据地图1-GeoPandas入门指北/image/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Mapping/Python绘制数据地图1-GeoPandas入门指北/image/2.png


--------------------------------------------------------------------------------
/Plot Items/Mapping/Python绘制数据地图1-GeoPandas入门指北/image/3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Mapping/Python绘制数据地图1-GeoPandas入门指北/image/3.png


--------------------------------------------------------------------------------
/Plot Items/Mapping/Python绘制数据地图1-GeoPandas入门指北/image/4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Mapping/Python绘制数据地图1-GeoPandas入门指北/image/4.png


--------------------------------------------------------------------------------
/Plot Items/Mapping/Python绘制数据地图1-GeoPandas入门指北/image/5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Mapping/Python绘制数据地图1-GeoPandas入门指北/image/5.png


--------------------------------------------------------------------------------
/Plot Items/Mapping/Python绘制数据地图1-GeoPandas入门指北/image/6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Mapping/Python绘制数据地图1-GeoPandas入门指北/image/6.png


--------------------------------------------------------------------------------
/Plot Items/Mapping/Python绘制数据地图1-GeoPandas入门指北/image/7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Mapping/Python绘制数据地图1-GeoPandas入门指北/image/7.png


--------------------------------------------------------------------------------
/Plot Items/Mapping/Python绘制数据地图1-GeoPandas入门指北/image/8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Mapping/Python绘制数据地图1-GeoPandas入门指北/image/8.png


--------------------------------------------------------------------------------
/Plot Items/Mapping/Python绘制数据地图1-GeoPandas入门指北/image/9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Mapping/Python绘制数据地图1-GeoPandas入门指北/image/9.png


--------------------------------------------------------------------------------
/Plot Items/Mapping/Python绘制数据地图2-GeoPandas地图可视化/2022江苏省各市GDP.csv:
--------------------------------------------------------------------------------
 1 | 排行,地级市,2022年GDP（亿元）
 2 | 1,苏州市,23958.3 
 3 | 2,南京市,16907.9 
 4 | 3,无锡市,14850.8 
 5 | 4,南通市,11379.6 
 6 | 5,常州市,9550.1 
 7 | 6,徐州市,8457.8 
 8 | 7,盐城市,7079.8 
 9 | 8,扬州市,6696.4 
10 | 9,泰州市,6401.8 
11 | 10,镇江市,5017.0 
12 | 11,淮安市,4742.4 
13 | 12,宿迁市,4112.0 
14 | 13,连云港市,4005.0 
15 | 


--------------------------------------------------------------------------------
/Plot Items/Mapping/Python绘制数据地图4-MovingPandas入门指北/data.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Mapping/Python绘制数据地图4-MovingPandas入门指北/data.zip


--------------------------------------------------------------------------------
/Plot Items/Mapping/Python绘制数据地图5-MovingPandas绘图实例/data.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Mapping/Python绘制数据地图5-MovingPandas绘图实例/data.zip


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib-scalebar库绘制比例尺/image/green.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib-scalebar库绘制比例尺/image/green.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib-scalebar库绘制比例尺/image/img1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib-scalebar库绘制比例尺/image/img1.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib-scalebar库绘制比例尺/image/img2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib-scalebar库绘制比例尺/image/img2.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib-scalebar库绘制比例尺/image/img3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib-scalebar库绘制比例尺/image/img3.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib-scalebar库绘制比例尺/image/orange.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib-scalebar库绘制比例尺/image/orange.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib-scalebar库绘制比例尺/image/purple.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib-scalebar库绘制比例尺/image/purple.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib-scalebar库绘制比例尺/image/red.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib-scalebar库绘制比例尺/image/red.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib-scalebar库绘制比例尺/image/yellow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib-scalebar库绘制比例尺/image/yellow.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib-scalebar库绘制比例尺/image/zoom1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib-scalebar库绘制比例尺/image/zoom1.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib-scalebar库绘制比例尺/image/zoom2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib-scalebar库绘制比例尺/image/zoom2.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib-scalebar库绘制比例尺/image/zoom3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib-scalebar库绘制比例尺/image/zoom3.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib-scalebar库绘制比例尺/image/zoom4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib-scalebar库绘制比例尺/image/zoom4.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/bundesliga_crests_22_23/1. FC Union Berlin.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/bundesliga_crests_22_23/1. FC Union Berlin.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/bundesliga_crests_22_23/Bayer Leverkusen.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/bundesliga_crests_22_23/Bayer Leverkusen.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/bundesliga_crests_22_23/Bayern Munich.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/bundesliga_crests_22_23/Bayern Munich.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/bundesliga_crests_22_23/Borussia Dortmund.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/bundesliga_crests_22_23/Borussia Dortmund.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/bundesliga_crests_22_23/Borussia Monchengladbach.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/bundesliga_crests_22_23/Borussia Monchengladbach.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/bundesliga_crests_22_23/Eintracht Frankfurt.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/bundesliga_crests_22_23/Eintracht Frankfurt.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/bundesliga_crests_22_23/FC Augsburg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/bundesliga_crests_22_23/FC Augsburg.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/bundesliga_crests_22_23/FC Cologne.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/bundesliga_crests_22_23/FC Cologne.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/bundesliga_crests_22_23/Hertha Berlin.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/bundesliga_crests_22_23/Hertha Berlin.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/bundesliga_crests_22_23/Mainz.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/bundesliga_crests_22_23/Mainz.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/bundesliga_crests_22_23/RB Leipzig.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/bundesliga_crests_22_23/RB Leipzig.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/bundesliga_crests_22_23/SC Freiburg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/bundesliga_crests_22_23/SC Freiburg.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/bundesliga_crests_22_23/Schalke 04.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/bundesliga_crests_22_23/Schalke 04.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/bundesliga_crests_22_23/TSG Hoffenheim.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/bundesliga_crests_22_23/TSG Hoffenheim.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/bundesliga_crests_22_23/VfB Stuttgart.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/bundesliga_crests_22_23/VfB Stuttgart.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/bundesliga_crests_22_23/VfL Bochum.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/bundesliga_crests_22_23/VfL Bochum.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/bundesliga_crests_22_23/VfL Wolfsburg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/bundesliga_crests_22_23/VfL Wolfsburg.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/bundesliga_crests_22_23/Werder Bremen.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/bundesliga_crests_22_23/Werder Bremen.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/Argentina.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/Argentina.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/Australia.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/Australia.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/Brazil.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/Brazil.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/Cameroon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/Cameroon.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/Canada.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/Canada.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/Chile.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/Chile.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/China.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/China.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/England.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/England.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/France.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/France.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/Germany.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/Germany.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/Italy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/Italy.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/Jamaica.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/Jamaica.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/Japan.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/Japan.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/Netherlands.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/Netherlands.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/New Zealand.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/New Zealand.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/Nigeria.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/Nigeria.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/Norway.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/Norway.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/Scotland.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/Scotland.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/South Africa.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/South Africa.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/South Korea.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/South Korea.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/Spain.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/Spain.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/Sweden.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/Sweden.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/Thailand.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/Thailand.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/USA.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/luohenyueji/Python-Study-Notes/32f4d53f6937dbb7433fa53cb011e4dd64c559c7/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/country_flags/USA.png


--------------------------------------------------------------------------------
/Plot Items/Matplotlib-related/基于matplotlib和plottable库绘制精美表格/data/wwc_forecasts.csv:
--------------------------------------------------------------------------------
 1 | "forecast_timestamp","team","group","spi","global_o","global_d","sim_wins","sim_ties","sim_losses","sim_goal_diff","goals_scored","goals_against","group_1","group_2","group_3","group_4","make_round_of_16","make_quarters","make_semis","make_final","win_league","points"
 2 | "2019-06-16 17:54:33 UTC","USA","F",98.32748,5.52561,0.58179,2.60922,0.22034,0.17044,16.95412,17.72834,0.77422,0.82956,0.17044,0,0,1,0.78079,0.47307,0.35076,0.23618,6
 3 | "2019-06-16 17:54:33 UTC","France","A",96.29671,4.31375,0.52137,2.83658,0.12907,0.03435,6.97992,8.25201,1.27209,0.99483,0.00515,2e-05,0,1,0.78367,0.42052,0.30038,0.19428,6
 4 | "2019-06-16 17:54:33 UTC","Germany","B",93.76549,3.96791,0.67818,2.85072,0.12325,0.02603,4.02534,4.23404,0.2087,0.98483,0.01517,0,0,1,0.8928,0.48039,0.2771,0.12256,6
 5 | "2019-06-16 17:54:33 UTC","Canada","E",93.51599,3.67537,0.5698,2.3883,0.26705,0.34465,3.08796,4.16952,1.08156,0.3883,0.6117,0,0,1,0.59192,0.3614,0.20157,0.09031,6
 6 | "2019-06-16 17:54:33 UTC","England","D",91.92311,3.5156,0.63717,2.45472,0.25098,0.2943,2.3368,4.4104,2.0736,0.7057,0.2943,0,0,1,0.6851,0.43053,0.16465,0.08003,6
 7 | "2019-06-16 17:54:33 UTC","Netherlands","E",92.67529,3.8588,0.73539,2.34465,0.26705,0.3883,2.91204,5.08156,2.16952,0.6117,0.3883,0,0,1,0.59166,0.36983,0.18514,0.07576,6
 8 | "2019-06-16 17:54:33 UTC","Australia","C",92.82054,4.21769,0.89761,1.88791,0.07186,1.04023,2.85093,7.60702,4.75609,0.12812,0.53618,0.33529,0.00041,0.99862,0.53811,0.26179,0.10214,0.04791,3
 9 | "2019-06-16 17:54:33 UTC","Sweden","F",88.44996,2.98755,0.6263,2.17044,0.22034,0.60922,5.04588,7.77422,2.72834,0.17044,0.82956,0,0,1,0.4669,0.20267,0.10316,0.04006,6
10 | "2019-06-16 17:54:33 UTC","Japan","D",90.31291,3.78342,0.90887,1.2943,1.25098,0.45472,0.6632,3.0736,2.4104,0.2943,0.62818,0.07752,0,1,0.46314,0.26616,0.09314,0.03515,4
11 | "2019-06-16 17:54:33 UTC","Brazil","C",89.47398,3.589,0.87289,1.5792,0.20453,1.21627,2.85079,6.95614,4.10535,0.25916,0.22349,0.51735,0,0.9994,0.42681,0.17353,0.06693,0.02666,3
12 | "2019-06-16 17:54:33 UTC","Spain","B",86.53061,3.12364,0.82417,1.41905,0.29009,1.29086,1.24323,4.09506,2.85183,0.01082,0.69832,0.2907,0.00016,0.99367,0.30836,0.10742,0.04509,0.01759,3
13 | "2019-06-16 17:54:33 UTC","Norway","A",83.68324,2.96979,0.90942,1.48876,0.2523,1.25894,2.47825,5.42313,2.94488,0.00515,0.94029,0.05358,0.00098,0.99766,0.44246,0.17536,0.04192,0.01617,3
14 | "2019-06-16 17:54:33 UTC","China","B",82.6652,2.69201,0.79838,1.29086,0.29009,1.41905,-0.24323,1.85183,2.09506,0.00435,0.28651,0.70479,0.00435,0.92813,0.33744,0.10723,0.03367,0.00994,3
15 | "2019-06-16 17:54:33 UTC","Italy","C",76.137,2.97985,1.34724,2.21627,0.20453,0.5792,5.14921,8.10535,2.95614,0.61272,0.24033,0.14695,0,1,0.37442,0.08802,0.01963,0.00407,6
16 | "2019-06-16 17:54:33 UTC","New Zealand","E",77.62324,2.82453,1.15653,0.48334,0.25687,2.25979,-2.5438,1.38452,3.92832,0,0,0.48334,0.51666,0.40427,0.09699,0.03725,0.00614,0.00178,0
17 | "2019-06-16 17:54:33 UTC","Nigeria","A",71.65054,2.35033,1.11681,1.03435,0.12907,1.83658,-2.97992,2.27209,5.25201,2e-05,0.05077,0.90873,0.04048,0.49519,0.13261,0.02736,0.00609,0.00111,3
18 | "2019-06-16 17:54:33 UTC","Cameroon","E",65.8396,2.13554,1.21653,0.25979,0.25687,2.48334,-3.4562,1.92832,5.38452,0,0,0.51666,0.48334,0.2266,0.03779,0.01084,0.00124,0.00028,0
19 | "2019-06-16 17:54:33 UTC","South Korea","A",76.36337,2.70504,1.14036,0.25894,0.2523,2.48876,-6.47825,0.94488,7.42313,0,0.00379,0.03767,0.95854,0.017,0.00621,0.00209,0.00051,1e-04,0
20 | "2019-06-16 17:54:33 UTC","Scotland","D",53.97942,2.0491,1.69041,0.52512,0.24249,2.23239,-1.38437,3.5153,4.89967,0,0,0.52512,0.47488,0.51934,0.02828,0.00324,0.00068,5e-05,0
21 | "2019-06-16 17:54:33 UTC","Argentina","D",39.18661,1.50978,1.85584,0.23239,1.24249,1.52512,-1.61563,0.89967,2.5153,0,0.07752,0.39736,0.52512,0.24587,0.00546,0.00043,4e-05,1e-05,1
22 | "2019-06-16 17:54:33 UTC","Chile","F",46.53391,1.82545,1.83067,0.4457,0.21692,2.33738,-4.7484,1.69129,6.43969,0,0,0.66262,0.33738,0.14801,0.00781,0.00072,1e-05,0,0
23 | "2019-06-16 17:54:33 UTC","South Africa","B",56.69038,1.71768,1.2548,0.02603,0.12325,2.85072,-5.02534,1.2087,6.23404,0,0,0.00451,0.99549,0.0038,0.00072,1e-04,1e-05,0,0
24 | "2019-06-16 17:54:33 UTC","Thailand","F",40.69282,2.04038,2.37858,0.33738,0.21692,2.4457,-17.2516,2.43969,19.69129,0,0,0.33738,0.66262,0.02218,0.00054,5e-05,0,0,0
25 | "2019-06-16 17:54:33 UTC","Jamaica","C",53.51525,2.47074,2.11961,0.04023,0.07186,2.88791,-10.85093,0.75609,11.60702,0,0,0.00041,0.99959,0.00026,1e-05,0,0,0,0
26 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Python-Study-Notes
2 | 我的Python语言代码笔记和代码仓库
3 | 
4 | ---
5 | 
6 | Python语言使用平台为jupyter notebook/Python3.7，提供Python3.7文件源码，推荐使用jupyter notebook。**个人博客：[LuohenYJ](https://blog.csdn.net/luohenyj/category_9061431.html)**


--------------------------------------------------------------------------------
/Seaborn Study/README.md:
--------------------------------------------------------------------------------
 1 | # Seaborn-Study-Notes
 2 | My study nodes about how to use seaborn for drawing statistical graphics.  
 3 | There provides the reproducible python code and aims to showcase the usage of seaborn and to help you benefit it.
 4 | ## List of my study nodes
 5 | |Notes|笔记|
 6 | |------|------|
 7 | |[1 BOXPLOT](./sources/1%20%E7%AE%B1%E5%BD%A2%E5%9B%BEBOXPLOT.ipynb)|[1箱形图](https://blog.csdn.net/LuohenYJ/article/details/90677918)
 8 | |[2 SCATTERPLOT](./sources/2%20%E6%95%A3%E7%82%B9%E5%9B%BESCATTERPLOT.ipynb)|[2散点图](https://blog.csdn.net/LuohenYJ/article/details/90698499)
 9 | |[3 HISTOGRAMPLOT](./sources/3%20%E7%9B%B4%E6%96%B9%E5%9B%BEHISTOGRAMPLOT.ipynb)|[3直方图](https://blog.csdn.net/LuohenYJ/article/details/90704424)
10 | |[4 DENSITYPLOT](./sources/4%20%E6%A0%B8%E5%AF%86%E5%BA%A6%E5%9B%BEDENSITYPLOT.ipynb)|[4核密度图](https://blog.csdn.net/LuohenYJ/article/details/90711712)
11 | |[5 VIOLINPLOT](./sources/5%20%E5%B0%8F%E6%8F%90%E7%90%B4%E5%9B%BEVIOLINPLOT.ipynb)|[5小提琴图](https://blog.csdn.net/LuohenYJ/article/details/90715644)
12 | |[6 HEATMAPPLOT](./sources/6%20%E7%83%AD%E5%9B%BEHEATMAPPLOT.ipynb)|[6热图](https://blog.csdn.net/LuohenYJ/article/details/90719830)
13 | |[7 Adjustment of Common Parameters6 HEATMAPPLOT](./sources/7%20%E5%B8%B8%E7%94%A8%E5%8F%82%E6%95%B0%E8%B0%83%E6%95%B4Adjustment%20of%20Common%20Parameters.ipynb)|[7常用参数调整](https://blog.csdn.net/LuohenYJ/article/details/90729614)
14 | |[8 Avoid Overplotting](./sources/8%20%E9%81%BF%E5%85%8D%E8%BF%87%E5%BA%A6%E7%BB%98%E5%9B%BEAvoid%20Overplotting.ipynb)|[8避免过度绘图](https://blog.csdn.net/LuohenYJ/article/details/90731214)
15 | |[9 Drawing example(1)](./sources/9%20%E7%BB%98%E5%9B%BE%E5%AE%9E%E4%BE%8B(1)%20Drawing%20example(1).ipynb)|[9绘图实例(1)](https://blog.csdn.net/LuohenYJ/article/details/90901320)
16 | |[10 Drawing example(2)](./sources/10%20%E7%BB%98%E5%9B%BE%E5%AE%9E%E4%BE%8B(2)%20Drawing%20example(2).ipynb)|[10绘图实例(2)](https://blog.csdn.net/LuohenYJ/article/details/90904890)
17 | |[11 Drawing example(3)](./sources/11%20%E7%BB%98%E5%9B%BE%E5%AE%9E%E4%BE%8B(3)%20Drawing%20example(3).ipynb)|[11绘图实例(3)](https://blog.csdn.net/LuohenYJ/article/details/91044374)
18 | |[12 Drawing example(4)](./sources/12%20%E7%BB%98%E5%9B%BE%E5%AE%9E%E4%BE%8B(4)%20Drawing%20example(4).ipynb)|[12绘图实例(4)](https://blog.csdn.net/LuohenYJ/article/details/91045940)
19 | 


--------------------------------------------------------------------------------
/Spider Study/0x01maoyanTop100_spider/maoyanTop100_spider.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | 猫眼top100电影排行榜抓取http://maoyan.com/board/4
 4 | Created on Thu Sep 17 19:40:58 2020
 5 | 
 6 | @author: luohenyueji
 7 | """
 8 | 
 9 | import requests
10 | import re
11 | import json
12 | import time
13 | import csv
14 | 
15 | 
16 | # ----- 抓取首页
17 | 
18 | 
19 | def get_one_page(url):
20 |     headers = {
21 |         'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:34.0) Gecko/20100101 Firefox/34.0"}
22 |     # 获得对象
23 |     # 需要伪装成浏览器，不然会解析失败
24 |     response = requests.get(url, headers=headers)
25 |     # 状态
26 |     if response.status_code == 200:
27 |         # 设置编码
28 |         response.encoding = "utf-8"
29 |         return response.text
30 |     return None
31 | 
32 | 
33 | # ----- 解析网页
34 | def parse_one_page(html):
35 |     # 将一页的10个电影信息都提取出来
36 |     # re.s的作用是使.匹配包括换行符在内的所有字符
37 |     pattern = re.compile(
38 |         '<dd>.*?board-index.*?>(.*?)</i>'
39 |         + '.*?data-src="(.*?)".*?name.*?a.*?>(.*?)</a>'
40 |         + '.*?star.*?>(.*?)</p>.*?releasetime.*?>(.*?)</p>'
41 |         + '.*?integer.*?>(.*?)</i>.*?fraction.*?>(.*?)</i>.*?</dd>',
42 |         re.S)
43 |     items = re.findall(pattern, html)
44 |     for item in items:
45 |         # yield关键字使用见https://blog.csdn.net/mieleizhi0522/article/details/82142856
46 |         yield {
47 |             'index': item[0],
48 |             'image': item[1],
49 |             # 去除.strip空格
50 |             'title': item[2].strip(),
51 |             # 先去除空格和主演：然后判断是否为空
52 |             'actor': item[3].strip()[3:] if len(item[3]) > 3 else '',
53 |             'time': item[4].strip()[5:] if len(item[4]) > 5 else '',
54 |             # item[5]整数，item[6]小数
55 |             'score': item[5].strip() + item[6].strip()
56 |         }
57 | 
58 | 
59 | # ----- 保存数据为json文件
60 | def write_to_json(content):
61 |     with open('result.txt', 'a', encoding='utf-8') as f:
62 |         # ensure_ascii不将中文格式化
63 |         f.write(json.dumps(content, ensure_ascii=False) + '\n')
64 | 
65 | 
66 | # ----- 保存数据为csv文件
67 | def write_to_csvFile(content):
68 |     with open("result.csv", 'a', encoding='utf-8-sig', newline='') as f:
69 |         writer = csv.DictWriter(f, fieldnames=content.keys())
70 |         # 如果是第一行，写入表头
71 |         if int(content['index']) == 1:
72 |             writer.writeheader()
73 |         writer.writerow(content)
74 | 
75 | 
76 | def main(offset):
77 |     # 地址
78 |     url = 'http://maoyan.com/board/4?offset=' + str(offset)
79 |     # 获得网页源代码
80 |     html = get_one_page(url)
81 |     # 解析数据
82 |     for item in parse_one_page(html):
83 |         print(item)
84 |         write_to_json(item)
85 |         write_to_csvFile(item)
86 | 
87 | 
88 | if __name__ == '__main__':
89 |     for i in range(10):
90 |         # 分页爬取
91 |         main(offset=i * 10)
92 |         # 延时爬取
93 |         time.sleep(1)
94 | 


--------------------------------------------------------------------------------
/Spider Study/0x01maoyanTop100_spider/ref.md:
--------------------------------------------------------------------------------
1 | # 参考
2 | + https://cuiqingcai.com/5534.html
3 | + https://blog.csdn.net/u013211009/article/details/101428977
4 | 


--------------------------------------------------------------------------------
/Spider Study/0x02weiboSingleUser_spider/ref.md:
--------------------------------------------------------------------------------
1 | # 参考
2 | + https://github.com/Python3WebSpider/WeiboList/issues/9
3 | + https://cuiqingcai.com/5609.html
4 | + https://blog.csdn.net/weixin_44489501/article/details/104119556
5 | + https://blog.csdn.net/weixin_44489501/article/details/104119556
6 | 


--------------------------------------------------------------------------------
/Spider Study/0x02weiboSingleUser_spider/weiboSingleUser_spider.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | Created on Sat Sep 19 19:08:58 2020
 4 | 微博单个用户博文信息爬取
 5 | @author: luohenyueji
 6 | """
 7 | 
 8 | from urllib.parse import urlencode
 9 | import requests
10 | from pyquery import PyQuery as pq
11 | import json
12 | 
13 | # 定义了base_url来表示请求的URL的前半部分
14 | base_url = 'https://m.weibo.cn/api/container/getIndex?'
15 | 
16 | headers = {
17 |     'Host': 'm.weibo.cn',
18 |     'Referer': 'https://m.weibo.cn/u/2830678474',
19 |     'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) '
20 |                   'Chrome/58.0.3029.110 Safari/537.36',
21 |     'X-Requested-With': 'XMLHttpRequest',
22 | }
23 | 
24 | 
25 | # ----- 获取页面
26 | # 微博链接改版，需要输入since_id
27 | # since_id查找见
28 | # https://github.com/Python3WebSpider/WeiboList/issues/9
29 | def get_page(since_id):
30 |     # type,value,containerid为固定参数
31 |     params = {
32 |         'type': 'uid',
33 |         'value': '2830678474',
34 |         'containerid': '1076032830678474',
35 |         'since_id': since_id
36 |     }
37 |     url = base_url + urlencode(params)
38 |     try:
39 |         # 访问页面
40 |         response = requests.get(url, headers=headers)
41 |         if response.status_code == 200:
42 |             return response.json()
43 |     except requests.ConnectionError as e:
44 |         print('Error', e.args)
45 | 
46 | 
47 | # ----- 解析网页
48 | def parse_page(jsondata):
49 |     if jsondata:
50 |         items = jsondata.get('data').get('cards')
51 |         for item in items:
52 |             item = item.get('mblog')
53 |             weibo = {}
54 |             # 微博id
55 |             weibo['id'] = item.get('id')
56 |             # 正文
57 |             weibo['text'] = pq(item.get('text')).text()
58 |             # 获赞数
59 |             weibo['attitudes'] = item.get('attitudes_count')
60 |             # 评论数
61 |             weibo['comments'] = item.get('comments_count')
62 |             # 转发数
63 |             weibo['reposts'] = item.get('reposts_count')
64 |             yield weibo
65 | 
66 | 
67 | # ----- 保存数据为json文件
68 | def write_to_json(content):
69 |     with open('result.txt', 'a', encoding='utf-8') as f:
70 |         # ensure_ascii不将中文格式化
71 |         f.write(json.dumps(content, ensure_ascii=False) + '\n')
72 | 
73 | 
74 | if __name__ == '__main__':
75 |     since_id = 0
76 |     # 爬取页数
77 |     max_page = 10
78 |     for page in range(1, max_page + 1):
79 |         jsondata = get_page(since_id)
80 |         since_id = jsondata.get('data').get('cardlistInfo').get('since_id')
81 |         for i, weibo in enumerate(parse_page(jsondata)):
82 |             print(weibo)
83 |             write_to_json(weibo)
84 | 


--------------------------------------------------------------------------------
/Spider Study/0x03toutiaoArticle_spider/ref.md:
--------------------------------------------------------------------------------
1 | # 参考
2 | + https://cuiqingcai.com/5616.html
3 | + https://github.com/Python3WebSpider/Jiepai/issues/25


--------------------------------------------------------------------------------
/Spider Study/0x03toutiaoArticle_spider/toutiaoArticle_spider.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Created on Sun Sep 20 19:20:40 2020
  4 | 今日头条单篇文章爬取
  5 | @author: luohenyueji
  6 | """
  7 | 
  8 | import requests
  9 | from hashlib import md5
 10 | import os
 11 | import re
 12 | 
 13 | # 需要cookie
 14 | headers = {
 15 |     'cookie': 'csrftoken=1490f6d92e97ce79f9e52dc4f3222608; ttcid=22698125819f4938826fc916af6b7e7355; SLARDAR_WEB_ID=f754d5f8-83ce-4577-8f77-a232e1708142; tt_webid=6856774172980561421; WEATHER_CITY=%E5%8C%97%E4%BA%AC; tt_webid=6856774172980561421; __tasessionId=v8lvfouyt1596594815875; s_v_web_id=kdgrbwsk_9Ussu5RZ_AUmC_4DO5_8s8w_21Pv7qDIVeeE; tt_scid=AyiNMhl4GyKjhxNFpcm5AWgbRD7dsl-Zu4nBHWPBkHFf6lAynwUzX3zbMRIWr.De95f9',
 16 |     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
 17 |                   'Chrome/84.0.4147.105 Safari/537.36 '
 18 | }
 19 | 
 20 | 
 21 | # ----- 获取网页
 22 | def get_page(offset):
 23 |     # timestamp和_signature分割可以不传
 24 |     params = {
 25 |         'aid': 24,
 26 |         'app_name': 'web_search',
 27 |         # 控制翻页的参数
 28 |         'offset': offset,
 29 |         'format': 'json',
 30 |         # 搜索图片的关键词
 31 |         'keyword': '手机',
 32 |         'autoload': 'true',
 33 |         'count': 20,
 34 |         'en_qc': 1,
 35 |         'cur_tab': 1,
 36 |         'from': 'search_tab',
 37 |         'pd': 'synthesis',
 38 |     }
 39 |     url = 'http://www.toutiao.com/api/search/content/'
 40 |     try:
 41 |         response = requests.get(url, params=params, headers=headers)
 42 |         if response.status_code == 200:
 43 |             return response.json()
 44 |     except requests.ConnectionError as e:
 45 |         print(e)
 46 |         return None
 47 | 
 48 | 
 49 | # ----- 获得图像
 50 | def get_images(json):
 51 |     if json.get('data'):
 52 |         for item in json.get('data'):
 53 |             title = item.get('title')
 54 |             images = item.get('image_list')
 55 |             # 如果为空
 56 |             if images is None or title is None:
 57 |                 continue
 58 |             for image in images:
 59 |                 # 获得原图链接
 60 |                 origin_image = re.sub("list.*?pgc-image", "large/pgc-image",
 61 |                                       image.get('url'))
 62 |                 yield {
 63 |                     'image': origin_image,
 64 |                     'title': title
 65 |                 }
 66 | 
 67 |             # ----- 保存图片
 68 | 
 69 | 
 70 | # 修正文件名
 71 | def correct_title(title):
 72 |     # 避免文件名含有不合法字符
 73 |     # 文件名最好不要含有.，否则有的系统会报错
 74 |     error_set = ['/', '\\', ':', '*', '?', '"', '|', '<', '>', '.']
 75 |     for c in title:
 76 |         if c in error_set:
 77 |             title = title.replace(c, '')
 78 |     return title
 79 | 
 80 | 
 81 | def save_image(item):
 82 |     dir_name = "手机/" + correct_title(item.get('title'))
 83 |     # 创建文件夹
 84 |     if not os.path.exists(dir_name):
 85 |         os.makedirs(dir_name)
 86 |     try:
 87 |         response = requests.get(item.get('image'))
 88 |         if response.status_code == 200:
 89 |             # 获得图片
 90 |             file_path = '{0}/{1}.{2}'.format(dir_name,
 91 |                                              md5(response.content).hexdigest(), 'jpg')
 92 |             if not os.path.exists(file_path):
 93 |                 with open(file_path, 'wb') as f:
 94 |                     f.write(response.content)
 95 |             else:
 96 |                 print('Already Downloaded', file_path)
 97 |     except requests.ConnectionError:
 98 |         print('Failed to Save Image')
 99 | 
100 | 
101 | def main(offset):
102 |     json = get_page(offset)
103 |     for item in get_images(json):
104 |         print(item)
105 |         save_image(item)
106 | 
107 | 
108 | if __name__ == '__main__':
109 |     # 控制翻页
110 |     for i in range(0, 6):
111 |         print("第" + str(i + 1) + "页开始下载！！！")
112 |         offset = i * 20
113 |         main(offset)
114 | 


--------------------------------------------------------------------------------
/Spider Study/0x04doubanMovieExplore_spider/doubanMovieExplore_spider.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Created on Wed Sep 23 19:57:30 2020
  4 | 豆瓣选电影页面信息爬取
  5 | @author: luohenyueji
  6 | """
  7 | 
  8 | import json
  9 | import time
 10 | import requests
 11 | from fake_useragent import UserAgent
 12 | from requests.exceptions import RequestException
 13 | import csv
 14 | 
 15 | 
 16 | # ----- 获得网页
 17 | def get_one_page(page_start):
 18 |     # 定义请求url
 19 |     # url获得方法见
 20 |     # https://www.cnblogs.com/dcpeng/p/13589433.html
 21 |     url = "https://movie.douban.com/j/search_subjects"
 22 | 
 23 |     ua = UserAgent()
 24 |     # 随机产生UserAgent，定义请求头，防止反爬
 25 |     # 详情见https://blog.csdn.net/u013421629/article/details/78168430
 26 |     headers = {'User-Agent': ua.random}
 27 |     params = {
 28 |         # 类型
 29 |         "type": "movie",
 30 |         # 标签
 31 |         "tag": "热门",
 32 |         # 排序方式
 33 |         "sort": "recommend",
 34 |         # 每页显示数量
 35 |         "page_limit": "20",
 36 |         # 开始页
 37 |         "page_start": page_start
 38 |     }
 39 |     try:
 40 |         response = requests.get(
 41 |             url=url,
 42 |             headers=headers,
 43 |             params=params,
 44 |             verify=False
 45 |         )
 46 |         if response.status_code == 200:
 47 |             # 方式一:直接转换json方法
 48 |             results = response.json()
 49 |             # # 方式二: 手动转换
 50 |             # # 获取字节串
 51 |             # content = response.content
 52 |             # # 转换成字符串
 53 |             # string = content.decode('utf-8')
 54 |             # # 把字符串转成python数据类型
 55 |             # results = json.loads(string)
 56 |             return results
 57 |         return None
 58 |     except RequestException:
 59 |         return None
 60 | 
 61 | 
 62 | # ----- 解析数据    
 63 | def parse_one_page(results, page_start):
 64 |     for i, item in enumerate(results["subjects"]):
 65 |         yield {
 66 |             # 序号
 67 |             'index': i + 1 + page_start,
 68 |             # 名字
 69 |             "title": item["title"],
 70 |             # 豆瓣评分
 71 |             'rate': item["rate"],
 72 |             # 豆瓣链接
 73 |             'url': item["url"]
 74 |         }
 75 | 
 76 | 
 77 | # ----- 保存为json数据
 78 | def write_to_json(content):
 79 |     with open('result.txt', 'a', encoding='utf-8') as f:
 80 |         f.write(json.dumps(content, ensure_ascii=False) + '\n')
 81 | 
 82 | 
 83 | # ----- 保存数据为csv文件
 84 | def write_to_csvFile(content):
 85 |     with open("result.csv", 'a', encoding='utf-8-sig', newline='') as f:
 86 |         writer = csv.DictWriter(f, fieldnames=content.keys())
 87 |         # 如果是第一行，写入表头
 88 |         if int(content['index']) == 1:
 89 |             writer.writeheader()
 90 |         writer.writerow(content)
 91 | 
 92 | 
 93 | def main(page_start):
 94 |     results = get_one_page(page_start)
 95 |     for item in parse_one_page(results, page_start):
 96 |         print(item)
 97 |         write_to_json(item)
 98 |         write_to_csvFile(item)
 99 | 
100 | 
101 | if __name__ == '__main__':
102 |     # 循环构建请求参数并且发送请求
103 |     for i in range(0, 100, 20):
104 |         main(i)
105 |         time.sleep(1)
106 | 


--------------------------------------------------------------------------------
/Spider Study/0x04doubanMovieExplore_spider/ref.md:
--------------------------------------------------------------------------------
1 | # 参考
2 | + https://github.com/DropsDevopsOrg/ECommerceCrawlers/tree/master/OthertCrawler/0x02douban
3 | + https://www.cnblogs.com/dcpeng/p/13589433.html


--------------------------------------------------------------------------------
/Spider Study/0x04doubanMovieExplore_spider/result.csv:
--------------------------------------------------------------------------------
  1 | ﻿index,title,rate,url
  2 | 1,从邪恶中拯救我,7.2,https://movie.douban.com/subject/30425219/
  3 | 2,神弃之地,6.9,https://movie.douban.com/subject/27194336/
  4 | 3,OK老板娘,6.3,https://movie.douban.com/subject/35127317/
  5 | 4,未怀孕,7.0,https://movie.douban.com/subject/34440481/
  6 | 5,祖孙大战,6.1,https://movie.douban.com/subject/26887069/
  7 | 6,我想结束这一切,7.3,https://movie.douban.com/subject/30129061/
  8 | 7,鸣鸟不飞：乌云密布,8.3,https://movie.douban.com/subject/33425932/
  9 | 8,1/2的魔法,7.7,https://movie.douban.com/subject/30401849/
 10 | 9,树上有个好地方,7.9,https://movie.douban.com/subject/30299040/
 11 | 10,妙先生,6.3,https://movie.douban.com/subject/34888476/
 12 | 11,釜山行2：半岛,5.1,https://movie.douban.com/subject/30299515/
 13 | 12,辣手保姆2：女王蜂,5.7,https://movie.douban.com/subject/34938650/
 14 | 13,误杀,7.7,https://movie.douban.com/subject/30176393/
 15 | 14,冰雪奇缘2,7.2,https://movie.douban.com/subject/25887288/
 16 | 15,多哥,8.8,https://movie.douban.com/subject/30271841/
 17 | 16,我和我的祖国,7.7,https://movie.douban.com/subject/32659890/
 18 | 17,小妇人,8.1,https://movie.douban.com/subject/26348103/
 19 | 18,数码宝贝：最后的进化,7.8,https://movie.douban.com/subject/30482645/
 20 | 19,灰猎犬号,8.2,https://movie.douban.com/subject/26871906/
 21 | 20,#活着,5.6,https://movie.douban.com/subject/34462775/
 22 | 21,理查德·朱维尔的哀歌,8.2,https://movie.douban.com/subject/25842038/
 23 | 22,铁雨2：首脑峰会,5.8,https://movie.douban.com/subject/30479644/
 24 | 23,黑帮大佬和我的365日,5.5,https://movie.douban.com/subject/34968329/
 25 | 24,温蒂妮,7.4,https://movie.douban.com/subject/34603816/
 26 | 25,决战中途岛,7.7,https://movie.douban.com/subject/26786669/
 27 | 26,进击的巨人：编年史,8.5,https://movie.douban.com/subject/35088569/
 28 | 27,少年的你,8.3,https://movie.douban.com/subject/30166972/
 29 | 28,东北往事：我叫刘海柱,5.5,https://movie.douban.com/subject/35141706/
 30 | 29,翻译疑云,7.2,https://movie.douban.com/subject/30145117/
 31 | 30,逃离比勒陀利亚,7.6,https://movie.douban.com/subject/27046758/
 32 | 31,黑水,8.5,https://movie.douban.com/subject/30331959/
 33 | 32,克劳斯：圣诞节的秘密,8.7,https://movie.douban.com/subject/26858510/
 34 | 33,叔·叔,7.7,https://movie.douban.com/subject/33428622/
 35 | 34,怪胎,7.0,https://movie.douban.com/subject/34850590/
 36 | 35,前哨,7.1,https://movie.douban.com/subject/30212622/
 37 | 36,给我翅膀,8.7,https://movie.douban.com/subject/30410114/
 38 | 37,绅士们,8.3,https://movie.douban.com/subject/30211998/
 39 | 38,1917,8.5,https://movie.douban.com/subject/30252495/
 40 | 39,剧场,7.9,https://movie.douban.com/subject/33446524/
 41 | 40,野性的呼唤,7.1,https://movie.douban.com/subject/27199324/
 42 | 41,爱玛,7.2,https://movie.douban.com/subject/30361878/
 43 | 42,清白,6.8,https://movie.douban.com/subject/33379493/
 44 | 43,逃走的女人,7.2,https://movie.douban.com/subject/34958725/
 45 | 44,中国机长,6.7,https://movie.douban.com/subject/30295905/
 46 | 45,致允熙,8.2,https://movie.douban.com/subject/30488003/
 47 | 46,双子杀手,6.9,https://movie.douban.com/subject/3097572/
 48 | 47,南山的部长们,8.1,https://movie.douban.com/subject/30241298/
 49 | 48,7号房的礼物,8.3,https://movie.douban.com/subject/34875369/
 50 | 49,多力特的奇幻冒险,5.9,https://movie.douban.com/subject/27000981/
 51 | 50,沉睡魔咒2,6.1,https://movie.douban.com/subject/26426056/
 52 | 51,超能计划,5.6,https://movie.douban.com/subject/30330875/
 53 | 52,影里,7.4,https://movie.douban.com/subject/30291552/
 54 | 53,82年生的金智英,8.6,https://movie.douban.com/subject/30327842/
 55 | 54,战争幽灵,6.6,https://movie.douban.com/subject/26969541/
 56 | 55,剧场版 架空OL日记,8.1,https://movie.douban.com/subject/34336025/
 57 | 56,默片解说员,7.8,https://movie.douban.com/subject/30135942/
 58 | 57,从不，很少，有时，总是,7.6,https://movie.douban.com/subject/33411505/
 59 | 58,秘密动物园,6.5,https://movie.douban.com/subject/30353076/
 60 | 59,意大利制造,6.6,https://movie.douban.com/subject/27175453/
 61 | 60,征途,5.1,https://movie.douban.com/subject/26705567/
 62 | 61,正直的候选人,7.1,https://movie.douban.com/subject/33441983/
 63 | 62,两只老虎,6.0,https://movie.douban.com/subject/30196805/
 64 | 63,真心半解,8.0,https://movie.douban.com/subject/33420285/
 65 | 64,爱尔兰人,8.8,https://movie.douban.com/subject/6981153/
 66 | 65,约束的罪行,7.2,https://movie.douban.com/subject/35155117/
 67 | 66,异界,6.3,https://movie.douban.com/subject/30264504/
 68 | 67,我是大哥大 电影版,8.1,https://movie.douban.com/subject/33400537/
 69 | 68,知晓天空之蓝的人啊,7.6,https://movie.douban.com/subject/33218345/
 70 | 69,叶问4：完结篇,6.9,https://movie.douban.com/subject/26885074/
 71 | 70,攀登者,6.1,https://movie.douban.com/subject/30413052/
 72 | 71,抓住救命稻草的野兽们,6.9,https://movie.douban.com/subject/30180781/
 73 | 72,狩猎,7.3,https://movie.douban.com/subject/30182726/
 74 | 73,终结者：黑暗命运,6.9,https://movie.douban.com/subject/27109633/
 75 | 74,猎谎者,7.0,https://movie.douban.com/subject/35027709/
 76 | 75,永生守卫,6.3,https://movie.douban.com/subject/27009869/
 77 | 76,半个喜剧,7.4,https://movie.douban.com/subject/30269016/
 78 | 77,只有芸知道,6.4,https://movie.douban.com/subject/33436135/
 79 | 78,最后一封信,7.4,https://movie.douban.com/subject/30271236/
 80 | 79,世界第一初恋：求婚篇,8.0,https://movie.douban.com/subject/34810454/
 81 | 80,犬鸣村,5.4,https://movie.douban.com/subject/30408298/
 82 | 81,夏日国度,7.5,https://movie.douban.com/subject/27033572/
 83 | 82,侵入者,6.5,https://movie.douban.com/subject/34845342/
 84 | 83,隐形人,7.1,https://movie.douban.com/subject/2364086/
 85 | 84,变身特工,7.4,https://movie.douban.com/subject/27000084/
 86 | 85,受益人,6.6,https://movie.douban.com/subject/30299311/
 87 | 86,85年盛夏,7.0,https://movie.douban.com/subject/33974174/
 88 | 87,爆炸新闻,7.1,https://movie.douban.com/subject/30289194/
 89 | 88,星球大战9：天行者崛起,6.3,https://movie.douban.com/subject/22265687/
 90 | 89,刺猬索尼克,6.1,https://movie.douban.com/subject/25905044/
 91 | 90,被光抓走的人,7.0,https://movie.douban.com/subject/30394535/
 92 | 91,猛禽小队和哈莉·奎茵,5.7,https://movie.douban.com/subject/30160393/
 93 | 92,想哭的我戴上了猫的面具,6.5,https://movie.douban.com/subject/34964061/
 94 | 93,惊天营救,7.1,https://movie.douban.com/subject/30314127/
 95 | 94,性之剧毒,5.8,https://movie.douban.com/subject/33414580/
 96 | 95,天衣无缝的她,7.6,https://movie.douban.com/subject/27615267/
 97 | 96,列夫·朗道：娜塔莎,6.3,https://movie.douban.com/subject/34960201/
 98 | 97,命运之夜——天之杯Ⅲ：春之歌,9.1,https://movie.douban.com/subject/26759820/
 99 | 98,最好的还未到来,7.9,https://movie.douban.com/subject/30411557/
100 | 99,女鬼桥,6.2,https://movie.douban.com/subject/34912837/
101 | 100,囧妈,5.9,https://movie.douban.com/subject/30306570/
102 | 


--------------------------------------------------------------------------------
/Spider Study/0x05baotuVideo_spider/baotuVideo_spider.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Created on Wed Sep 22 23:03:23 2020
  4 | 包图网视频爬取
  5 | @author: luohenyueji
  6 | """
  7 | 
  8 | import os
  9 | import queue
 10 | import threading
 11 | import time
 12 | import requests
 13 | from bs4 import BeautifulSoup
 14 | from lxml import etree
 15 | import re
 16 | 
 17 | 
 18 | # ----- 时间装饰器，打印运行时间
 19 | def usetime(func):
 20 |     def inner(*args, **kwargs):
 21 |         time_start = time.time()
 22 |         # 装饰的函数在此运行
 23 |         func(*args, **kwargs)
 24 |         time_run = time.time() - time_start
 25 |         print(func.__name__ + '用时 %.2f 秒' % time_run)
 26 | 
 27 |     return inner
 28 | 
 29 | 
 30 | class Baotu(object):
 31 |     """
 32 |     负责爬虫存储
 33 |     TODO:
 34 |         目标网络有反爬虫机制，多线程下导致有些目标下载失败
 35 |         1.解决多线程下网络错误：增加retry机制
 36 |         2.异步进行
 37 |     注意点：
 38 |     requests.text与requests.context区别
 39 |     """
 40 | 
 41 |     # ----- 初始化
 42 |     def __init__(self, url='https://ibaotu.com/shipin/', thread=1, max_page=250, useragent=None, getOriginFile=False):
 43 |         """
 44 |         :param url: 链接名，如果是自己搜索关键词下载，如搜索中国则链接为https://ibaotu.com/s-shipin/zhongguo.html
 45 |         :param thread: 线程数
 46 |         :param max_page: 下载最大页数
 47 |         :param useragent: 自定义浏览器headers
 48 |         :param getOriginFile: 是否获取原始视频
 49 |         """
 50 |         
 51 |         # url为包图网视频链接
 52 |         self.url = url
 53 |         # 线程数
 54 |         self.thread = thread
 55 |         # 最大页数
 56 |         self.page = max_page
 57 |         self.useragent = useragent
 58 |         self.header = self._get_header()
 59 |         # 请求队列
 60 |         self.que = queue.Queue()
 61 |         # 失败队列
 62 |         self.fail = queue.Queue()
 63 |         # 检测当前共有多少分页若用户输入大于当前页面分页则使用当前分页
 64 |         page = self._get_maxpage()
 65 |         if self.page > page:
 66 |             self.page = page
 67 |         # 是否下载原始文件
 68 |         self.getOriginFile = getOriginFile
 69 |         super(Baotu, self).__init__()
 70 | 
 71 |     # ----- 设置浏览器类型
 72 |     def _get_header(self):
 73 |         if not isinstance(self.useragent, str):
 74 |             self.useragent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36'
 75 |             return {'User-Agent': self.useragent}
 76 | 
 77 |     # ----- 检测当前共有多少分页
 78 |     def _get_maxpage(self):
 79 |         req = requests.get(self.url, headers=self.header, timeout=10, verify=False).text
 80 |         html = etree.HTML(req)
 81 |         # 获得总页数
 82 |         # xpath找到class为pagelist的div标签下第8个a标签下的text文本
 83 |         pageNum = int(html.xpath("//div[@class='pagelist']/a[8]/text()")[0])
 84 |         return pageNum
 85 | 
 86 |     # ----- 生产者模型，获取请求列表
 87 |     @usetime
 88 |     def request(self):
 89 |         for i in range(1, self.page + 1):
 90 |             try:
 91 |                 print(self.url)
 92 |                 # 页数后面会变化自行设置
 93 |                 req = requests.get(self.url + '7-0-0-0-0-{}.html'.format(i), headers=self.header, timeout=10,
 94 |                                    verify=False)
 95 |                 print('正在爬取第%d页的数据' % i)
 96 |                 if req.status_code == 200:
 97 |                     bs = BeautifulSoup(req.text)
 98 |                     # bs找到所有video以及class为scrollLoading的img标签
 99 |                     # 第一个findall找到视频地址，第二个findall找到视频名称
100 |                     for _, n in zip(bs.find_all('video', src=True), bs.find_all('img', {'class': 'scrollLoading'})):
101 |                         # 将每个视频组成字典形式放入队列中，{url:视频url,name:视频的名字)}
102 |                         self.que.put({'url': 'http:' + _['src'], 'name': n['alt']})
103 |             except Exception as e:
104 |                 print(e)
105 |                 pass
106 |         # 计算队列的长度及存储多少视频字典
107 |         print('共有{}条视频需要下载！'.format(self.que.qsize()))
108 | 
109 |     # ----- 消费者模型，进行下载
110 |     # 默认下载路径为当前文件夹下
111 |     @usetime
112 |     def download(self, path=os.getcwd()):
113 |         # 判断如果队列不为空进行下载
114 |         while not self.que.empty():
115 |             # 从队列中取出视频字典
116 |             data = self.que.get()
117 |             # 如果要下载原始视频
118 |             if self.getOriginFile:
119 |                 url = data['url']
120 |                 data['url'] = re.sub("mp4_10s.mp4", "mp4", url)
121 |             try:
122 |                 req = requests.get(url=data['url'], headers=self.header, verify=False)
123 |                 if req.status_code == 200:
124 |                     print('-' * 10, data['url'], '-' * 10)
125 |                     # 如果用户设置path不存在，则创建该path
126 |                     if not os.path.exists(path):
127 |                         os.mkdir(path.strip().rstrip('\\'))
128 |                     # 保存数据
129 |                     # os.path.splitext(data['url'])[-1]为文件后缀名
130 |                     with open(os.path.join(path, data['name'] + os.path.splitext(data['url'])[-1]), 'wb') as f:
131 |                         f.write(req.content)
132 |                 else:
133 |                     # 如果请求失败，等待2秒重新下载，感觉没用
134 |                     time.sleep(2)
135 |                     req2 = requests.get(url=data['url'], headers=self.header, verify=False)
136 |                     if req2.status_code == 200:
137 |                         print('+' * 10, data['url'], '+' * 10)
138 |                         with open(os.path.join(path, data['name'] + os.path.splitext(data['url'])[-1]), 'wb') as f:
139 |                             f.write(req.content)
140 |                     else:
141 |                         # 将失败的字典存入fail队列中
142 |                         self.fail.put(data)
143 |                         print(data['name'] + '\t' + '下载失败！')
144 |             except Exception as e:
145 |                 print(e)
146 |                 continue
147 | 
148 |     # ----- 控制线程，进行工作
149 |     def run(self):
150 |         # request线程，进行生产者任务
151 |         t1 = threading.Thread(target=self.request)
152 |         # 线程启动
153 |         t1.start()
154 |         # 等待其他线程结束，再结束线程
155 |         t1.join()
156 |         thread_list = []
157 | 
158 |         # 创建多个download线程
159 |         for i in range(self.thread):
160 |             t = threading.Thread(target=self.download, args=('./save',))
161 |             thread_list.append(t)
162 |         # 开启线程
163 |         for t in thread_list:
164 |             t.start()
165 |         # 子线程全部加入，主线程等所有子线程运行完毕
166 |         for t in thread_list:
167 |             t.join()
168 |         return self.fail.queue
169 | 
170 | if __name__ == '__main__':
171 |     failQueue = Baotu(max_page=1, thread=4, getOriginFile=True).run()
172 |     print("-" * 50)
173 |     print("失败条数：{}条".format(len(failQueue)))
174 | 


--------------------------------------------------------------------------------
/Spider Study/0x05baotuVideo_spider/ref.md:
--------------------------------------------------------------------------------
1 | # 参考
2 | + https://github.com/DropsDevopsOrg/ECommerceCrawlers/tree/master/OthertCrawler/0x04baotu


--------------------------------------------------------------------------------
/Spider Study/0x06quanjingCategoryImg_spider/quanjingCategoryImg_spider.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | Created on Wed Sep 23 23:13:44 2020
 4 | 全景网分类图片爬取
 5 | @author: luohenyueji
 6 | """
 7 | 
 8 | import os
 9 | import queue
10 | import threading
11 | import time
12 | 
13 | import requests
14 | from bs4 import BeautifulSoup
15 | 
16 | # 链接名，根据category种类号110003设定，如果要查看其他的种类见 https://www.quanjing.com/category
17 | string = 'https://www.quanjing.com/category/110003/'
18 | # 队列
19 | url_queue = queue.Queue()
20 | 
21 | 
22 | # ----- 获取下载网页链接
23 | def get_url(page):
24 |     for i in range(1, page + 1):
25 |         url = string + '{}.html'.format(i)
26 |         url_queue.put(url)
27 |     print(url_queue.queue)
28 | 
29 | 
30 | # ----- 下载网页
31 | def spider(url_queue):
32 |     # 队列为空则返回
33 |     if url_queue.empty():
34 |         return
35 |     # 获得网页链接
36 |     url = url_queue.get()
37 |     # 获得网页名
38 |     floder_name = os.path.split(url)[1].split('.')[0]
39 |     # 创建文件夹
40 |     os.makedirs('第{0}页'.format(floder_name), exist_ok=True)
41 |     html = requests.get(url=url, verify=False).text
42 |     soup = BeautifulSoup(html, 'lxml')
43 |     # 解析
44 |     ul = soup.find_all('a', attrs={"class": "item lazy"})
45 |     for i, child in enumerate(ul):
46 |         # 获得当前标签下的子标签的图片链接
47 |         downurl = child.img['src']
48 |         # 下载
49 |         result = requests.get(url=downurl, verify=False).content
50 |         with open('第{0}页\{1}.jpg'.format(floder_name, i), 'ab') as f:
51 |             f.write(result)
52 |         print('第{0}页第{1}张存储完成'.format(floder_name, i))
53 | 
54 |     if not url_queue.empty():
55 |         spider(url_queue)
56 | 
57 | 
58 | def main(queue_count=3):
59 |     queue_list = []
60 |     # 线程数
61 |     queue_count = queue_count
62 |     for i in range(queue_count):
63 |         t = threading.Thread(target=spider, args=(url_queue,))
64 |         queue_list.append(t)
65 |     for t in queue_list:
66 |         t.start()
67 |     for t in queue_list:
68 |         t.join()
69 | 
70 | 
71 | if __name__ == '__main__':
72 |     # 需要爬取的页数
73 |     page = 3
74 |     get_url(page)
75 |     start_time = time.time()
76 |     main()
77 |     print("用时：%f s" % (time.time() - start_time))
78 | 


--------------------------------------------------------------------------------
/Spider Study/0x06quanjingCategoryImg_spider/ref.md:
--------------------------------------------------------------------------------
1 | # 参考
2 | + https://github.com/DropsDevopsOrg/ECommerceCrawlers/tree/master/OthertCrawler/0x05quanjing


--------------------------------------------------------------------------------
/Spider Study/0x07doubanMusicChart_spider/doubanMusicChart_spider.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | 
 4 | Created on Wed Sep 23 23:53:41 2020
 5 | 豆瓣音乐排行榜爬取
 6 | @author: luohenyueji
 7 | """
 8 | 
 9 | from bs4 import BeautifulSoup
10 | import requests
11 | from fake_useragent import UserAgent
12 | import csv
13 | import json
14 | 
15 | 
16 | # ----- 解析数据
17 | def parseHtml(url):
18 |     ua = UserAgent()
19 |     # 随机产生UserAgent，定义请求头，防止反爬
20 |     # 详情见https://blog.csdn.net/u013421629/article/details/78168430
21 |     headers = {'User-Agent': ua.random}
22 | 
23 |     response = requests.get(url, headers=headers)
24 |     soup = BeautifulSoup(response.text, 'lxml')
25 |     # 使用css选择器获取class="col5"节点下面的所有li节点
26 |     for index, li in enumerate(soup.select(".col5 li")):
27 |         if index < 10:
28 |             yield {
29 |                 # 序号
30 |                 '歌曲排名': li.span.text,
31 |                 # 歌曲名，使用方法选择器
32 |                 "歌曲名": li.find(class_="icon-play").a.text,
33 |                 # 演唱者
34 |                 '演唱者': li.find(class_="intro").p.text.strip().split()[0],
35 |                 # 播放次数
36 |                 '播放次数': li.find(class_="intro").p.text.strip().split()[-1],
37 |                 # 上榜时间
38 |                 '上榜时间': li.find(class_="days").text.strip()
39 |             }
40 |         else:
41 |             yield {
42 |                 # 序号
43 |                 '歌曲排名': li.span.text,
44 |                 # 歌曲名，使用方法选择器
45 |                 "歌曲名": li.find(class_="icon-play").a.text,
46 |                 # 演唱者
47 |                 '演唱者': li.find(class_="intro").p.contents[-1].strip().split()[0],
48 |                 # 播放次数
49 |                 '播放次数': li.find(class_="intro").p.contents[-1].strip().split()[-1],
50 |                 # 上榜时间
51 |                 '上榜时间': li.find(class_="days").text.strip()
52 |             }
53 | 
54 | 
55 | # ----- 保存数据为json文件
56 | def write_to_json(content):
57 |     with open('result.txt', 'a', encoding='utf-8') as f:
58 |         # ensure_ascii不将中文格式化
59 |         f.write(json.dumps(content, ensure_ascii=False) + '\n')
60 | 
61 | 
62 | # ----- 保存数据为csv文件
63 | def write_to_csvFile(content):
64 |     with open("result.csv", 'a', encoding='utf-8-sig', newline='') as f:
65 |         writer = csv.DictWriter(f, fieldnames=content.keys())
66 |         # 如果是第一行，写入表头
67 |         if int(content['歌曲排名']) == 1:
68 |             writer.writeheader()
69 |         writer.writerow(content)
70 | 
71 | 
72 | def main():
73 |     # 豆瓣音乐排行榜
74 |     url = "https://music.douban.com/chart"
75 |     for item in parseHtml(url):
76 |         print(item)
77 |         write_to_json(item)
78 |         write_to_csvFile(item)
79 | 
80 | 
81 | if __name__ == '__main__':
82 |     main()
83 | 


--------------------------------------------------------------------------------
/Spider Study/0x07doubanMusicChart_spider/ref.md:
--------------------------------------------------------------------------------
1 | # 参考
2 | + https://www.jb51.net/article/167742.htm
3 | + https://github.com/DropsDevopsOrg/ECommerceCrawlers/tree/master/OthertCrawler/0x06douban_music


--------------------------------------------------------------------------------
/Spider Study/0x07doubanMusicChart_spider/result.csv:
--------------------------------------------------------------------------------
 1 | ﻿歌曲排名,歌曲名,演唱者,播放次数,上榜时间
 2 | 1,老师说 (诗朗诵),王敖（音乐人）,720次播放,(上榜6天)
 3 | 2,去远方,俊州,905次播放,(上榜1天)
 4 | 3, 生而为人,祥树,1963次播放,(上榜3天)
 5 | 4,2020.08-梦神（2020+2009版）,我爱陈绮贞,1120次播放,(上榜3天)
 6 | 5,如果你冷-cover,林若然,258次播放,(上榜6天)
 7 | 6,一点夜晚,title乐队,301次播放,(上榜6天)
 8 | 7,HollyJeremy婚礼纪念（词：赵鹤 曲/演唱：郝致用）,许婧靖,428次播放,(上榜10天)
 9 | 8,今生无法把你忘记,词曲唱作人刘良智,340次播放,(上榜10天)
10 | 9,Duet,Cottonseed,65次播放,(上榜1天)
11 | 10,Set Sail,SakyoStan,47次播放,(上榜1天)
12 | 11,这就是我,杜老师,1112次播放,(上榜17天)
13 | 12,完美的真空 A Perfect Vacuum,Fractals,623次播放,(上榜15天)
14 | 13,悲伤的歌1.0 demo(上） 20200826,小淞,539次播放,(上榜15天)
15 | 14,Growing up-demo,Garry,75次播放,(上榜8天)
16 | 15,cry of fear,鲁智深,43次播放,(上榜6天)
17 | 16,Crush,曹悦,35次播放,(上榜6天)
18 | 17,in another world,mutedevil,169次播放,(上榜3天)
19 | 18,Flying,Megapsyche,133次播放,(上榜10天)
20 | 19,夜郎自大:痛自创艾（feat kim）,无量亿劫,236次播放,(上榜13天)
21 | 20,明日之后-圣诞音乐-完整版_181211,Maxteiner,148次播放,(上榜8天)
22 | 


--------------------------------------------------------------------------------
/Spider Study/0x08doubanSingleMovieComment_spider/doubanSingleMovieComment_spider.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Created on Thu Sep 24 01:08:07 2020
  4 | 豆瓣单部电影影评爬取与分析
  5 | @author: luohenyueji
  6 | """
  7 | 
  8 | import requests
  9 | from lxml import etree
 10 | import time
 11 | import random
 12 | import jieba
 13 | import numpy as np
 14 | from PIL import Image
 15 | from matplotlib import pyplot as plt
 16 | from wordcloud import WordCloud
 17 | from snownlp import SnowNLP
 18 | from fake_useragent import UserAgent
 19 | 
 20 | # 会话维持
 21 | session = requests.Session()
 22 | # 代理
 23 | proxies = {
 24 |     "http": "http://113.121.41.26:9999",
 25 | }
 26 | ua = UserAgent()
 27 | # 随机产生UserAgent，定义请求头，防止反爬
 28 | # 详情见https://blog.csdn.net/u013421629/article/details/78168430
 29 | headers = {'User-Agent': ua.random}
 30 | 
 31 | 
 32 | # ----- 登录账号
 33 | def login(tologin=True):
 34 |     if not tologin:
 35 |         return
 36 |     url = "https://accounts.douban.com/j/mobile/login/basic"
 37 |     data = {
 38 |         'name': '豆瓣账号',
 39 |         'password': '密码',
 40 |         'remember': 'false'
 41 |     }
 42 |     # 设置代理，从免费代理网站上找出一个可用的代理IP
 43 |     user = session.post(url=url, headers=headers, data=data, proxies=proxies)
 44 |     print(user.text)
 45 | 
 46 | 
 47 | # ----- 根据电影链接获取评论
 48 | def spider_lianjie(lianjie):
 49 |     page = 0
 50 |     f = open('result.txt', 'a+', encoding="utf-8")
 51 |     f.seek(0)
 52 |     # 从文件指针的地方开始删除内容
 53 |     # 结合上句话也就是说清空所有内容
 54 |     f.truncate()
 55 |     while True:
 56 |         comment_url = lianjie[:42] + 'comments'
 57 |         params = {
 58 |             'start': page * 20,
 59 |             'limit': 20,
 60 |             'sort': 'new_score',
 61 |             'status': 'P'
 62 |         }
 63 |         html = session.get(url=comment_url, params=params, proxies=proxies, headers=headers)
 64 |         page += 1
 65 |         print("开始爬取第{0}页***********************************************************************：".format(page))
 66 |         print(html.url)
 67 |         xpath_tree = etree.HTML(html.text)
 68 |         comment_divs = xpath_tree.xpath('//*[@id="comments"]/div')
 69 |         if len(comment_divs) > 2:
 70 |             # 获取每一条评论的具体内容
 71 |             for comment_div in comment_divs:
 72 |                 comment = comment_div.xpath('./div[2]/p/span/text()')
 73 |                 if len(comment) > 0:
 74 |                     print(comment[0])
 75 |                     f.write(comment[0] + '\n')
 76 |             time.sleep(int(random.choice([0.5, 0.2, 0.3])))
 77 |         else:
 78 |             f.close()
 79 |             print("大约共{0}页评论".format(page - 1))
 80 |             break
 81 | 
 82 | 
 83 | # ----- 根据电影id获取评论
 84 | def spider_id(movie_id):
 85 |     page = 0
 86 |     f = open('result.txt', 'a+', encoding='utf-8')
 87 |     f.seek(0)
 88 |     f.truncate()
 89 |     while True:
 90 |         # 链接
 91 |         movie_url = 'https://movie.douban.com/subject/' + movie_id + '/comments?'
 92 |         # 参数
 93 |         params = {
 94 |             'start': page * 20,
 95 |             'limit': 20,
 96 |             'sort': 'new_score',
 97 |             'status': 'P'
 98 |         }
 99 |         # 获取数据
100 |         html = session.get(url=movie_url, params=params, proxies=proxies, headers=headers)
101 |         print(html.status_code)
102 |         page += 1
103 |         print("开始爬取第{0}页".format(page))
104 |         print(html.url)
105 |         xpath_tree = etree.HTML(html.text)
106 |         # 评论根节点
107 |         comment_divs = xpath_tree.xpath('//*[@id="comments"]/div')
108 |         if len(comment_divs) > 2:
109 |             # 获取每一条评论的具体内容
110 |             for comment_div in comment_divs:
111 |                 comment = comment_div.xpath('./div[2]/p/span/text()')
112 |                 # 保存内容
113 |                 if len(comment) > 0:
114 |                     print(comment[0])
115 |                     f.write(comment[0] + '\n')
116 |             # 随机停止时间
117 |             time.sleep(int(random.choice([0.5, 0.2, 0.3])))
118 |         else:
119 |             f.close()
120 |             print("大约共{0}页评论".format(page - 1))
121 |             break
122 | 
123 | 
124 | # ----- 定义搜索类型
125 | def spider_kind():
126 |     kind = int(input("请选择搜索类型：1.根据电影链接 2.根据电影id："))
127 |     if kind == 1:
128 |         # example: lianjie = 'https://movie.douban.com/subject/30425219/'
129 |         lianjie = input("请输入电影链接：")
130 |         spider_lianjie(lianjie)
131 |     elif kind == 2:
132 |         # example: movie_id = '30425219'
133 |         movie_id = input("请输入电影id：")
134 |         spider_id(movie_id)
135 |     else:
136 |         print("sorry,输入错误！")
137 | 
138 | 
139 | # ----- 分割字符
140 | def cut_word():
141 |     with open('result.txt', 'r', encoding='utf-8') as file:
142 |         # 读取文件里面的全部内容
143 |         comment_txt = file.read()
144 |         # 使用jieba进行分割
145 |         wordlist = jieba.cut(comment_txt)
146 |         wl = "/".join(wordlist)
147 |         # print(wl)
148 |         return wl
149 | 
150 | 
151 | def create_word_cloud():
152 |     # 设置词云形状图片,numpy+PIL方式读取图片
153 |     # wc_mask = np.array(Image.open('Emile.jpg'))
154 |     # 数据清洗词列表
155 |     stop_words = ['就是', '不是', '但是', '还是', '只是', '这样', '这个', '一个', '什么', '电影', '没有']
156 | 
157 |     # 设置词云的一些配置，如：字体，背景色，词云形状，大小,生成词云对象
158 |     # 设置font_path避免中文出错
159 |     wc = WordCloud(background_color="white", stopwords=stop_words, max_words=50, scale=4,
160 |                    max_font_size=50, random_state=42, font_path='C:\\Windows\\Font\\simkai.ttf')
161 | 
162 |     # 设置mask=wc_mask就可以自定义词云形状
163 |     # wc = WordCloud(mask=wc_mask, background_color="white", stopwords=stop_words, max_words=50, scale=4,
164 |     #                max_font_size=50, random_state=42,font_path='C:\\Windows\\Font\\simkai.ttf')    
165 |     # 生成词云
166 |     wl = cut_word()
167 |     # 根据文本生成词云
168 |     wc.generate(wl)
169 | 
170 |     # 在只设置mask的情况下,你将会得到一个拥有图片形状的词云
171 |     # 开始画图
172 |     plt.imshow(wc, interpolation="bilinear")
173 |     # 为云图去掉坐标轴
174 |     plt.axis("off")
175 |     plt.figure()
176 |     wc.to_file("WordCloud.jpg")
177 |     plt.show()
178 |     
179 | 
180 | # ----- 生成情感分析表
181 | def data_show():
182 |     # 支持中文
183 |     plt.rcParams['font.sans-serif'] = ['SimHei']  # 用来正常显示中文标签
184 |     plt.rcParams['axes.unicode_minus'] = False  # 用来正常显示负号
185 |     f = open('result.txt', 'r', encoding='UTF-8')
186 |     lists = f.readlines()
187 |     sentimentslist = []
188 |     for i in lists:
189 |         # 情感分析
190 |         s = SnowNLP(i)
191 |         sentimentslist.append(s.sentiments)
192 |     print(sentimentslist)
193 |     print(len(sentimentslist))
194 |     plt.hist(sentimentslist, bins=10, facecolor='g')
195 |     plt.xlabel('情感概率')
196 |     plt.ylabel('数量')
197 |     plt.title('情感分析')
198 |     plt.savefig("sentiment.jpg",dpi=300)
199 |     plt.show()
200 | 
201 | 
202 | 
203 | if __name__ == '__main__':
204 |     # 登录账号
205 |     # False表示不登录，不登录的话只能查看前200条评论，登录之后可以查看500条，但登录多了会需要验证码
206 |     login(False)
207 |     # 爬取网页
208 |     spider_kind()
209 |     # 生成词云
210 |     create_word_cloud()
211 |     # 生成情感分析表
212 |     data_show()
213 | 


--------------------------------------------------------------------------------
/Spider Study/0x08doubanSingleMovieComment_spider/ref.md:
--------------------------------------------------------------------------------
1 | # 参考
2 | + https://github.com/DropsDevopsOrg/ECommerceCrawlers/tree/master/OthertCrawler/0x13douban_yingping


--------------------------------------------------------------------------------
/Spider Study/0x09xiaomiShopCategoryApp_spider/15.csv:
--------------------------------------------------------------------------------
 1 | ﻿ID,应用名称,应用子类,下载链接
 2 | 108048,王者荣耀,网游RPG,http://app.mi.com/download/108048
 3 | 547468,和平精英,动作枪战,http://app.mi.com/download/547468
 4 | 395801,阴阳师,网游RPG,http://app.mi.com/download/395801
 5 | 393944,迷你世界,休闲创意,http://app.mi.com/download/393944
 6 | 662234,逃跑吧！少年-新赛季,休闲创意,http://app.mi.com/download/662234
 7 | 25446,JJ斗地主,棋牌桌游,http://app.mi.com/download/25446
 8 | 463427,我的世界-周年庆,休闲创意,http://app.mi.com/download/463427
 9 | 121542,穿越火线-枪战王者,动作枪战,http://app.mi.com/download/121542
10 | 817118,美味星球2,休闲创意,http://app.mi.com/download/817118
11 | 445803,贪吃蛇大作战&reg;,休闲创意,http://app.mi.com/download/445803
12 | 58634,开心消消乐-新版来袭,休闲创意,http://app.mi.com/download/58634
13 | 463380,QQ飞车手游,赛车体育,http://app.mi.com/download/463380
14 | 692944,美味星球,动作枪战,http://app.mi.com/download/692944
15 | 819119,光&middot;遇,休闲创意,http://app.mi.com/download/819119
16 | 263189,部落冲突,战争策略,http://app.mi.com/download/263189
17 | 806195,阴阳师：妖怪屋,休闲创意,http://app.mi.com/download/806195
18 | 48522,植物大战僵尸2,塔防迷宫,http://app.mi.com/download/48522
19 | 537978,第五人格,网游RPG,http://app.mi.com/download/537978
20 | 19294,腾讯欢乐斗地主,棋牌桌游,http://app.mi.com/download/19294
21 | 669409,小米斗地主,棋牌桌游,http://app.mi.com/download/669409
22 | 165428,火影忍者,网游RPG,http://app.mi.com/download/165428
23 | 1164637,灰烬战线,战争策略,http://app.mi.com/download/1164637
24 | 786197,荒野乱斗,网游RPG,http://app.mi.com/download/786197
25 | 96928,球球大作战,休闲创意,http://app.mi.com/download/96928
26 | 673386,魔神战纪2-魔域地下城,网游RPG,http://app.mi.com/download/673386
27 | 818005,美味星球(终极版),休闲创意,http://app.mi.com/download/818005
28 | 106432,三国杀,棋牌桌游,http://app.mi.com/download/106432
29 | 730740,樱花校园模拟器,模拟经营,http://app.mi.com/download/730740
30 | 1211564,幻兽爱合成,休闲创意,http://app.mi.com/download/1211564
31 | 21629,神庙逃亡2-八周年庆,跑酷闯关,http://app.mi.com/download/21629
32 | 547779,决战！平安京,网游RPG,http://app.mi.com/download/547779
33 | 450665,元气骑士,动作枪战,http://app.mi.com/download/450665
34 | 728148,斗罗大陆-新武魂,网游RPG,http://app.mi.com/download/728148
35 | 853652,多乐麻将,棋牌桌游,http://app.mi.com/download/853652
36 | 45089,腾讯欢乐麻将全集,棋牌桌游,http://app.mi.com/download/45089
37 | 793914,三国志&middot;战略版-官方正版,战争策略,http://app.mi.com/download/793914
38 | 103249,率土之滨,战争策略,http://app.mi.com/download/103249
39 | 75420,天天象棋,棋牌桌游,http://app.mi.com/download/75420
40 | 296916,皇室战争,战争策略,http://app.mi.com/download/296916
41 | 48217,天天酷跑,跑酷闯关,http://app.mi.com/download/48217
42 | 623587,猫和老鼠,网游RPG,http://app.mi.com/download/623587
43 | 269712,乐乐捕鱼,休闲创意,http://app.mi.com/download/269712
44 | 46890,节奏大师,休闲创意,http://app.mi.com/download/46890
45 | 439837,英魂之刃,网游RPG,http://app.mi.com/download/439837
46 | 1022606,英雄这边请,网游RPG,http://app.mi.com/download/1022606
47 | 88246,梦幻西游,网游RPG,http://app.mi.com/download/88246
48 | 630604,明日之后,网游RPG,http://app.mi.com/download/630604
49 | 640141,多乐中国象棋,棋牌桌游,http://app.mi.com/download/640141
50 | 408109,数独,休闲创意,http://app.mi.com/download/408109
51 | 79963,炉石传说,棋牌桌游,http://app.mi.com/download/79963
52 | 70449,海岛奇兵,战争策略,http://app.mi.com/download/70449
53 | 819633,单机斗地主,棋牌桌游,http://app.mi.com/download/819633
54 | 793910,灌篮高手 正版授权手游,赛车体育,http://app.mi.com/download/793910
55 | 800567,混沌起源-送主题时装,网游RPG,http://app.mi.com/download/800567
56 | 71502,天天斗地主（真人版）,棋牌桌游,http://app.mi.com/download/71502
57 | 950248,魔渊之刃,动作枪战,http://app.mi.com/download/950248
58 | 930926,云上城之歌-异世界MMO,网游RPG,http://app.mi.com/download/930926
59 | 507974,五子棋,棋牌桌游,http://app.mi.com/download/507974
60 | 111494,钢琴块2,休闲创意,http://app.mi.com/download/111494
61 | 509823,狼人杀,棋牌桌游,http://app.mi.com/download/509823
62 | 


--------------------------------------------------------------------------------
/Spider Study/0x09xiaomiShopCategoryApp_spider/ref.md:
--------------------------------------------------------------------------------
1 | # 参考
2 | + https://github.com/DropsDevopsOrg/ECommerceCrawlers/tree/master/OthertCrawler/0x15xiaomiappshop
3 | + https://blog.csdn.net/weixin_42521211/article/details/106965550


--------------------------------------------------------------------------------
/Spider Study/0x09xiaomiShopCategoryApp_spider/xiaomiShopCategoryApp_spider.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Created on Fri Sep 25 01:03:10 2020
  4 | 小米应用商店分类应用数据爬取
  5 | @author: luohenyueji
  6 | """
  7 | 
  8 | import requests
  9 | import csv
 10 | import queue
 11 | from fake_useragent import UserAgent
 12 | 
 13 | 
 14 | class XiaoMiShop:
 15 | 
 16 |     def __init__(self, category=15, max_page=50):
 17 |         """
 18 |         :param category: category下载类别
 19 |         :param max_page: max_page最大下载页数
 20 |         """
 21 |         # 网址解析说明见 https://blog.csdn.net/weixin_42521211/article/details/106965550
 22 |         # 网址解析地址
 23 |         self.base_url = 'http://app.mi.com/categotyAllListApi'
 24 |         # 下载地址
 25 |         self.base_download = 'http://app.mi.com/download/'
 26 | 
 27 |         # csv要保存的头部信息
 28 |         self.csv_header = ['ID', '应用名称', '应用子类', '下载链接']
 29 |         self.max_page = max_page
 30 |         # 下载类别
 31 |         self.category = category
 32 |         self.queue = queue.Queue()
 33 | 
 34 |     # ----- 清洗数据
 35 |     def clean_data(self, data):
 36 |         for i in data:
 37 |             app = {}
 38 |             app['ID'] = i.get('appId')
 39 |             app['应用名称'] = i.get('displayName')
 40 |             app['应用子类'] = i.get('level1CategoryName')
 41 |             app['下载链接'] = self.base_download + str(i.get('appId'))
 42 |             self.queue.put(app)
 43 | 
 44 |     # ----- 获得网页信息
 45 |     def request(self, page):
 46 |         # pageSize表示每页数量，page表示当前页数
 47 |         param = {
 48 |             'page': page,
 49 |             'categoryId': int(self.category),
 50 |             'pageSize': 30
 51 |         }
 52 |         # 随机产生UserAgent，定义请求头，防止反爬
 53 |         headers = {'User-Agent': UserAgent().random}
 54 |         req = requests.get(url=self.base_url, params=param, headers=headers)
 55 |         req.encoding = req.apparent_encoding
 56 |         return req
 57 | 
 58 |     # ----- 失败重新爬取
 59 |     def spider_by_page(self, page, retry=3):
 60 |         # retry 重试次数
 61 |         if retry > 0:
 62 |             print('重试第{}页'.format(page))
 63 |             req = self.request(page=page)
 64 |             try:
 65 |                 data = req.json()['data']
 66 |                 if data:
 67 |                     self.clean_data(data)
 68 |                     print('第{}页重试成功'.format(page))
 69 |             except:
 70 |                 self.spider_by_page(page=page, retry=retry - 1)
 71 | 
 72 |     # ----- 爬取数据
 73 |     def spider(self):
 74 |         # 失败页面
 75 |         fail_page = []
 76 |         for _ in range(self.max_page):
 77 |             print('正在爬取第{}页'.format(_))
 78 |             # 获得网页信息
 79 |             req = self.request(_)
 80 |             try:
 81 |                 data = req.json()['data']
 82 |             except:
 83 |                 data = []
 84 |                 fail_page.append(_)
 85 |             if data:
 86 |                 # 清洗数据
 87 |                 self.clean_data(data)
 88 |             else:
 89 |                 continue
 90 |         # 失败重新爬取
 91 |         if fail_page:
 92 |             print('出错的页数：', fail_page)
 93 |             for _ in fail_page:
 94 |                 self.spider_by_page(page=_)
 95 |         else:
 96 |             print('没有出错')
 97 | 
 98 |     def run(self):
 99 |         self.spider()
100 |         data_list = []
101 |         # 逐条信息提取
102 |         while not self.queue.empty():
103 |             data_list.append(self.queue.get())
104 |         # 保存数据
105 |         with open('{}.csv'.format(self.category), 'w', newline='', encoding='utf-8-sig') as f:
106 |             f_csv = csv.DictWriter(f, self.csv_header)
107 |             f_csv.writeheader()
108 |             f_csv.writerows(data_list)
109 |         print('文件保存成功,打开{}.csv查看'.format(self.category))
110 | 
111 | 
112 | if __name__ == '__main__':
113 |     XiaoMiShop(category=15, max_page=2).run()
114 | 


--------------------------------------------------------------------------------
/Spider Study/0x10kuanShopApp_spider/game.csv:
--------------------------------------------------------------------------------
 1 | ﻿应用名称,下载链接
 2 | 天天酷跑,https://dl.coolapk.com/down?pn=com.coolapk.market&id=NDU5OQ&h=46bb9d98&from=from-web
 3 | 王者荣耀,https://dl.coolapk.com/down?pn=com.coolapk.market&id=NDU5OQ&h=46bb9d98&from=from-web
 4 | 欢乐斗地主（腾讯）,https://dl.coolapk.com/down?pn=com.coolapk.market&id=NDU5OQ&h=46bb9d98&from=from-web
 5 | 天天飞车,https://dl.coolapk.com/down?pn=com.coolapk.market&id=NDU5OQ&h=46bb9d98&from=from-web
 6 | 奇迹暖暖,https://dl.coolapk.com/down?pn=com.coolapk.market&id=NDU5OQ&h=46bb9d98&from=from-web
 7 | 全民突击,https://dl.coolapk.com/down?pn=com.coolapk.market&id=NDU5OQ&h=46bb9d98&from=from-web
 8 | 全民飞机大战,https://dl.coolapk.com/down?pn=com.coolapk.market&id=NDU5OQ&h=46bb9d98&from=from-web
 9 | 和平精英,https://dl.coolapk.com/down?pn=com.coolapk.market&id=NDU5OQ&h=46bb9d98&from=from-web
10 | 炉石传说,https://dl.coolapk.com/down?pn=com.coolapk.market&id=NDU5OQ&h=46bb9d98&from=from-web
11 | 天天爱消除（花好月圆版）,https://dl.coolapk.com/down?pn=com.coolapk.market&id=NDU5OQ&h=46bb9d98&from=from-web
12 | 英雄杀,https://dl.coolapk.com/down?pn=com.coolapk.market&id=NDU5OQ&h=46bb9d98&from=from-web
13 | 雷霆战机-- 超人气飞行射击类手游,https://dl.coolapk.com/down?pn=com.coolapk.market&id=NDU5OQ&h=46bb9d98&from=from-web
14 | 王者营地,https://dl.coolapk.com/down?pn=com.coolapk.market&id=NDU5OQ&h=46bb9d98&from=from-web
15 | 小鸡模拟器,https://dl.coolapk.com/down?pn=com.coolapk.market&id=NDU5OQ&h=46bb9d98&from=from-web
16 | 阴阳师网易官方版,https://dl.coolapk.com/down?pn=com.coolapk.market&id=NDU5OQ&h=46bb9d98&from=from-web
17 | 保卫萝卜1,https://dl.coolapk.com/down?pn=com.coolapk.market&id=NDU5OQ&h=46bb9d98&from=from-web
18 | 穿越火线-枪战王者（国民枪战手游）,https://dl.coolapk.com/down?pn=com.coolapk.market&id=NDU5OQ&h=46bb9d98&from=from-web
19 | 球球大作战,https://dl.coolapk.com/down?pn=com.coolapk.market&id=NDU5OQ&h=46bb9d98&from=from-web
20 | QQ飞车手游,https://dl.coolapk.com/down?pn=com.coolapk.market&id=NDU5OQ&h=46bb9d98&from=from-web
21 | 多玩我的世界盒子,https://dl.coolapk.com/down?pn=com.coolapk.market&id=NDU5OQ&h=46bb9d98&from=from-web
22 | 


--------------------------------------------------------------------------------
/Spider Study/0x10kuanShopApp_spider/kuanShopApp_spider.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Created on Fri Sep 25 04:26:09 2020
  4 | 酷安应用商店应用数据爬取
  5 | @author: luohenyueji
  6 | """
  7 | 
  8 | import requests
  9 | import queue
 10 | import threading
 11 | import re
 12 | from lxml import etree
 13 | import csv
 14 | from copy import deepcopy
 15 | 
 16 | 
 17 | class KuAn(object):
 18 | 
 19 |     def __init__(self, category, page):
 20 |         if category not in ['apk', 'game']:
 21 |             raise ValueError('category参数不在范围内')
 22 |         # 类别
 23 |         self.category = category
 24 |         self.page = page
 25 |         self.header = {
 26 |             'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36'}
 27 |         # 写入csv头
 28 |         self.csv_header = ['应用名称', '下载链接']
 29 |         with open('{}.csv'.format(self.category), 'w', newline='', encoding='utf-8-sig') as f:
 30 |             csv_file = csv.writer(f)
 31 |             csv_file.writerow(self.csv_header)
 32 | 
 33 |         # url
 34 |         self.url = 'https://www.coolapk.com'
 35 |         self.base_url = 'https://www.coolapk.com/{}'.format(category)
 36 | 
 37 |         # 队列
 38 |         # 要爬取的网页
 39 |         self.page_url_queue = queue.Queue()
 40 |         # 单个应用网页地址
 41 |         self.detail_url_queue = queue.Queue()
 42 |         self.save_queue = queue.Queue()
 43 | 
 44 |     # ----- 获得单个应用的页面地址
 45 |     def get_detail_url_fun(self):
 46 |         while True:
 47 |             # 取出页面
 48 |             page_url = self.page_url_queue.get()
 49 |             req = requests.get(url=page_url, headers=self.header)
 50 |             if req.status_code == 200:
 51 |                 req.encoding = req.apparent_encoding
 52 |                 html = etree.HTML(req.text)
 53 |                 # 获得链接
 54 |                 if self.category == 'apk':
 55 |                     path = html.xpath('//*[@class="app_left_list"]/a/@href')
 56 |                 elif self.category == 'game':
 57 |                     path = html.xpath('//*[@class="game_left_three"]/a/@href')
 58 |                 for _ in path:
 59 |                     # 单个应用网页地址
 60 |                     detail_url = self.url + _
 61 |                     print('正在获取详情链接:', detail_url)
 62 |                     # 保存数据
 63 |                     self.detail_url_queue.put(deepcopy(detail_url))
 64 |             # 告诉 page_url_queue.join()任务完成
 65 |             # 参考 https://blog.csdn.net/qq_43577241/article/details/104442854
 66 |             self.page_url_queue.task_done()
 67 | 
 68 |             if self.page_url_queue.empty():
 69 |                 break
 70 | 
 71 |     # ----- 获得单个应用的下载地址
 72 |     def get_download_url_fun(self):
 73 |         while True:
 74 |             detail_url = self.detail_url_queue.get()
 75 |             req = requests.get(url=detail_url, headers=self.header)
 76 |             if req.status_code == 200:
 77 |                 req.encoding = 'utf-8'
 78 |                 # 下载链接获取需要仔细寻找，可能无法直接下载
 79 |                 url_reg = "'(.*?)&from=from-web"
 80 |                 name_reg = '<p class="detail_app_title">(.*?)<'
 81 |                 # 获取下载链接
 82 |                 download_url = re.findall(url_reg, req.text)[0] + '&from=from-web'
 83 |                 # 获取应用名字
 84 |                 name = re.findall(name_reg, req.text)[0]
 85 | 
 86 |                 data = {'name': name, 'url': download_url}
 87 |                 print('获取到数据:', data)
 88 |                 self.save_queue.put(data)
 89 |             self.detail_url_queue.task_done()
 90 | 
 91 |     # ----- 保存数据
 92 |     def save_data_fun(self):
 93 |         while True:
 94 |             data = self.save_queue.get()
 95 |             name = data.get('name')
 96 |             url = data.get('url')
 97 |             with open('{}.csv'.format(self.category), 'a+', newline='', encoding='utf-8-sig') as f:
 98 |                 csv_file = csv.writer(f)
 99 |                 csv_file.writerow([name, url])
100 |             self.save_queue.task_done()
101 | 
102 |     def run(self):
103 |         for _ in range(1, self.page + 1):
104 |             # 设定网页
105 |             page_url = self.base_url + '?p={}'.format(_)
106 |             print('下发页面url', page_url)
107 |             # 要爬取的网页
108 |             self.page_url_queue.put(page_url)
109 | 
110 |         self.get_detail_url_fun()
111 |         thread_list = []
112 |         # 两个线程获得单个应用的页面地址
113 |         for _ in range(2):
114 |             get_detail_url = threading.Thread(target=self.get_detail_url_fun)
115 |             thread_list.append(get_detail_url)
116 | 
117 |         # 五个线程获得单个应用的下载地址
118 |         for _ in range(5):
119 |             get_download_url = threading.Thread(target=self.get_download_url_fun)
120 |             thread_list.append(get_download_url)
121 | 
122 |         # 两个线程保存单个应用的下载地址
123 |         for _ in range(2):
124 |             save_data = threading.Thread(target=self.save_data_fun)
125 |             thread_list.append(save_data)
126 | 
127 |         for t in thread_list:
128 |             # 设置为守护进程 主进程中的代码执行完毕之后，子进程自动结束
129 |             t.setDaemon(True)
130 |             t.start()
131 |         for q in [self.page_url_queue, self.detail_url_queue, self.save_queue]:
132 |             # 直到 queue中的数据均被删除或者处理
133 |             # 参考 https://blog.csdn.net/dashoumeixi/article/details/80946509
134 |             q.join()
135 | 
136 |         print('爬取完成，结束')
137 | 
138 | 
139 | if __name__ == '__main__':
140 |     KuAn(category='game', page=2).run()
141 | 


--------------------------------------------------------------------------------
/Spider Study/0x10kuanShopApp_spider/ref.md:
--------------------------------------------------------------------------------
1 | # 参考
2 | + https://github.com/DropsDevopsOrg/ECommerceCrawlers/tree/master/OthertCrawler/0x16kuanappshop
3 | + https://blog.csdn.net/qq_43577241/article/details/104442854
4 | + https://blog.csdn.net/dashoumeixi/article/details/80946509
5 | 


--------------------------------------------------------------------------------
/Spider Study/0x11bingioliu_spider/bingioliu_spider.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | Created on Fri Sep 25 05:58:50 2020
 4 | bingioliu网站必应每日高清壁纸爬取
 5 | @author: luohenyueji
 6 | """
 7 | import requests
 8 | from lxml import etree
 9 | from fake_useragent import UserAgent
10 | import os
11 | import time
12 | 
13 | 
14 | # ----- 获得网页
15 | def get_one_page(page_start):
16 |     print('当前爬取第{}页'.format(i))
17 |     # 这里下载排行榜的图片
18 |     # url = 'https://bing.ioliu.cn/?p={}'.format(page_start)
19 |     url = 'https://bing.ioliu.cn/ranking?p={}'.format(page_start)
20 |     headers = {'User-Agent': UserAgent().random}
21 |     res = requests.get(url, headers=headers, verify=False)
22 |     if res.status_code == 200:
23 |         return res.text
24 |     else:
25 |         return None
26 | 
27 | 
28 | # ----- 解析网页与图片下载
29 | def parse_one_page(html, saveDir='./save'):
30 |     # 生成目录
31 |     os.makedirs(saveDir, exist_ok=True)
32 | 
33 |     parseHtml = etree.HTML(html)
34 |     picList = parseHtml.xpath('//img/@src')
35 |     # 遍历图片链接
36 |     for pic in picList:
37 |         try:
38 |             # http://h1.ioliu.cn/bing/SantoriniAerial_ZH-CN9367767863_640x480.jpg?imageslim
39 |             # 更换为1920x1080分辨率图片
40 |             picUrl = pic.split('_640')[0] + '_1920x1080.jpg'
41 |             # 图片名字
42 |             picName = pic.split('bing/')[-1].split('_')[0] + '.jpg'
43 |             headers = {'User-Agent': UserAgent().random}
44 |             picRes = requests.get(picUrl, headers=headers)
45 |             # 保存图片
46 |             with open(os.path.join(saveDir, picName), 'wb') as f:
47 |                 f.write(picRes.content)
48 | 
49 |         except Exception as e:
50 |             print(pic, e)
51 |             continue
52 | 
53 | 
54 | if __name__ == '__main__':
55 |     for i in range(1, 12):
56 |         # 分页爬取
57 |         html = get_one_page(i)
58 |         parse_one_page(html, saveDir='./save')
59 | 
60 |         # 由于是个人网站，建议分段延时爬取
61 |         time.sleep(1)
62 | 


--------------------------------------------------------------------------------
/Spider Study/0x11bingioliu_spider/ref.md:
--------------------------------------------------------------------------------
1 | # 参考
2 | + https://github.com/DropsDevopsOrg/ECommerceCrawlers/tree/master/OthertCrawler/0x18bing_img
3 | + https://github.com/xCss/bing


--------------------------------------------------------------------------------
/Spider Study/0x12shicimingjuwang_spider/ref.md:
--------------------------------------------------------------------------------
1 | # 参考
2 | + https://github.com/DropsDevopsOrg/ECommerceCrawlers/tree/master/ShicimingjuCrawleAndDisplay


--------------------------------------------------------------------------------
/Spider Study/0x12shicimingjuwang_spider/shicimingjuwang_spider.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | Created on Sun Sep 27 05:45:46 2020
  4 | 诗词名句网数据爬取，无数据保存
  5 | @author: luohenyueji
  6 | """
  7 | 
  8 | import queue
  9 | import re
 10 | import threading
 11 | 
 12 | import requests
 13 | from bs4 import BeautifulSoup
 14 | from fake_useragent import UserAgent
 15 | 
 16 | 
 17 | # ----- 浏览器头
 18 | def get_header():
 19 |     return {
 20 |         'User-Agent': UserAgent().random,
 21 |         'Connection': 'close'
 22 |     }
 23 | 
 24 | 
 25 | # ----- 多线程爬取，由于可能导致数据爬取不全，数据诗词总数约为20w+数据
 26 | class Shici(object):
 27 | 
 28 |     def __init__(self, thread=5):
 29 |         # 诗人网页
 30 |         self.poet_queue = queue.Queue()
 31 |         # 线程数
 32 |         self.thread = thread
 33 |         self.base_url = 'http://www.shicimingju.com'
 34 | 
 35 |     # ----- 查询每个诗人的网页
 36 |     def get_poet_url(self):
 37 |         # 查询每个诗人的网页
 38 |         # 具体作者查询https://www.shicimingju.com/category/all
 39 |         for i in range(4, 5):
 40 |             url = 'http://www.shicimingju.com/chaxun/zuozhe/{}.html'.format(i)
 41 | 
 42 |             self.poet_queue.put(url)
 43 | 
 44 |     # ----- 爬取信息
 45 |     def Spider(self):
 46 |         # 当诗人列表不为空
 47 |         while not self.poet_queue.empty():
 48 |             # 获得诗人诗词链接
 49 |             url = self.poet_queue.get()
 50 |             req = requests.get(url, headers=get_header())
 51 |             if req.status_code == 200:
 52 | 
 53 |                 req.encoding = 'utf-8'
 54 |                 soup = BeautifulSoup(req.text, 'lxml')
 55 |                 # 作者名
 56 |                 name = soup.h4.text
 57 |                 # 作者朝代
 58 |                 dynasty = soup.select(".aside_left .aside_val")[0].text.strip()
 59 |                 if len(dynasty) == 0:
 60 |                     dynasty = '未知'
 61 |                 # 生平介绍
 62 |                 introduction = soup.find(attrs={"class": "des"}).text.strip()
 63 |                 # 诗词数量
 64 |                 poem_num = soup.select(".aside_right .aside_val")[0].text.strip()[:-1]
 65 |                 # 当前作者每首诗的网址
 66 |                 poet_url_list = []
 67 |                 # 20表示每页诗词数量
 68 |                 for i in range(1, int(int(poem_num) / 20) + 2):
 69 |                     # 诗人id
 70 |                     poet_id = re.sub("\D", "", url)
 71 |                     # 每页诗词网页
 72 |                     poet_page_url = 'http://www.shicimingju.com/chaxun/zuozhe/{}_{}.html'.format(poet_id, i)
 73 |                     req1 = requests.get(url=poet_page_url, headers=get_header())
 74 |                     if req1.status_code == 200:
 75 |                         req1.encoding = 'utf-8'
 76 |                         list_html = BeautifulSoup(req1.text, 'lxml')
 77 |                         # 诗词具体链接
 78 |                         poet_url_list += list_html.find_all('h3')
 79 |                 # 获得作者每部诗词网页的链接
 80 |                 poet_url_list = map(lambda x: self.base_url + x.a['href'].strip(), poet_url_list)
 81 |                 for url in poet_url_list:
 82 |                     print(url)
 83 |                     # 获得具体诗词页的内容
 84 |                     req2 = requests.get(url, headers=get_header())
 85 |                     if req2.status_code == 200:
 86 |                         req2.encoding = 'utf-8'
 87 |                         poet_html = BeautifulSoup(req2.text, 'lxml')
 88 |                         # 诗词标题
 89 |                         title = poet_html.h1.text
 90 |                         # 内容
 91 |                         content = poet_html.find(class_='item_content')
 92 |                         # 解析
 93 |                         analysis = poet_html.find(class_='shangxi_content')
 94 |                         if not content:
 95 |                             content = ""
 96 |                         else:
 97 |                             content = content.text.strip()
 98 |                         if not analysis:
 99 |                             analysis = ''
100 |                         else:
101 |                             analysis = analysis.text.strip()
102 | 
103 |     def run(self):
104 |         self.get_poet_url()
105 |         thread_list = []
106 | 
107 |         # 爬取文章
108 |         for i in range(self.thread):
109 |             t = threading.Thread(target=self.Spider)
110 |             thread_list.append(t)
111 |         for t in thread_list:
112 |             t.setDaemon(True)
113 |             t.start()
114 |         for t in thread_list:
115 |             t.join()
116 |         # 检查是否还有未爬取的链接
117 |         self.Spider()
118 | 
119 | 
120 | if __name__ == '__main__':
121 |     Shici().run()
122 | 


--------------------------------------------------------------------------------
/Spider Study/README.md:
--------------------------------------------------------------------------------
 1 | # 爬虫学习笔记
 2 | 
 3 | **⚠ 本仓库所有关于爬虫的代码仅供学习研究使用，不得用于商业用途，如有侵权请联系作者删除！**
 4 | 
 5 | **项目难易程度不分先后，主要参考链接：**
 6 | + [Python3网络爬虫开发实战教程](https://cuiqingcai.com/5052.html)
 7 | + [ECommerceCrawlers](https://github.com/DropsDevopsOrg/ECommerceCrawlers)
 8 | ---
 9 | 
10 | ## [0x01maoyanTop100_spider](0x01maoyanTop100_spider)
11 | + 说明：爬取猫眼Top100电影的信息
12 | + 知识点：requests，正则
13 | + 难度：☆
14 | + 检查可用日期：20200917
15 | + 参考：[ref](0x01maoyanTop100_spider/ref.md)
16 | 
17 | ## [0x02weiboSingleUser_spider](0x02weiboSingleUser_spider)
18 | + 说明：爬取微博单个用户博文信息
19 | + 知识点：requests，ajxa，json，pyquery
20 | + 难度：☆
21 | + 检查可用日期：20200919
22 | + 参考：[ref](0x02weiboSingleUser_spider/ref.md)
23 | 
24 | ## [0x03toutiaoArticle_spider](0x03toutiaoArticle_spider)
25 | + 说明：爬取今日头条单篇文章图片
26 | + 知识点：requests，ajxa，图片下载，正则
27 | + 难度：☆
28 | + 检查可用日期：20200920
29 | + 参考：[ref](0x03toutiaoArticle_spider/ref.md)
30 | 
31 | ## [0x04doubanMovieExplore_spider](0x04doubanMovieExplore_spider)
32 | + 说明：爬取豆瓣选电影页面信息
33 | + 知识点：requests，json
34 | + 难度：☆
35 | + 检查可用日期：20200923
36 | + 参考：[ref](0x04doubanMovieExplore_spider/ref.md)
37 | 
38 | ## [0x05baotuVideo_spider](0x05baotuVideo_spider)
39 | + 说明：爬取包图网视频
40 | + 知识点：requests，视频下载，多线程，BeautifulSoup，lxml
41 | + 难度：☆
42 | + 检查可用日期：20200923
43 | + 参考：[ref](0x05baotuVideo_spider/ref.md)
44 | 
45 | ## [0x06quanjingCategoryImg_spider](0x06quanjingCategoryImg_spider)
46 | + 说明：爬取全景网分类图片
47 | + 知识点：requests，图片下载，多线程，BeautifulSoup
48 | + 难度：☆
49 | + 检查可用日期：20200924
50 | + 参考：[ref](0x06quanjingCategoryImg_spider/ref.md)
51 | 
52 | ## [0x07doubanMusicChart_spider](0x07doubanMusicChart_spider)
53 | + 说明：爬取豆瓣音乐排行榜
54 | + 知识点：requests，数据解析，BeautifulSoup
55 | + 难度：☆
56 | + 检查可用日期：20200924
57 | + 参考：[ref](0x07doubanMusicChart_spider/ref.md)
58 | 
59 | ## [0x08doubanSingleMovieComment_spider](0x08doubanSingleMovieComment_spider)
60 | + 说明：爬取与分析豆瓣单部电影影评
61 | + 知识点：requests，jieba，wordcloud，Snownlp，matplotlib，lxml 
62 | + 难度：☆☆
63 | + 检查可用日期：20200924
64 | + 参考：[ref](0x08doubanSingleMovieComment_spider/ref.md)
65 | 
66 | ## [0x09xiaomiShopCategoryApp_spider](0x09xiaomiShopCategoryApp_spider)
67 | + 说明：爬取小米应用商店分类应用数据
68 | + 知识点：requests
69 | + 难度：☆
70 | + 检查可用日期：20200925
71 | + 参考：[ref](0x09xiaomiShopCategoryApp_spider/ref.md)
72 | 
73 | ## [0x10kuanShopApp_spider](0x10kuanShopApp_spider)
74 | + 说明：爬取酷安应用商店应用数据
75 | + 知识点：requests，多线程，lxml，正则
76 | + 难度：☆☆
77 | + 检查可用日期：20200925
78 | + 参考：[ref](0x10kuanShopApp_spider/ref.md)
79 | 
80 | ## [0x11bingioliu_spider](0x11bingioliu_spider)
81 | + 说明：爬取bingioliu网站必应每日高清壁纸
82 | + 知识点：requests，lxml
83 | + 难度：☆
84 | + 检查可用日期：20200925
85 | + 参考：[ref](0x11bingioliu_spider/ref.md)
86 | 
87 | ## [0x12shicimingjuwang_spider](0x12shicimingjuwang_spider)
88 | + 说明：爬取诗词名句网数据
89 | + 知识点：requests，BeautifulSoup，多线程
90 | + 难度：☆
91 | + 检查可用日期：20200928
92 | + 参考：[ref](0x12shicimingjuwang_spider/ref.md)


--------------------------------------------------------------------------------