├── .gitattributes
├── .gitignore
├── Data-Processing
    ├── README.md
    ├── SuperCodetest.py
    ├── for_csv
    │   ├── README.md
    │   ├── csv_write.py
    │   ├── read_csv.py
    │   └── test.csv
    ├── for_docx
    │   ├── creat_docx.py
    │   ├── demo.docx
    │   └── read_docx.py
    ├── for_excel
    │   ├── README.md
    │   ├── excel使用技巧.md
    │   ├── read_excel.py
    │   └── test.xlsx
    ├── for_file
    │   ├── change_file_name.exe
    │   ├── change_file_name.py
    │   └── get_file_name.py
    ├── for_img
    │   ├── README.md
    │   ├── Tutorial.ipynb
    │   ├── detect_line.py
    │   └── lena.png
    ├── for_pdf
    │   ├── OCR
    │   │   ├── README.md
    │   │   ├── easy_ocr.py
    │   │   ├── pdf2ocr.py
    │   │   ├── tesseract_test.py
    │   │   └── transformer_ocr.py
    │   ├── QR_code
    │   │   ├── README.md
    │   │   ├── zbar_img.py
    │   │   └── zbar_pdf.py
    │   ├── README.md
    │   ├── Tools
    │   │   ├── README.md
    │   │   ├── pdf-rename.py
    │   │   ├── tax_judge.py
    │   │   └── test.ipynb
    │   ├── pdf2excel.py
    │   ├── pdf2img.py
    │   └── pdf_utils.ipynb
    ├── for_txt
    │   ├── 11.txt
    │   ├── README.md
    │   ├── add_content.py
    │   ├── example.txt
    │   ├── example1.txt
    │   ├── how_many_lines.py
    │   ├── read_data.py
    │   ├── read_numpy.py
    │   ├── read_txt.py
    │   ├── test.py
    │   └── write_txt.py
    ├── print
    │   └── print_in_1line.py
    └── remove_string_spaces.py
├── LICENSE
├── Messy
    ├── A.py
    ├── DataTransform.py
    ├── Graph.py
    ├── GraphAlgorithms.py
    ├── ListSearch.py
    ├── crop_morphology.py
    ├── labelImg.py
    └── list_remove.py
├── Python+Algorithm
    ├── Evolutionary-Algorithm
    │   ├── Match Phrase.py
    │   └── genetic_algorithm.py
    ├── Geometric
    │   ├── point_oblique_straight_point.py
    │   └── two_point_straight_point.py
    ├── Least-Squares
    │   ├── Least squares.py
    │   ├── README.md
    │   └── train_data.csv
    ├── Math
    │   ├── Kalman
    │   │   ├── Kalman_2D.py
    │   │   ├── Kalman_3D.py
    │   │   ├── README.md
    │   │   ├── kalamn_unc.py
    │   │   ├── kalman_1.py
    │   │   ├── my_kalman_carmove.py
    │   │   └── my_kalman_simple.py
    │   ├── gram_schmidt.py
    │   ├── math_base.py
    │   ├── matrix.ipynb
    │   ├── matrix.py
    │   └── pareto-front.py
    ├── Optimization-Algorithm
    │   ├── Adam.py
    │   ├── BGD.py
    │   ├── README.md
    │   ├── SGD.py
    │   ├── SGD_momentum.py
    │   └── test.py
    ├── Search-Algorithm
    │   ├── BFS.py
    │   ├── DFS.py
    │   ├── README.md
    │   └── fig1.png
    ├── Sorting-Algorithm
    │   ├── README.md
    │   ├── bubble_sort.py
    │   ├── counting_sort.py
    │   ├── insertion_sort.py
    │   ├── merge_sort.py
    │   ├── quick_sort.py
    │   ├── selection_sort.py
    │   └── sleep_sort.py
    ├── Uncategorized
    │   ├── pyramid.py
    │   └── xingxingdiandeng.py
    └── kalman.py
├── Python+Crawler
    ├── DoubanTop250.py
    ├── README.md
    ├── Web
    │   ├── README.md
    │   ├── fake_uragent.py
    │   ├── html+save.py
    │   ├── ip_get.py
    │   ├── ip_test.py
    │   ├── key_ua+ip.py
    │   ├── test.py
    │   ├── text
    │   │   ├── ip.txt
    │   │   └── user_agent.txt
    │   ├── urlib.py
    │   ├── view_ua+ip.py
    │   ├── view_ua.py
    │   ├── webpage_viewer.py
    │   └── 刷网页.py
    ├── caixukun.py
    ├── crawler1.py
    ├── crawler2.py
    ├── crawler3.py
    ├── debug.log
    ├── form_test.py
    ├── ip.txt
    ├── ip1.txt
    ├── sample1.py
    ├── selenium_first.py
    ├── spider-google.py
    ├── test.py
    ├── user_agent.txt
    └── 豆瓣最受欢迎的250部电影.xlsx
├── Python+HTML
    ├── README.md
    ├── test1.html
    ├── test2.html
    ├── test2.py
    ├── test3.html
    ├── test3.py
    ├── test4.html
    └── test4.py
├── Python+Media
    ├── 发邮件
    │   ├── 163.py
    │   ├── README.md
    │   ├── content.txt
    │   └── outlook.py
    └── 文字转音频
    │   ├── demo.pcm
    │   ├── new_use.py
    │   ├── pcm2wav.py
    │   ├── test_webtts.py
    │   ├── text.txt
    │   ├── text2audio.py
    │   ├── tts_ws_python3_demo.py
    │   └── use_old.py
├── Python+Opencv
    ├── README.md
    ├── opencv_draw.py
    ├── template_matching.py
    ├── 图像处理
    │   ├── binarization.py
    │   └── black-white.jpg
    ├── 基于颜色的物体追踪.py
    ├── 拾色器
    │   ├── 0.jpg
    │   ├── 1.jpg
    │   ├── 2.png
    │   ├── color_picker(取色器交互版).py
    │   └── color_picker(取色器无交互版).py
    ├── 相机
    │   └── camera_photo.py
    └── 颜色_圆_相关
    │   ├── 图片
    │       ├── color_filtering.py
    │       ├── color_list.py
    │       ├── detect_picture_color.py
    │       ├── detect_picture_color_circle.py
    │       ├── judge_color_center.py
    │       ├── judge_multi_color.py
    │       ├── judge_single_color.py
    │       └── multi_color_filtering.py
    │   └── 视频
    │       ├── color_list.py
    │       ├── detect_camera_color.py
    │       └── detect_camera_color_circle.py
├── Python+PC-Control
    ├── mouse_control.py
    ├── mouse_monitor.py
    └── moyu.py
├── Python+Piano
    └── read.py
├── Python+arXiv
    ├── README.md
    ├── arxiv.py
    ├── check_update.py
    ├── conf.json
    ├── conf_list.txt
    ├── file2md.py
    └── id2md.py
├── Pythonic-Standard
    ├── Config文件编写
    │   ├── Argparse使用指南.md
    │   ├── Config文件编写.md
    │   ├── YAML使用指南.md
    │   ├── config.py
    │   ├── config.yaml
    │   ├── use_argparse.py
    │   ├── use_omegaconf.py
    │   └── use_yaml.py
    ├── Partial使用指南.md
    ├── Pathlib 使用指南.md
    ├── Python下划线含义.md
    ├── README.md
    ├── decorator装饰器.md
    ├── import.md
    ├── multi-level
    │   ├── __init__.py
    │   ├── file1.py
    │   ├── file2.py
    │   ├── folder1
    │   │   ├── __init__.py
    │   │   ├── file11.py
    │   │   ├── file12.py
    │   │   ├── folder11
    │   │   │   ├── file111.py
    │   │   │   └── file112.py
    │   │   └── folder22
    │   │   │   ├── __init__.py
    │   │   │   └── file221.py
    │   ├── folder2
    │   │   └── file21.py
    │   └── set_env.sh
    ├── print 输出.md
    ├── tqdm使用指南.md
    ├── 切片.md
    ├── 参数 传参 可变参数.md
    ├── 基础使用.md
    ├── 字典的实用.md
    └── 类.md
└── README.md


/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .vscode/settings.json
2 | .vscode
3 | Algorithm/Optimization-Algorithm/test.py
4 | **/__pycache__
5 | *.pyc
6 | .DS_Store
7 | 


--------------------------------------------------------------------------------
/Data-Processing/README.md:
--------------------------------------------------------------------------------
 1 | <h1 align="center">Data Processing</h1>
 2 | <div align="center">
 3 | 整理处理txt、csv、docx、pdf 等的高效方法。这里记录一些最基本的用法，其他详细见对应子文件夹。
 4 | 
 5 | </div>
 6 | 
 7 | ## 原则
 8 | 
 9 | `逗号`隔开的文件，尽量改为csv格式，因为处理csv格式有天然的优势
10 | 
11 | 可以直接由txt文件格式转为csv格式
12 | 
13 | 
14 | 
15 | ## 文件操作
16 | 
17 | 对路径进行操作，推荐使用 [Pathlib](https://docs.python.org/3/library/pathlib.html)。
18 | 
19 | ```python
20 | # 获取当前文件夹所有pdf的文件名
21 | 
22 | from pathlib import Path
23 | 
24 | root_dir = Path('./')
25 | pdf_list = sorted(root_dir.glob('*.pdf'))
26 | ```
27 | 
28 | 
29 | 
30 | 
31 | 
32 | 


--------------------------------------------------------------------------------
/Data-Processing/for_csv/README.md:
--------------------------------------------------------------------------------
 1 | # CSV
 2 | 
 3 | 
 4 | 
 5 | python 自带就有一个 csv 库
 6 | 
 7 | ```python
 8 | import csv
 9 | with open('some.csv', newline='') as f:
10 |     reader = csv.reader(f)
11 |     for row in reader:
12 |         print(row)
13 | ```
14 | 
15 | 
16 | 
17 | ```python
18 | import csv
19 | for row in csv.reader(['one,two,three']):
20 |     print(row)
21 | ```
22 | 
23 | 
24 | 
25 | 也可以借助 pandas
26 | 
27 | ```python
28 | import pandas
29 | 
30 | df = pandas.read_csv('test.csv', encoding='utf-8')
31 | print(df['Price'][0])
32 | ```
33 | 
34 | 
35 | 
36 | 
37 | 
38 | 
39 | 
40 | 
41 | 
42 | 
43 | 
44 | 
45 | 
46 | 
47 | 
48 | 
49 | 
50 | 


--------------------------------------------------------------------------------
/Data-Processing/for_csv/csv_write.py:
--------------------------------------------------------------------------------
 1 | #加入需要的库
 2 | import csv
 3 | import os
 4 | 
 5 | #初始化工作
 6 | path='competition/train'
 7 | csvFile = open("2.csv", 'a',newline='')
 8 | writer = csv.writer(csvFile)	
 9 | 
10 | 
11 | #开始读写文件
12 | files = os.listdir(path)
13 | for file in files:
14 | 	command='tesseract.exe '+path+'/'+file+' output -l chi_sim+chi_sim1 --psm 7'
15 | 	os.popen(command).read()
16 | 	f = open('output.txt', 'r', encoding="utf8")    # 打开文件
17 | 	data=''
18 | 	for line in f.readlines():   
19 | 		data = data + line.strip()
20 | 	data = data.replace(' ', '')
21 | 	csvFile = open("2.csv", 'a', newline='')	
22 | 	add_info = [file, data]	
23 | 	writer.writerow(add_info)
24 | 	f.close()
25 | 
26 | csvFile.close()


--------------------------------------------------------------------------------
/Data-Processing/for_csv/read_csv.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | Read a CSV file. Example CSV content:
 3 | 
 4 | Symbol,Price,Date,Time,Change,Volume
 5 | "AA",39.48,"6/11/2007","9:36am",-0.18,181800
 6 | "AIG",71.38,"6/11/2007","9:36am",-0.15,195500
 7 | "AXP",62.58,"6/11/2007","9:36am",-0.46,935000
 8 | "BA",98.31,"6/11/2007","9:36am",+0.12,104800
 9 | "C",53.08,"6/11/2007","9:36am",-0.25,360900
10 | "CAT",78.29,"6/11/2007","9:36am",-0.23,225400
11 | 
12 | The data is stored as a DataFrame
13 | '''
14 | 
15 | import pandas
16 | 
17 | df = pandas.read_csv('test.csv', encoding='utf-8')
18 | 
19 | print(df['Price'][0])


--------------------------------------------------------------------------------
/Data-Processing/for_csv/test.csv:
--------------------------------------------------------------------------------
1 | Symbol,Price,Date,Time,Change,Volume
2 | "AA",39.48,"6/11/2007","9:36am",-0.18,181800
3 | "AIG",71.38,"6/11/2007","9:36am",-0.15,195500
4 | "AXP",62.58,"6/11/2007","9:36am",-0.46,935000
5 | "BA",98.31,"6/11/2007","9:36am",+0.12,104800
6 | "C",53.08,"6/11/2007","9:36am",-0.25,360900
7 | "CAT",78.29,"6/11/2007","9:36am",-0.23,225400


--------------------------------------------------------------------------------
/Data-Processing/for_docx/creat_docx.py:
--------------------------------------------------------------------------------
 1 | """Reference website
 2 | https://python-docx.readthedocs.io/en/latest/
 3 | """
 4 | 
 5 | from docx import Document
 6 | from docx.shared import Inches
 7 | 
 8 | document = Document()
 9 | 
10 | document.add_heading('Document Title', 0)
11 | 
12 | p = document.add_paragraph('A plain paragraph having some ')
13 | p.add_run('bold').bold = True
14 | p.add_run(' and some ')
15 | p.add_run('italic.').italic = True
16 | 
17 | document.add_heading('Heading, level 1', level=1)
18 | document.add_paragraph('Intense quote', style='Intense Quote')
19 | 
20 | document.add_paragraph(
21 |     'first item in unordered list', style='List Bullet'
22 | )
23 | document.add_paragraph(
24 |     'first item in ordered list', style='List Number'
25 | )
26 | 
27 | # document.add_picture('monty-truth.png', width=Inches(1.25))
28 | 
29 | records = (
30 |     (3, '101', 'Spam'),
31 |     (7, '422', 'Eggs'),
32 |     (4, '631', 'Spam, spam, eggs, and spam')
33 | )
34 | 
35 | table = document.add_table(rows=1, cols=3)
36 | hdr_cells = table.rows[0].cells
37 | hdr_cells[0].text = 'Qty'
38 | hdr_cells[1].text = 'Id'
39 | hdr_cells[2].text = 'Desc'
40 | for qty, id, desc in records:
41 |     row_cells = table.add_row().cells
42 |     row_cells[0].text = str(qty)
43 |     row_cells[1].text = id
44 |     row_cells[2].text = desc
45 | 
46 | document.add_page_break()
47 | 
48 | document.save('demo.docx')


--------------------------------------------------------------------------------
/Data-Processing/for_docx/demo.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yzy1996/Python-Code/2abcaa6fbfa4a84aaffdf10d7bcc6b12649dd221/Data-Processing/for_docx/demo.docx


--------------------------------------------------------------------------------
/Data-Processing/for_docx/read_docx.py:
--------------------------------------------------------------------------------
1 | from docx import Document
2 | 
3 | file = Document("demo.docx")
4 | 
5 | for para in file.paragraphs:
6 |     print(para.text)


--------------------------------------------------------------------------------
/Data-Processing/for_excel/README.md:
--------------------------------------------------------------------------------
1 | ## 用什么库读取
2 | 
3 | pandas


--------------------------------------------------------------------------------
/Data-Processing/for_excel/excel使用技巧.md:
--------------------------------------------------------------------------------
1 | =IF(ISERROR(VLOOKUP(C3,Sheet1!$A$1:$B$200,2,FALSE)),"",VALUE(VLOOKUP(C3,Sheet1!$A$1:$B$200,2,FALSE)))
2 | 
3 | 
4 | 
5 | 


--------------------------------------------------------------------------------
/Data-Processing/for_excel/read_excel.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | 
3 | data = pd.read_excel('test.xlsx')
4 | 
5 | 


--------------------------------------------------------------------------------
/Data-Processing/for_excel/test.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yzy1996/Python-Code/2abcaa6fbfa4a84aaffdf10d7bcc6b12649dd221/Data-Processing/for_excel/test.xlsx


--------------------------------------------------------------------------------
/Data-Processing/for_file/change_file_name.exe:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yzy1996/Python-Code/2abcaa6fbfa4a84aaffdf10d7bcc6b12649dd221/Data-Processing/for_file/change_file_name.exe


--------------------------------------------------------------------------------
/Data-Processing/for_file/change_file_name.py:
--------------------------------------------------------------------------------
1 | import os
2 | 
3 | file_list = os.listdir()
4 | 
5 | for file in file_list:
6 |     os.rename(file, file.replace(' ', '-'))
7 | 


--------------------------------------------------------------------------------
/Data-Processing/for_file/get_file_name.py:
--------------------------------------------------------------------------------
1 | # Get the file name without its extension
2 | 
3 | import os
4 | 
5 | path = r'demo.txt'
6 | name = os.path.splitext(os.path.basename(path))[0]  
7 | print(name)


--------------------------------------------------------------------------------
/Data-Processing/for_img/README.md:
--------------------------------------------------------------------------------
 1 | # 一文解释 图像读取
 2 | 
 3 | > 希望通过本文帮助你了解一些基础用法，本质数据类型。常用的库包括了`Pillow`,`OpenCV`,`Matplotlib`,`torchvision`.
 4 | 
 5 | 
 6 | 
 7 | 
 8 | 
 9 | ## Pillow
10 | 
11 | https://pillow.readthedocs.io/en/stable/handbook/tutorial.html
12 | 
13 | ```python
14 | from PIL import Image
15 | 
16 | # read
17 | pil_img = Image.open("your_image.jpg")  # RGB
18 | 
19 | # grayscale
20 | pil_img = Image.open("your_image.jpg").convert("L")
21 | 
22 | # save
23 | pil_img.save("new_image.jpg")
24 | 
25 | # save a JPEG image with specific quality
26 | pil_img.save("new_image.jpg", quality=95)
27 | ```
28 | 
29 | 
30 | 
31 | ```python
32 | # Pillow image to OpenCV image
33 | 
34 | cv2_img = np.array(pil_img)
35 | cv2_img = cv2.cvtColor(cv2_img, cv2.COLOR_RGB2BGR)
36 | ```
37 | 
38 | 
39 | 
40 | ## Opencv
41 | 
42 | >  OpenCV images are actually NumPy arrays
43 | 
44 | ```python
45 | import cv2
46 | 
47 | img = cv2.imread("your_image.jpg")  # BGR
48 | 
49 | img = cv2.imread("your_image.jpg", cv2.IMREAD_GRAYSCALE)
50 | 
51 | cv2.imwrite("new_image.jpg", img)
52 | 
53 | cv2.imwrite("new_image.jpg", img, [int(cv2.IMWRITE_JPEG_QUALITY), 95])
54 | 
55 | 
56 | cv2.imshow("image", img)
57 | cv2.waitKey(0)
58 | ```
59 | 
60 | ```python
61 | # OpenCV image to Pillow image
62 | 
63 | cv2_img = cv2.cvtColor(cv2_img, cv2.COLOR_BGR2RGB)
64 | pil_img = Image.fromarray(cv2_img)
65 | ```
66 | 
67 | 
68 | 
69 | ## Matplotlib
70 | 
71 | 学习资料
72 | 
73 | https://github.com/rougier/matplotlib-tutorial
74 | 
75 | https://github.com/matplotlib/cheatsheets
76 | 
77 | https://github.com/matplotlib/cheatsheets
78 | 
79 | ```python
80 | 
81 | plt.show()
82 | ```
83 | 
84 | 
85 | 
86 | ref: https://medium.com/analytics-vidhya/the-ultimate-handbook-for-opencv-pillow-72b7eff77cd7
87 | 
88 | 
89 | 
90 | 


--------------------------------------------------------------------------------
/Data-Processing/for_img/Tutorial.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "\n",
  8 |     "\n",
  9 |     "%matplotlib notebook jupyter行内形成交互式的图表\n",
 10 |     "%matplotlib mac内形成交互式的图表，即会弹出图像窗口\n",
 11 |     "%matplotlib inline 可以显示图像，但无交互功能；同时方便导出为markdown的时候有图片\n",
 12 |     "the backend needs to be set in the ipython_config.py, not the jupyter_notebook_config.py."
 13 |    ]
 14 |   },
 15 |   {
 16 |    "cell_type": "code",
 17 |    "execution_count": 8,
 18 |    "metadata": {},
 19 |    "outputs": [],
 20 |    "source": [
 21 |     "%matplotlib inline\n",
 22 |     "import matplotlib.pyplot as plt"
 23 |    ]
 24 |   },
 25 |   {
 26 |    "cell_type": "markdown",
 27 |    "metadata": {},
 28 |    "source": [
 29 |     "## Pillow\n",
 30 |     "\n",
 31 |     "`pip install Pillow`"
 32 |    ]
 33 |   },
 34 |   {
 35 |    "cell_type": "code",
 36 |    "execution_count": 10,
 37 |    "metadata": {},
 38 |    "outputs": [
 39 |     {
 40 |      "name": "stdout",
 41 |      "output_type": "stream",
 42 |      "text": [
 43 |       "PNG (512, 512) RGB\n"
 44 |      ]
 45 |     }
 46 |    ],
 47 |    "source": [
 48 |     "from PIL import Image\n",
 49 |     "\n",
 50 |     "img = Image.open(\"lena.png\")\n",
 51 |     "\n",
 52 |     "print(img.format, img.size, img.mode)\n",
 53 |     "\n",
 54 |     "img.show()"
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "markdown",
 59 |    "metadata": {},
 60 |    "source": [
 61 |     "## Opencv"
 62 |    ]
 63 |   },
 64 |   {
 65 |    "cell_type": "code",
 66 |    "execution_count": null,
 67 |    "metadata": {},
 68 |    "outputs": [],
 69 |    "source": [
 70 |     "import cv2\n",
 71 |     "\n",
 72 |     "cv2_img = cv2.imread(\"lena.png\")  # BGR"
 73 |    ]
 74 |   },
 75 |   {
 76 |    "cell_type": "code",
 77 |    "execution_count": null,
 78 |    "metadata": {},
 79 |    "outputs": [],
 80 |    "source": []
 81 |   }
 82 |  ],
 83 |  "metadata": {
 84 |   "kernelspec": {
 85 |    "display_name": "Python 3.9.12 ('base')",
 86 |    "language": "python",
 87 |    "name": "python3"
 88 |   },
 89 |   "language_info": {
 90 |    "codemirror_mode": {
 91 |     "name": "ipython",
 92 |     "version": 3
 93 |    },
 94 |    "file_extension": ".py",
 95 |    "mimetype": "text/x-python",
 96 |    "name": "python",
 97 |    "nbconvert_exporter": "python",
 98 |    "pygments_lexer": "ipython3",
 99 |    "version": "3.9.12"
100 |   },
101 |   "orig_nbformat": 4,
102 |   "vscode": {
103 |    "interpreter": {
104 |     "hash": "40d3a090f54c6569ab1632332b64b2c03c39dcf918b08424e98f38b5ae0af88f"
105 |    }
106 |   }
107 |  },
108 |  "nbformat": 4,
109 |  "nbformat_minor": 2
110 | }
111 | 


--------------------------------------------------------------------------------
/Data-Processing/for_img/lena.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yzy1996/Python-Code/2abcaa6fbfa4a84aaffdf10d7bcc6b12649dd221/Data-Processing/for_img/lena.png


--------------------------------------------------------------------------------
/Data-Processing/for_pdf/OCR/README.md:
--------------------------------------------------------------------------------
 1 | ## PDF OCR 识别
 2 | 
 3 | 使用 Python 的第三方库做 OCR 识别。目前主流的库有这些，收集了 GitHub 上的 Star 数以及最新的更新日期。
 4 | 
 5 | |                             名称                             |                 更新时间（不带年份则表今年）                 |
 6 | | :----------------------------------------------------------: | :----------------------------------------------------------: |
 7 | | [tesseract](https://github.com/tesseract-ocr/tesseract) ![](https://img.shields.io/github/stars/tesseract-ocr/tesseract?style=social) | ![](https://img.shields.io/github/last-commit/tesseract-ocr/tesseract) |
 8 | | [EasyOcr](https://github.com/JaidedAI/EasyOCR) ![](https://img.shields.io/github/stars/JaidedAI/EasyOCR?style=social) | ![](https://img.shields.io/github/last-commit/JaidedAI/EasyOCR) |
 9 | | [OCRmyPDF](https://github.com/ocrmypdf/OCRmyPDF) ![](https://img.shields.io/github/stars/ocrmypdf/OCRmyPDF?style=social) | ![](https://img.shields.io/github/last-commit/ocrmypdf/OCRmyPDF) |
10 | | [pytesseract](https://github.com/madmaze/pytesseract) ![](https://img.shields.io/github/stars/madmaze/pytesseract?style=social) | ![](https://img.shields.io/github/last-commit/madmaze/pytesseract) |
11 | 
12 | 
13 | 
14 | 
15 | 
16 | 使用 [ocrmypdf](https://ocrmypdf.readthedocs.io/en/latest/cookbook.html) ，根据[官方教程](https://ocrmypdf.readthedocs.io/en/latest/installation.html)安装。Linux系统（包含MacOS，WSL）会简单一点，Windows复杂一点。
17 | 
18 | 本质上使用的是谷歌的tesseract工具，同时也有一个支持python的 https://github.com/madmaze/pytesseract
19 | 
20 | 不过上述默认都不支持手写字，在学术上基于 Transformer 的技术也出现了，例如 [TrOCR](https://github.com/microsoft/unilm/tree/master/trocr)、[handwriting-ocr](https://github.com/Breta01/handwriting-ocr)
21 | 
22 | 使用方法：直接在命令行执行
23 | 
24 | ```shell
25 | ocrmypdf --pages 1 --optimize 0 --output-type none --sidecar output.txt input.pdf -
26 | ```
27 | 
28 | > --pages 1 是仅处理 pdf 的第一页，--optimize 0 禁用页面优化，--output-type none是不输出额外的一个pdf（需要配合最后的 -）
29 | >
30 | > 还可以加上 --quiet 不让打印过程
31 | 
32 | 会在本地保存一个 output.txt 里面存有识别的文字。
33 | 
34 | > 默认的是英文，可以替换为其他语言
35 | 
36 | 
37 | 
38 | 如果想要写入python，注意如果简单加进去会报一个错误 `python stdout is connected to a terminal. Please redirect stdout to a file.`下面的程序中已经修复了。
39 | 
40 | ```python
41 | import os
42 | import subprocess
43 | import shlex
44 | 
45 | file = 'test.pdf'
46 | command = f"ocrmypdf --deskew --rotate-pages --rotate-pages-threshold 5 --output-type none --sidecar ocr_output.txt {file} -"
47 | command_args = shlex.split(command)
48 | 
49 | with open('log', "w") as outfile:
50 |     subprocess.run(command_args, stdout=outfile)
51 | os.remove('log')
52 | ```
53 | 
54 | 
55 | 
56 | 
57 | 
58 | 
59 | 
60 | 


--------------------------------------------------------------------------------
/Data-Processing/for_pdf/OCR/easy_ocr.py:
--------------------------------------------------------------------------------
1 | 
2 | import easyocr
3 | 
4 | reader = easyocr.Reader(['ch_sim', 'en'], gpu=False) # this needs to run only once to load the model into memory
5 | result = reader.readtext('test.jpg', detail = 0)
6 | 
7 | # NOTE(review): the original comment said "remove spaces", but nothing is stripped here; the result is printed as returned
8 | print(result)


--------------------------------------------------------------------------------
/Data-Processing/for_pdf/OCR/pdf2ocr.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import subprocess
 3 | import shlex
 4 | 
 5 | file = '2.pdf'
 6 | command = f"ocrmypdf --deskew --rotate-pages --rotate-pages-threshold 5 --output-type none --sidecar ocr_output.txt {file} -"
 7 | command_args = shlex.split(command)
 8 | 
 9 | with open('log', "w") as outfile:  # redirect stdout to a throwaway file; ocrmypdf rejects a terminal stdout when the output is "-"
10 |     subprocess.run(command_args, stdout=outfile)
11 | os.remove('log')  # the redirected output itself is not needed


--------------------------------------------------------------------------------
/Data-Processing/for_pdf/OCR/tesseract_test.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import pytesseract
 3 | from pikepdf import Pdf, PdfImage
 4 | 
 5 | with Pdf.open('wrong2.pdf') as pdf:
 6 |     page = pdf.pages[-1]
 7 |     keyimage = list(page.images.keys())
 8 |     rawimage = page.images[keyimage[0]]
 9 |     pdfimage = PdfImage(rawimage)
10 | 
11 |     img = pdfimage.as_pil_image()
12 |     w, h = img.size
13 |     # img = img.crop((0, 0, w/2, h/3))
14 | 
15 | print(pytesseract.image_to_string(img, lang='chi_sim'))
16 | # print(pytesseract.image_to_osd(img))


--------------------------------------------------------------------------------
/Data-Processing/for_pdf/OCR/transformer_ocr.py:
--------------------------------------------------------------------------------
 1 | from transformers import TrOCRProcessor, VisionEncoderDecoderModel
 2 | from PIL import Image
 3 | import requests
 4 | 
 5 | from pikepdf import Pdf, PdfImage
 6 | 
 7 | # with Pdf.open('test.pdf') as pdf:
 8 | #     page = pdf.pages[0]
 9 | #     keyimage = list(page.images.keys())
10 | #     rawimage = page.images[keyimage[0]]
11 | #     pdfimage = PdfImage(rawimage)
12 | 
13 | #     img = pdfimage.as_pil_image().convert("RGB")
14 | 
15 | # # load image from the IAM database
16 | # url = 'https://fki.tic.heia-fr.ch/static/img/a01-122-02-00.jpg'
17 | img = Image.open('test.png').convert("RGB")
18 | 
19 | processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-handwritten')
20 | model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-handwritten')
21 | pixel_values = processor(images=img, return_tensors="pt").pixel_values
22 | 
23 | generated_ids = model.generate(pixel_values)
24 | generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
25 | 
26 | print(generated_text)


--------------------------------------------------------------------------------
/Data-Processing/for_pdf/QR_code/README.md:
--------------------------------------------------------------------------------
 1 | # 识别二维码
 2 | 
 3 | 
 4 | 
 5 | > 写在这里，需要先将pdf转换成相应的图像格式
 6 | 
 7 | 使用 [pyzbar](https://pypi.org/project/pyzbar/) 来帮助识别二维码，是经典的 **zbar** 在python3上的支持。
 8 | 
 9 | 根据官方安装步骤，针对mac的错误`ImportError: Unable to find zbar shared library`需要额外的:
10 | 
11 | ```shell
12 | mkdir ~/lib
13 | ln -s $(brew --prefix zbar)/lib/libzbar.dylib ~/lib/libzbar.dylib
14 | ```
15 | 
16 | 
17 | 
18 | 基本用法（搬运自主页）
19 | 
20 | ```python
21 | # 使用 PIL.Image 类型
22 | from pyzbar.pyzbar import decode
23 | from PIL import Image
24 | 
25 | decoded_data = decode(Image.open('name.png'))
26 | QR_data = decoded_data[0].data.decode()
27 | ```
28 | 
29 | ```python
30 | # 使用 cv2 numpy.ndarray类型
31 | from pyzbar.pyzbar import decode
32 | import cv2
33 | 
34 | decoded_data = decode(cv2.imread('name.png'))
35 | QR_data = decoded_data[0].data.decode()
36 | ```
37 | 
38 | 
39 | 
40 | 和pdf搭配起来的用法
41 | 
42 | ```python
43 | from pikepdf import Pdf, PdfImage
44 | from pyzbar.pyzbar import decode
45 | 
46 | with Pdf.open('name.pdf') as pdf:
47 |     page = pdf.pages[0]
48 |     keyimage = list(page.images.keys())
49 |     rawimage = page.images[keyimage[0]]
50 |     pdfimage = PdfImage(rawimage)
51 | 
52 |     decoded_data = decode(pdfimage.as_pil_image())
53 |     if decoded_data:
54 |         QR_data = decoded_data[0].data.decode()
55 | ```
56 | 
57 | 
58 | 
59 | 
60 | 
61 | 
62 | 
63 | 


--------------------------------------------------------------------------------
/Data-Processing/for_pdf/QR_code/zbar_img.py:
--------------------------------------------------------------------------------
 1 | from pyzbar.pyzbar import decode
 2 | from PIL import Image
 3 | from pikepdf import Pdf, PdfImage
 4 | import cv2
 5 | 
 6 | # pdf = Pdf.open('12.pdf')
 7 | # page = pdf.pages[0]
 8 | # keyimage = list(page.images.keys())
 9 | # rawimage = page.images[keyimage[0]]
10 | # pdfimage = PdfImage(rawimage)
11 | 
12 | # img = pdfimage.as_pil_image()
13 | # w, h = img.size
14 | # img = img.crop((0, h/2, w, h))
15 | from PIL import Image,ImageEnhance
16 | img1 = Image.open('111.png')
17 | img = cv2.imread('111.png')
18 | 
19 | decoded_data = decode(img)
20 | 
21 | for txt in decoded_data:
22 |     barcodeData = txt.data.decode("utf-8")
23 |     print(barcodeData)
24 | 
25 | # if decoded_data:
26 | #     scan_CR = decoded_data[0].data.decode()
27 |  


--------------------------------------------------------------------------------
/Data-Processing/for_pdf/QR_code/zbar_pdf.py:
--------------------------------------------------------------------------------
 1 | from pyzbar.pyzbar import decode
 2 | from PIL import Image
 3 | from pikepdf import Pdf, PdfImage
 4 | 
 5 | 
 6 | pdf = Pdf.open('2.pdf')
 7 | page = pdf.pages[0]
 8 | keyimage = list(page.images.keys())
 9 | rawimage = page.images[keyimage[0]]
10 | pdfimage = PdfImage(rawimage)
11 | img = pdfimage.as_pil_image()
12 | 
13 | # img = pdfimage.as_pil_image()
14 | # w, h = img.size
15 | # img = img.crop((0, h/2, w, h))
16 | # img.show()
17 | 
18 | # img = cv2.imread('ttest.jpg')
19 | decoded_data = decode(img)
20 | 
21 | print(decoded_data)
22 | 
23 | # if decoded_data:
24 | #     scan_CR = decoded_data[0].data.decode()
25 | 


--------------------------------------------------------------------------------
/Data-Processing/for_pdf/README.md:
--------------------------------------------------------------------------------
 1 | # PDF 相关处理脚本
 2 | 
 3 | > 这里是一些基础用法，以及其他 [PDF OCR 识别](https://github.com/yzy1996/Python-Code/tree/master/Data-Processing/for_pdf/OCR)；[QR_code](https://github.com/yzy1996/Python-Code/tree/master/Data-Processing/for_pdf/QR_code)
 4 | 
 5 | 
 6 | 
 7 | **先说有哪些热门的库**
 8 | 
 9 | |                           名称                           |                            Stars                             |               最后更新时间（不带年份则表今年）               |                             特点                             |
10 | | :------------------------------------------------------: | :----------------------------------------------------------: | :----------------------------------------------------------: | :----------------------------------------------------------: |
11 | |        [PyPDF2](https://github.com/py-pdf/PyPDF2)        | ![](https://img.shields.io/github/stars/py-pdf/PyPDF2?style=social) | ![](https://img.shields.io/github/last-commit/py-pdf/PyPDF2) |             基于pyPDF，纯python，支持超过10年了              |
12 | | [pdfminer.six](https://github.com/pdfminer/pdfminer.six) | ![](https://img.shields.io/github/stars/pdfminer/pdfminer.six?style=social) | ![](https://img.shields.io/github/last-commit/pdfminer/pdfminer.six) |             基于PDFMiner，extracting information             |
13 | |    [pdfplumber](https://github.com/jsvine/pdfplumber)    | ![](https://img.shields.io/github/stars/jsvine/pdfplumber?style=social) | ![](https://img.shields.io/github/last-commit/jsvine/pdfplumber) | Built on [pdfminer.six](https://github.com/pdfminer/pdfminer.six)，for detailed information about text character, rectangle, and line |
14 | |      [PyMuPDF](https://github.com/pymupdf/PyMuPDF)       | ![](https://img.shields.io/github/stars/pymupdf/PyMuPDF?style=social) | ![](https://img.shields.io/github/last-commit/pymupdf/PyMuPDF) |       基于[MuPDF](https://mupdf.com/)，付费，C语言依赖       |
15 | |      [pikepdf](https://github.com/pikepdf/pikepdf)       | ![](https://img.shields.io/github/stars/pikepdf/pikepdf?style=social) | ![](https://img.shields.io/github/last-commit/pikepdf/pikepdf) |     基于[QPDF](https://github.com/qpdf/qpdf)，C语言依赖      |
16 | |        [pdfx](https://github.com/metachris/pdfx)         | ![](https://img.shields.io/github/stars/metachris/pdfx?style=social) | ![](https://img.shields.io/github/last-commit/metachris/pdfx) | Extract references (pdf, url, doi, arxiv) and metadata from a PDF |
17 | 
18 | **再说我比较推荐的库**，[PyPDF2](https://github.com/py-pdf/PyPDF2) and [pikepdf](https://github.com/pikepdf/pikepdf)
19 | 
20 | **安装方式**：
21 | 
22 | ```shell
23 | pip install PyPDF2
24 | pip install pikepdf
25 | ```
26 | 
27 | **官网教程已经非常详细了**，在这里我只展示几个我常使用的脚本
28 | 
29 | - [pdf2img](#pdf2img)
30 | - [extract_text](#extract_text)
31 | - [extract_annotation](#extract_annotation)
32 | 
33 | 
34 | 
35 | ## pdf2img
36 | 
37 | ```python
38 | from pikepdf import Pdf, PdfImage
39 | 
40 | with Pdf.open('1.pdf') as pdf:
41 |     page = pdf.pages[0]
42 |     keyimage = list(page.images.keys())
43 |     rawimage = page.images[keyimage[0]]
44 |     pdfimage = PdfImage(rawimage)
45 | 
46 |     # 保存为图片文件
47 |     pdfimage.extract_to(fileprefix='test')
48 | 
49 |     # 保存为PIL.image
50 |     img = pdfimage.as_pil_image()
51 |     img.show()
52 | ```
53 | 
54 | 
55 | 
56 | ## extract_text
57 | 
58 | ```python
59 | from PyPDF2 import PdfReader
60 | 
61 | reader = PdfReader("1.pdf")
62 | page = reader.pages[0]
63 | text = page.extract_text()
64 | 
65 | print(text)
66 | ```
67 | 
68 | 
69 | 
70 | ## extract_annotation
71 | 
72 | ```python
73 | from PyPDF2 import PdfReader
74 | 
75 | reader = PdfReader("commented.pdf")
76 | 
77 | for page in reader.pages:
78 |     if "/Annots" in page:
79 |         for annot in page["/Annots"]:
80 |             obj = annot.get_object()
81 |             annotation = {"subtype": obj["/Subtype"], "location": obj["/Rect"]}
82 |             print(annotation)
83 | ```
84 | 
85 | 
86 | 
87 | 
88 | 
89 | 
90 | 
91 | 
92 | 
93 | 
94 | 
95 | 


--------------------------------------------------------------------------------
/Data-Processing/for_pdf/Tools/README.md:
--------------------------------------------------------------------------------
 1 | # 识别边框
 2 | 
 3 | # 识别横竖线
 4 | 
 5 | # 识别特定区域
 6 | 
 7 | 
 8 | 
 9 | 
10 | 
11 | https://github.com/wxwwt/opencv-picture-to-excel
12 | 
13 | https://blog.csdn.net/muxiong0308/article/details/80969355
14 | 
15 | https://github.com/muxiong0308/form_pic_ocr
16 | 
17 | https://juejin.cn/post/6844904078032666631
18 | 


--------------------------------------------------------------------------------
/Data-Processing/for_pdf/pdf2excel.py:
--------------------------------------------------------------------------------
 1 | import pdfplumber
 2 | import xlrd
 3 | import xlwt
 4 | from xlutils.copy import copy
 5 | import sys
 6 | 
 7 | def write_excel_xls(path, sheet_name, value):
 8 |     index = len(value)  # 获取需要写入数据的行数
 9 |     workbook = xlwt.Workbook()  # 新建一个工作簿
10 |     sheet = workbook.add_sheet(sheet_name)  # 在工作簿中新建一个表格
11 |     for i in range(0, index):
12 |         for j in range(0, len(value[i])):
13 |             sheet.write(i, j, value[i][j])  # 像表格中写入数据（对应的行和列）
14 |     workbook.save(path)  # 保存工作簿
15 |     print("xls格式表格写入数据成功！")
16 | 
17 | 
18 | def write_excel_xls_append(path, value):
19 |     index = len(value)  # 获取需要写入数据的行数
20 |     workbook = xlrd.open_workbook(path)  # 打开工作簿
21 |     sheets = workbook.sheet_names()  # 获取工作簿中的所有表格
22 |     worksheet = workbook.sheet_by_name(sheets[0])  # 获取工作簿中所有表格中的的第一个表格
23 |     rows_old = worksheet.nrows  # 获取表格中已存在的数据的行数
24 |     new_workbook = copy(workbook)  # 将xlrd对象拷贝转化为xlwt对象
25 |     new_worksheet = new_workbook.get_sheet(0)  # 获取转化后工作簿中的第一个表格
26 |     for i in range(0, index):
27 |         for j in range(0, len(value[i])):
28 |             new_worksheet.write(i+rows_old, j, value[i][j])  # 追加写入数据，注意是从i+rows_old行开始写入
29 |     new_workbook.save(path)  # 保存工作簿
30 |     print("xls格式表格【追加】写入数据成功！")
31 | 
32 | 
33 | name = sys.argv[1]
34 | #name='2020-04-23-603214.SH-603214爱婴室2020年第一季度报告'
35 | 
36 | path = name+'.pdf'
37 | pdf = pdfplumber.open(path)
38 | 
39 | book_name_xls = name+'.xls' 
40 | sheet_name_xls = '表1'
41 | 
42 | 
43 | write_excel_xls(book_name_xls, sheet_name_xls, [])
44 | for i in range(len(pdf.pages)): 
45 |     if i>=0:
46 |         for table in pdf.pages[i].extract_tables():
47 |                 # print(table)
48 |                 for row in table:
49 |                     print(row)
50 |                     write_excel_xls_append(book_name_xls, [row])
51 |                     
52 | 
53 | pdf.close()
54 | 


--------------------------------------------------------------------------------
/Data-Processing/for_pdf/pdf2img.py:
--------------------------------------------------------------------------------
 1 | import io
 2 | from PyPDF2 import PdfReader
 3 | from PIL import Image
 4 | 
 5 | # Open the PDF file and read all pages (the file handle is never closed in this script)
 6 | pdf_file = open('1.pdf', 'rb')
 7 | pdf_reader = PdfReader(pdf_file)
 8 | pages = []
 9 | for i in range(pdf_reader.getNumPages()):  # NOTE(review): getNumPages/getPage/mediaBox/getContents look like PyPDF2 1.x spellings — confirm they exist in the installed release
10 |     page = pdf_reader.getPage(i)
11 |     pages.append(page)
12 | 
13 | # Merge all pages into a single image: canvas is as wide as the widest page and as tall as all pages stacked
14 | width = max(page.mediaBox.getWidth() for page in pages)
15 | height = sum(page.mediaBox.getHeight() for page in pages)
16 | image = Image.new('RGB', (width, height))  # NOTE(review): presumably width/height must be ints here — confirm what mediaBox returns
17 | y = 0
18 | for page in pages:
19 |     x = (width - page.mediaBox.getWidth()) / 2  # horizontal offset that centres a narrower page
20 |     img_bytes = bytes(page.getContents())  # NOTE(review): assumes the page content stream is encoded image data — confirm
21 |     try:
22 |         img = Image.open(io.BytesIO(img_bytes)).convert('RGB')
23 |     except TypeError:
24 |         img = Image.open(io.BytesIO(img_bytes.decode())).convert('RGB')  # NOTE(review): decode() yields str, which io.BytesIO does not accept
25 |     image.paste(img, (int(x), int(y)))
26 |     y += page.mediaBox.getHeight()  # next page goes directly below this one
27 | 
28 | # Convert the image to PNG format and save it
29 | image.save('output.png', 'PNG')
30 | 


--------------------------------------------------------------------------------
/Data-Processing/for_pdf/pdf_utils.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "from pathlib import Path\n",
 10 |     "from PyPDF2 import PdfReader\n",
 11 |     "from pikepdf import Pdf, PdfImage\n",
 12 |     "import pytesseract\n",
 13 |     "import re "
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "markdown",
 18 |    "metadata": {},
 19 |    "source": [
 20 |     "# 提取pdf内容"
 21 |    ]
 22 |   },
 23 |   {
 24 |    "cell_type": "code",
 25 |    "execution_count": null,
 26 |    "metadata": {},
 27 |    "outputs": [],
 28 |    "source": [
 29 |     "from PyPDF2 import PdfReader\n",
 30 |     "\n",
 31 |     "reader = PdfReader(\"test.pdf\")\n",
 32 |     "\n",
 33 |     "for page in reader.pages:\n",
 34 |     "    print(page['/Annots'])\n",
 35 |     "    break\n",
 36 |     "    # if \"/Annots\" in page:\n",
 37 |     "    #     for annot in page[\"/Annots\"]:\n",
 38 |     "    #         obj = annot.get_object()\n",
 39 |     "    #         # annotation = {\"subtype\": obj[\"/Subtype\"], \"location\": obj[\"/Rect\"]}\n",
 40 |     "    #         # if subtype['/A']['/S'] == '/GoTo':\n",
 41 |     "    #         #     print(subtype['/A']['/D'])\n",
 42 |     "\n",
 43 |     "    #         print(obj)"
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "code",
 48 |    "execution_count": null,
 49 |    "metadata": {},
 50 |    "outputs": [],
 51 |    "source": [
 52 |     "with Pdf.open('CR-PI-2208-3391 ok.pdf') as pdf:\n",
 53 |     "\n",
 54 |     "    page = pdf.pages[-1]\n",
 55 |     "    keyimage = list(page.images.keys())\n",
 56 |     "    rawimage = page.images[keyimage[0]]\n",
 57 |     "    pdfimage = PdfImage(rawimage)\n",
 58 |     "    img = pdfimage.as_pil_image()\n",
 59 |     "\n",
 60 |     "    if pdfimage.width / pdfimage.height < 0.6:\n",
 61 |     "        rotation_degrees = pytesseract.image_to_osd(img).split('\\n')[1][-2:]\n",
 62 |     "        if rotation_degrees != '0':\n",
 63 |     "            img = img.rotate(int(rotation_degrees),expand=True)\n",
 64 |     "\n",
 65 |     "    print(pdfimage.width / pdfimage.height)\n",
 66 |     "\n",
 67 |     "    img = img.crop((0, 0, img.size[0] / 1.8, img.size[1] / 2))\n",
 68 |     "\n",
 69 |     "    text = pytesseract.image_to_string(img, lang='chi_sim')\n",
 70 |     "\n",
 71 |     "    print(text)"
 72 |    ]
 73 |   },
 74 |   {
 75 |    "cell_type": "markdown",
 76 |    "metadata": {},
 77 |    "source": [
 78 |     "# PDF to Image"
 79 |    ]
 80 |   },
 81 |   {
 82 |    "cell_type": "code",
 83 |    "execution_count": null,
 84 |    "metadata": {},
 85 |    "outputs": [],
 86 |    "source": [
 87 |     "from pikepdf import Pdf, PdfImage\n",
 88 |     "\n",
 89 |     "with Pdf.open('1.pdf') as pdf:\n",
 90 |     "    page = pdf.pages[0]\n",
 91 |     "    keyimage = list(page.images.keys())\n",
 92 |     "    rawimage = page.images[keyimage[0]]\n",
 93 |     "    pdfimage = PdfImage(rawimage)\n",
 94 |     "\n",
 95 |     "    # 保存为图片文件\n",
 96 |     "    pdfimage.extract_to(fileprefix='test')\n",
 97 |     "\n",
 98 |     "    # 保存为PIL.image\n",
 99 |     "    img = pdfimage.as_pil_image()\n",
100 |     "    img.show()"
101 |    ]
102 |   }
103 |  ],
104 |  "metadata": {
105 |   "kernelspec": {
106 |    "display_name": "Python 3.9.12 ('base')",
107 |    "language": "python",
108 |    "name": "python3"
109 |   },
110 |   "language_info": {
111 |    "codemirror_mode": {
112 |     "name": "ipython",
113 |     "version": 3
114 |    },
115 |    "file_extension": ".py",
116 |    "mimetype": "text/x-python",
117 |    "name": "python",
118 |    "nbconvert_exporter": "python",
119 |    "pygments_lexer": "ipython3",
120 |    "version": "3.9.12"
121 |   },
122 |   "orig_nbformat": 4,
123 |   "vscode": {
124 |    "interpreter": {
125 |     "hash": "40d3a090f54c6569ab1632332b64b2c03c39dcf918b08424e98f38b5ae0af88f"
126 |    }
127 |   }
128 |  },
129 |  "nbformat": 4,
130 |  "nbformat_minor": 2
131 | }
132 | 


--------------------------------------------------------------------------------
/Data-Processing/for_txt/11.txt:
--------------------------------------------------------------------------------
1 | 1
2 | 2
3 | 3
4 | 


--------------------------------------------------------------------------------
/Data-Processing/for_txt/README.md:
--------------------------------------------------------------------------------
  1 | <h1 align="center">For txt</h1>
  2 | <div align="center">
  3 | 
  4 | 
  5 | 
  6 | ![python-version](https://img.shields.io/badge/python-3.7-blue) ![country](https://img.shields.io/badge/country-China-red)
  7 | 
  8 | </div>
  9 | 
 10 | ## Usage
 11 | 
 12 | ### Without Package
 13 | 
 14 | ```python
 15 | with open('example.txt') as f:
 16 |     lines = (line.strip() for line in f)
 17 |     for line in lines:
 18 |         print(line)
 19 | 
 20 | >>> 37 52 2
 21 |     49 49 4
 22 |     52 64 4
 23 |     20 26 1
 24 |     40 30 3
 25 | ```
 26 | 
 27 | 
 28 | 
 29 | ### With Package
 30 | 
 31 | `numpy.genfromtxt(fname, dtype=<class 'float'>, delimiter=None, encoding='bytes')`
 32 | 
 33 | `returns out: ndarray`
 34 | 
 35 | 
 36 | 
 37 | ### Something Useful
 38 | 
 39 | 1. `str.split(str="", num=string.count(str)) ` 
 40 | 
 41 | * str -- 分隔符，默认为所有的空字符，包括空格、换行(\n)、制表符(\t)等
 42 | * num -- 分割次数。默认为 -1, 即分隔所有
 43 | 
 44 | `returns out: list of str`
 45 | 
 46 | 
 47 | 
 48 | 2. `map(function, iterable)` 会根据提供的函数对指定序列做映射
 49 | 
 50 | 可以用 `int` ，来将字符串变整数
 51 | 
 52 | ## Example
 53 | 
 54 | You can see this example [txtfile](./example.txt)
 55 | 
 56 | `37 52 2`
 57 | `49 49 4`
 58 | `52 64 4`
 59 | `20 26 1`
 60 | `40 30 3`
 61 | 
 62 | ```python
 63 | import numpy as np
 64 | 
 65 | data = np.genfromtxt('example.txt')
 66 | >>> [[37. 52.  2.]
 67 |     [49. 49.  4.]
 68 |     [52. 64.  4.]
 69 |     [20. 26.  1.]
 70 |     [40. 30.  3.]] <class 'numpy.ndarray'>
 71 |     
 72 | print(data[0][0])
 73 | >>> 37.0 <class 'numpy.float64'>
 74 | ```
 75 | 
 76 | Another example [txtfile](./example1.txt), but I really recommend you to use `csv` to store data with `,`
 77 | 
 78 | `37,52,2`
 79 | `49,49,4`
 80 | `52,64,4`
 81 | `20,26,1`
 82 | `40,30,3`
 83 | 
 84 | ```python
 85 | import numpy as np
 86 | 
 87 | data = np.genfromtxt('example1.txt')
 88 | >>> [nan nan nan nan nan]
 89 | 
 90 | data = np.genfromtxt('example1.txt', dtype='unicode')
 91 | >>> ['37,52,2' '49,49,4' '52,64,4' '20,26,1' '40,30,3']
 92 | 
 93 | 
 94 | data = np.genfromtxt('example1.txt', dtype='unicode')
 95 | data1 = ()
 96 | 
 97 | for i in range(len(data)):
 98 |     data1 += tuple(map(int, data[i].split(',')))
 99 | 
100 | data = np.reshape(data1, (5,3))
101 | >>> [[37 52  2]
102 |     [49 49  4]
103 |     [52 64  4]
104 |     [20 26  1]
105 |     [40 30  3]] <class 'numpy.ndarray'>
106 | ```
107 | 
108 | 


--------------------------------------------------------------------------------
/Data-Processing/for_txt/add_content.py:
--------------------------------------------------------------------------------
1 | # 给txt每一行增加内容
2 | 
3 | with open('demo.txt') as f:
4 |     lines = (line.strip() for line in f)
5 |     for line in lines:
6 |         print(line)


--------------------------------------------------------------------------------
/Data-Processing/for_txt/example.txt:
--------------------------------------------------------------------------------
1 | 37 52 2
2 | 49 49 4
3 | 52 64 4
4 | 20 26 1
5 | 40 30 3


--------------------------------------------------------------------------------
/Data-Processing/for_txt/example1.txt:
--------------------------------------------------------------------------------
1 | 37,52,2
2 | 49,49,4
3 | 52,64,4
4 | 20,26,1
5 | 40,30,3


--------------------------------------------------------------------------------
/Data-Processing/for_txt/how_many_lines.py:
--------------------------------------------------------------------------------
 1 | # 输出txt有多少行
 2 | # 使用with open的好处是：
 3 | with open(r'demo.txt', 'rt') as f:
 4 |     count=len(f.readlines())
 5 |     print(count)
 6 | 
 7 | 
 8 | # f = open(r'somefile.txt', 'rt')
 9 | # data = f.read()
10 | # f.close()


--------------------------------------------------------------------------------
/Data-Processing/for_txt/read_data.py:
--------------------------------------------------------------------------------
 1 | # 读取txt数据并保存到数组
 2 | # txt数据类型为：每行带括号，用逗号隔开
 3 | # (id,length,speed,channel,from,to,isDuplex)
 4 | # (5000, 10, 5, 1, 1, 2, 1)
 5 | # (5001, 10, 5, 1, 2, 3, 1)
 6 | # (5002, 10, 5, 1, 3, 4, 1)
 7 | # (5003, 10, 5, 1, 4, 5, 1)
 8 | # (5004, 10, 5, 1, 5, 6, 1)
 9 | 
10 | with open(r'demo.txt') as f:
11 |     next(f)  # 从txt的第二行开始了
12 |     lines = f.readlines()
13 | 
14 |     data = []                          
15 |     for line in lines:              #把lines中的数据逐行读取出来
16 |         temp1=line.strip('\n()').split(',')  # 去掉字符串首尾的分隔符
17 |         data.append(temp)
18 | 	data = [list(map(int, x)) for x in data]  # 将字符串转换为整数
19 | 


--------------------------------------------------------------------------------
/Data-Processing/for_txt/read_numpy.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | data = np.genfromtxt('example.txt')  # load the text table into an ndarray using genfromtxt's defaults
4 | 


--------------------------------------------------------------------------------
/Data-Processing/for_txt/read_txt.py:
--------------------------------------------------------------------------------
1 | with open('example.txt') as f:
2 |     lines = (line.strip() for line in f)  # 得到一个迭代器
3 |     for line in lines:
4 |         print(line)


--------------------------------------------------------------------------------
/Data-Processing/for_txt/test.py:
--------------------------------------------------------------------------------
1 | number = 7
2 | print(f"problem: {number}")


--------------------------------------------------------------------------------
/Data-Processing/for_txt/write_txt.py:
--------------------------------------------------------------------------------
1 | # 将列表写入txt
2 | import numpy as np
3 | a = [1, 2, 3]
4 | 
5 | np.savetxt('11.txt', a, fmt='%i', delimiter=',')
6 |     


--------------------------------------------------------------------------------
/Data-Processing/print/print_in_1line.py:
--------------------------------------------------------------------------------
 1 | # 同一行输出
 2 | 
 3 | import time
 4 | for i in range(20):
 5 |     time.sleep(0.4)
 6 |     print('\r',str(30-i).ljust(10),end='')
 7 | 
 8 | 
 9 | # import sys,time
10 | # for i in range(20):
11 | #     print('#',end='',flush=True)
12 | #     time.sleep(0.4)
13 | 
14 | # 而 ‘\r‘ 则是回到当前的开头
15 | # 默认是Flase，只有缓冲区满或者全部内容都获取到了，才会一次全部执行打印
16 | # 改成True，就是强制刷新，立刻打印出来
17 | 
18 | # end='\n' 这个是默认的end参数，所以平时是打印一条之后会换行。
19 | # 例子都将参数改为了空，所以不会换行了


--------------------------------------------------------------------------------
/Data-Processing/remove_string_spaces.py:
--------------------------------------------------------------------------------
1 | s = '   I       love u    forerver  !'
2 | result = ''.join(s.split())
3 | print(result)


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 crazyang
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Messy/A.py:
--------------------------------------------------------------------------------
 1 | import math
 2 | 
 3 | def heuristic_distace(Neighbour_node,target_node):
 4 |     H = abs(Neighbour_node[0] - target_node[0]) + abs(Neighbour_node[1] - target_node[1])
 5 |     return H
 6 | 
 7 | def go_around(direction):
 8 |     box_length = 1
 9 |     diagonal_line = box_length * 1.4
10 |     if (direction==0 or direction==2 or direction==6 or direction==8):
11 |         return diagonal_line
12 |     elif (direction==1 or direction==3 or direction==4 or direction==5 or direction==7):
13 |         return diagonal_line
14 | 
15 | def find_coordinate(map,symble):
16 |     #store coordinate
17 |     result=[]
18 |     for index1,value1 in enumerate(map):
19 |         if symble in value1:
20 |             row = index1
21 |             for index2, value2 in enumerate(map[index1]):
22 |                 if symble==value2:
23 |                    column = index2
24 |                    result.append([row, column])
25 |     return result
26 | 
27 | map =[[".", ".", ".", "#", ".", "#", ".", ".", ".", "."],
28 |       [".", ".", "#", ".", ".", "#", ".", "#", ".", "#"],
29 |       ["s", ".", "#", ".", "#", ".", "#", ".", ".", "."],
30 |       [".", "#", "#", ".", ".", ".", ".", ".", "#", "."],
31 |       [".", ".", ".", ".", "#", "#", ".", ".", "#", "."],
32 |       [".", "#", ".", ".", ".", ".", "#", ".", ".", "."],
33 |       [".", "#", ".", ".", ".", "#", "#", ".", "#", "."],
34 |       [".", ".", ".", ".", ".", ".", ".", ".", "#", "."],
35 |       [".", "#", "#", ".", ".", ".", "#", ".", ".", "."],
36 |       [".", ".", ".", "#", "#", "#", ".", ".", "#", "f"],
37 |       ["#", "#", ".", ".", "#", "#", "#", ".", "#", "."],
38 |       [".", "#", "#", ".", ".", ".", "#", ".", ".", "."],
39 |       [".", ".", ".", ".", "#", "#", ".", ".", "#", "."]]
40 | 
41 | # every coordinate below is a [row, column] list; collections of them are plain lists of such pairs
42 | 
43 | obstacle = find_coordinate(map,"#")
44 | start_node = find_coordinate(map,"s")[0]
45 | target_node = find_coordinate(map,"f")[0]
46 | current_node = start_node
47 | path_vertices = [start_node]
48 | # visited vertices are kept in one flat list (path_vertices)
49 | Neighbour_vertices = []
50 | 
51 | while current_node != target_node:
52 | 
53 |     x_coordinate = current_node[0]
54 |     y_coordinate = current_node[1]
55 |     F = []
56 |     Neighbour_vertices =   [[x_coordinate - 1, y_coordinate - 1],
57 |                             [x_coordinate - 1, y_coordinate    ],
58 |                             [x_coordinate - 1, y_coordinate + 1],
59 |                             [x_coordinate,     y_coordinate - 1],
60 |                             [x_coordinate    , y_coordinate    ],
61 |                             [x_coordinate,     y_coordinate + 1],
62 |                             [x_coordinate + 1, y_coordinate - 1],
63 |                             [x_coordinate + 1, y_coordinate    ],
64 |                             [x_coordinate + 1, y_coordinate + 1]]
65 | 
66 |     for index, value in enumerate(Neighbour_vertices):
67 |         if value[0] in range(len(map)):
68 |             if value[1] in range(len(map)):
69 |                if value not in obstacle+path_vertices:
70 |                     F.append(heuristic_distace(value, target_node) + go_around(index))
71 |                else:
72 |                     F.append(10000)
73 |             else:
74 |                     F.append(10000)
75 |         else:
76 |                     F.append(10000)
77 |                #a very large number
78 |     print(F)
79 |     current_node=Neighbour_vertices[F.index(min(total_distance for total_distance in F))]
80 |     print(current_node)
81 | 
82 |     path_vertices.append(current_node)
83 |       # if current_node not in visited_vertices:
84 |       #     visited_vertices.append(current_node)
85 |       # else:
86 |       #     print("there is no route between")
87 |       #     break
88 | 
89 | print(path_vertices)
90 | 


--------------------------------------------------------------------------------
/Messy/DataTransform.py:
--------------------------------------------------------------------------------
 1 | from xml.etree.ElementTree import parse
 2 | import os
 3 | 
 4 | db_dir = './data/VMS/'    #更改为自己的目录地址
 5 | anno_path = db_dir
 6 | fileList = os.listdir(anno_path)
 7 | if not os.path.exists(anno_path):
 8 |     os.makedirs(anno_path)
 9 | for file_name in fileList:
10 |     if file_name[len(file_name)-3:] != 'xml':
11 |         continue
12 |     filePath = db_dir + file_name
13 |     print(filePath)
14 |     tree = parse(filePath)
15 |     root = tree.getroot()
16 |     parsed = []
17 |     for annot in root.iter('annotation'):
18 |         for obj in annot.findall('object'):
19 |             label = obj.findtext('name')
20 |             for coord in obj.findall('bndbox'):
21 |                 x_max = float(coord.findtext('xmax'))
22 |                 x_min = float(coord.findtext('xmin'))
23 |                 y_max = float(coord.findtext('ymax'))
24 |                 y_min = float(coord.findtext('ymin'))
25 |             parsed = parsed + [str(label) + ',' + str(x_min) + ',' + str(y_min) + ','+ str(x_max) + ',' + str(y_max)]
26 |     fp = open(anno_path + '/' + file_name[:-3] + 'txt','w')
27 |     for elem in parsed:
28 |         print>>fp, elem
29 |     fp.close()
30 | 


--------------------------------------------------------------------------------
/Messy/Graph.py:
--------------------------------------------------------------------------------
 1 | import matplotlib.pyplot as plt
 2 | import networkx as nx
 3 | 
 4 | G = nx.Graph()
 5 | # explicitly set positions (only used if the commented-out pos=pos argument below is enabled)
 6 | pos = {0: (0, 0),
 7 |        1: (1, 0),
 8 |        2: (0, 1),
 9 |        3: (1, 1),
10 |        4: (0.5, 2.0)}
11 | 
12 | # nx.draw_networkx_nodes(G, pos, node_color='r', node_size=20)
13 | # nx.draw_networkx_edges(G, pos, edge_color='b', width=2)
14 | # nx.draw_networkx_labels(G, pos, font_size=10)
15 | 
16 | 
17 | 
18 | G.add_edge('x','y')# add an edge between node x and node y
19 | # G.add_edges_from([(0,1),(1,3),(2,4)]) 
20 | nx.draw(G, 
21 |         with_labels=True, # show the node names
22 |         edge_color='b', # b stands for blue! 
23 |         # pos=pos, # selects how the nodes are laid out; see help(nx.drawing.layout)
24 |      # main layouts: spring_layout (default), random_layout, circle_layout, shell_layout
25 |      # (original note: a circular layout here; random and other layouts also exist)
26 |         node_color='r', # r = red
27 |         node_size=1000, # node size
28 |         width=3, # edge width
29 |        )
30 | plt.axis('off')
31 | plt.show()
32 | 
33 | 
34 | # import matplotlib.pyplot as plt
35 | # import networkx as nx
36 | 
37 | # G = nx.Graph()
38 | # 把地图绘制出来
39 | # def draw_map():
40 | #     for i in range(len(road_label[0])):
41 | #         G.add_edge(road_label[4][i],road_label[5][i])
42 | 
43 | #     nx.draw(G, 
44 | #             with_labels=True, #这个选项让节点有名称
45 | #             edge_color='b', # b stands for blue! 
46 | #             # pos=pos,  
47 | #             node_color='r', # r = red
48 | #             node_size=1000, # 节点大小
49 | #             width=3, # 边的宽度
50 | #         )
51 | #     print(nx.shortest_path(G, 35, 35))
52 |     # nx.draw_networkx_edge_labels #  可以给路加上路名
53 |     # plt.axis('off')
54 |     # plt.show()
55 | 


--------------------------------------------------------------------------------
/Messy/GraphAlgorithms.py:
--------------------------------------------------------------------------------
 1 | import classini
 2 | import read
 3 | # 路径规划算法集合
 4 | 
 5 | # Build an adjacency matrix
 6 | # input type: list (items are read through .begin_id, .end_id and .length)
 7 | # output type: list (n x n nested list, n = len(list))
 8 | def adjacency_matrix(list):
 9 |     n = len(list)
10 |     Na = 1000  # placeholder weight for cells that never get an edge length
11 |     output_matrix = [ [ Na for i in range(n) ] for j in range(n) ]
12 | 
13 |     for i in range(n):
14 |         output_matrix[i][i] = 0  # NOTE(review): begin_id/end_id index an n x n matrix sized by the item count — assumes ids < n; confirm
15 |         output_matrix[list[i].begin_id][list[i].end_id] = list[i].length
16 |         output_matrix[list[i].begin_id][list[i].end_id] = list[i].length  # NOTE(review): exact duplicate of the line above — presumably meant [end_id][begin_id]; confirm
17 | 
18 |     return output_matrix
19 | 
20 | 
21 | 
22 | if __name__ == '__main__':
23 |     car_path = r'car.txt'
24 |     carfile=read.read_input_file(car_path)  # presumably column-oriented: carfile[k][i] is field k of record i — verify against read.read_input_file
25 | 
26 |     car_list=[]
27 |     for i in range(len(carfile[0])):  # one Car per record, built from the first five fields
28 |         car_list.append(classini.Car(carfile[0][i],carfile[1][i],carfile[2][i],carfile[3][i],carfile[4][i]))
29 | 
30 |     road_path = r'road.txt'
31 |     roadfile=read.read_input_file(road_path)
32 | 
33 |     road_list=[]
34 |     for i in range(len(roadfile[0])):  # one road per record, built from seven fields
35 |         road_list.append(classini.road(roadfile[0][i],roadfile[1][i],roadfile[2][i],roadfile[3][i],roadfile[4][i],roadfile[5][i],roadfile[6][i]))
36 | 
37 |     cross_path = r'cross.txt'
38 |     crossfile=read.read_input_file(cross_path)
39 | 
40 |     cross_list=[]
41 |     for i in range(len(crossfile[0])):  # one Cross per record, built from five fields
42 |         cross_list.append(classini.Cross(crossfile[0][i],crossfile[1][i],crossfile[2][i],crossfile[3][i],crossfile[4][i]))
43 | 
44 |     print(adjacency_matrix(road_list))  # only the road list feeds the matrix; car_list and cross_list are unused here


--------------------------------------------------------------------------------
/Messy/ListSearch.py:
--------------------------------------------------------------------------------
 1 | # enumerate() wraps an iterable (list, tuple, string, ...) into an indexed sequence, yielding each item together with its index
 2 | 
 3 | map =[[".", ".", ".", "#", ".", "#", ".", ".", ".", "."],
 4 |       [".", ".", "#", ".", ".", "#", ".", "#", ".", "#"],
 5 |       ["s", ".", "#", ".", "#", ".", "#", ".", ".", "."],
 6 |       [".", "#", "#", ".", ".", ".", ".", ".", "#", "."],
 7 |       [".", ".", ".", ".", "#", "#", ".", ".", "#", "."],
 8 |       [".", "#", ".", ".", ".", ".", "#", ".", ".", "."],
 9 |       [".", "#", ".", ".", ".", "#", "#", ".", "#", "."],
10 |       [".", ".", ".", ".", ".", ".", ".", ".", "#", "."],
11 |       [".", "#", "#", ".", ".", ".", "#", ".", ".", "."],
12 |       [".", ".", ".", "#", "#", "#", ".", ".", "#", "f"],
13 |       ["#", "#", ".", ".", "#", "#", "#", ".", "#", "."],
14 |       [".", "#", "#", ".", ".", ".", "#", ".", ".", "."],
15 |       [".", ".", ".", ".", "#", "#", ".", ".", "#", "."]]
16 | 
17 | def find_coordinate(map,symble):
18 |     result=[]
19 |     for index1,value1 in enumerate(map):
20 |         if symble in value1:
21 |             row = index1
22 |             for index2, value2 in enumerate(map[index1]):
23 |                 if symble == value2:
24 |                    column = index2
25 |                    result.append([row, column])
26 |     return result
27 | 
28 | obstacle = find_coordinate(map,"#")
29 | 
30 | print(obstacle)


--------------------------------------------------------------------------------
/Messy/labelImg.py:
--------------------------------------------------------------------------------
 1 | ﻿# -*- coding: utf-8 -*-
 2 | import os
 3 | import sys
 4 | #import natsort
 5 | import xml.etree.ElementTree as ET
 6 | file_dir='D:\BD-label'
 7 | 
 8 | def file_name(file_dir):
 9 |     L=[]
10 |     for root, dirs, files in os.walk(file_dir):
11 |         for file in files:
12 |             if os.path.splitext(file)[1] == '.xml':
13 |                 L.append(os.path.join(root, file))
14 |     return L
15 | 
16 | if  __name__=='__main__':
17 |     Num=file_name(file_dir)
18 |     Num.sort()
19 |     for i in range(241,len(Num)-1):   #每个单独的文件
20 |         print Num[i]
21 |         name=Num[i].split('\\')[2]
22 |         print name
23 |         tree = ET.parse(Num[i])
24 |         root = tree.getroot()
25 |         object = root.findall("object")
26 |         count=0
27 |         for tmp in object:     #every object
28 |             count=count+1
29 |             a=tmp
30 |             d=ET.SubElement(a, 'index')
31 |             d.text=str(count)
32 |             d.tail="\n\t\t"
33 |             #print(a.find('name').text)
34 |             print("print No.%d     %s 's father index.End with 0\n"%(count,a.find('name').text))
35 | 
36 |             b = ET.SubElement(a, 'father')
37 |             b.text="\n\t\t\t"
38 |             b.tail="\n\t\t"
39 |             count_father=0
40 |             while(True):
41 |                 count_father = count_father+1
42 |                 insert_num=input()
43 |                 if(insert_num==0):
44 |                     break
45 |                 c=ET.SubElement(b,'num'+str(count_father))
46 |                 c.text=str(insert_num)
47 |                 c.tail = "\n\t\t\t"
48 |             print("print No.%d     %s's child num. End with 0\n" % (count,a.find('name').text))
49 |             a = tmp
50 |             b = ET.SubElement(a, 'child')
51 |             b.text = "\n\t\t\t"
52 |             b.tail = "\n\t"
53 |             count_child=0
54 |             while (True):
55 |                 count_child=count_child+1
56 |                 insert_num = input()
57 |                 if (insert_num == 0):
58 |                     break
59 |                 c = ET.SubElement(b, 'num'+str(count_child))
60 |                 c.text = str(insert_num)
61 |                 c.tail = "\n\t\t\t"
62 |             tree.write(name)
63 | 


--------------------------------------------------------------------------------
/Messy/list_remove.py:
--------------------------------------------------------------------------------
1 | list = ['Google', 'Runoob', 'Taobao', 'Baidu']
2 | for i in range(len(list)):
3 |     list.remove(list[0])
4 | print(list)


--------------------------------------------------------------------------------
/Python+Algorithm/Evolutionary-Algorithm/Match Phrase.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Visualize Genetic Algorithm to match the target phrase.
 3 | 
 4 | Visit my tutorial website for more: https://morvanzhou.github.io/tutorials/
 5 | """
 6 | import numpy as np
 7 | 
 8 | TARGET_PHRASE = 'You get it!'       # target DNA
 9 | POP_SIZE = 300                      # population size
10 | CROSS_RATE = 0.4                    # mating probability (DNA crossover)
11 | MUTATION_RATE = 0.01                # mutation probability
12 | N_GENERATIONS = 1000
13 | 
14 | DNA_SIZE = len(TARGET_PHRASE)
15 | TARGET_ASCII = np.fromstring(TARGET_PHRASE, dtype=np.uint8)  # 将字符串转变为数字
16 | ASCII_BOUND = [32, 126]
17 | 
18 | 
19 | class GA(object):
20 |     def __init__(self, DNA_size, DNA_bound, cross_rate, mutation_rate, pop_size):
21 |         self.DNA_size = DNA_size
22 |         DNA_bound[1] += 1
23 |         self.DNA_bound = DNA_bound
24 |         self.cross_rate = cross_rate
25 |         self.mutate_rate = mutation_rate
26 |         self.pop_size = pop_size
27 | 
28 |         self.pop = np.random.randint(*DNA_bound, size=(pop_size, DNA_size)).astype(np.int8)  # int8 for convert to ASCII
29 | 
30 |     def translateDNA(self, DNA):                 # convert to readable string
31 |         return DNA.tostring().decode('ascii')
32 | 
33 |     def get_fitness(self):                      # count how many character matches
34 |         match_count = (self.pop == TARGET_ASCII).sum(axis=1)
35 |         return match_count
36 | 
37 |     def select(self):
38 |         fitness = self.get_fitness() + 1e-4     # add a small amount to avoid all zero fitness
39 |         idx = np.random.choice(np.arange(self.pop_size), size=self.pop_size, replace=True, p=fitness/fitness.sum())
40 |         return self.pop[idx]
41 | 
42 |     def crossover(self, parent, pop):
43 |         if np.random.rand() < self.cross_rate:
44 |             i_ = np.random.randint(0, self.pop_size, size=1)                        # select another individual from pop
45 |             cross_points = np.random.randint(0, 2, self.DNA_size).astype(np.bool)   # choose crossover points
46 |             parent[cross_points] = pop[i_, cross_points]                            # mating and produce one child
47 |         return parent
48 | 
49 |     def mutate(self, child):
50 |         for point in range(self.DNA_size):
51 |             if np.random.rand() < self.mutate_rate:
52 |                 child[point] = np.random.randint(*self.DNA_bound)  # choose a random ASCII index
53 |         return child
54 | 
55 |     def evolve(self):
56 |         pop = self.select()
57 |         pop_copy = pop.copy()
58 |         for parent in pop:  # for every parent
59 |             child = self.crossover(parent, pop_copy)
60 |             child = self.mutate(child)
61 |             parent[:] = child
62 |         self.pop = pop
63 | 
64 | if __name__ == '__main__':
65 |     ga = GA(DNA_size=DNA_SIZE, DNA_bound=ASCII_BOUND, cross_rate=CROSS_RATE,
66 |             mutation_rate=MUTATION_RATE, pop_size=POP_SIZE)
67 | 
68 |     for generation in range(N_GENERATIONS):
69 |         fitness = ga.get_fitness()
70 |         best_DNA = ga.pop[np.argmax(fitness)]
71 |         best_phrase = ga.translateDNA(best_DNA)
72 |         print('Gen', generation, ': ', best_phrase)
73 |         if best_phrase == TARGET_PHRASE:
74 |             break
75 |         ga.evolve()
76 | 


--------------------------------------------------------------------------------
/Python+Algorithm/Evolutionary-Algorithm/genetic_algorithm.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Visualize Genetic Algorithm to find a maximum point in a function.
 3 | 
 4 | Visit my tutorial website for more: https://morvanzhou.github.io/tutorials/
 5 | """
 6 | import numpy as np
 7 | import matplotlib.pyplot as plt
 8 | 
 9 | DNA_SIZE = 10            # DNA length
10 | POP_SIZE = 100           # population size
11 | CROSS_RATE = 0.8         # mating probability (DNA crossover)
12 | MUTATION_RATE = 0.003    # mutation probability
13 | N_GENERATIONS = 200
14 | X_BOUND = [0, 5]         # x upper and lower bounds
15 | 
16 | # to find the maximum of this function
17 | def F(x): 
18 |     return np.sin(10*x)*x + np.cos(2*x)*x     
19 | 
20 | 
21 | # find non-zero fitness for selection
22 | def get_fitness(pred): return pred + 1e-3 - np.min(pred)
23 | 
24 | 
25 | # convert binary DNA to decimal and normalize it to a range(0, 5)
26 | def translateDNA(pop): 
27 |     return pop.dot(2 ** np.arange(DNA_SIZE)[::-1]) / float(2**DNA_SIZE-1) * X_BOUND[1]
28 | 
29 | 
30 | def select(pop, fitness):    # nature selection wrt pop's fitness
31 |     idx = np.random.choice(np.arange(POP_SIZE), size=POP_SIZE, replace=True,
32 |                            p=fitness/fitness.sum())
33 |     return pop[idx]
34 | 
35 | 
36 | def crossover(parent, pop):     # mating process (genes crossover)
37 |     if np.random.rand() < CROSS_RATE:
38 |         i_ = np.random.randint(0, POP_SIZE, size=1)                             # select another individual from pop
39 |         cross_points = np.random.randint(0, 2, size=DNA_SIZE).astype(np.bool)   # choose crossover points
40 |         parent[cross_points] = pop[i_, cross_points]                            # mating and produce one child
41 |     return parent
42 | 
43 | 
44 | def mutate(child):
45 |     for point in range(DNA_SIZE):
46 |         if np.random.rand() < MUTATION_RATE:
47 |             child[point] = 1 if child[point] == 0 else 0
48 |     return child
49 | 
50 | 
51 | pop = np.random.randint(2, size=(POP_SIZE, DNA_SIZE))   # initialize the pop DNA
52 | 
53 | plt.ion()       # interactive mode, so the figure can refresh inside the loop
54 | x = np.linspace(*X_BOUND, 200)
55 | plt.plot(x, F(x))
56 | 
57 | for _ in range(N_GENERATIONS):
58 |     F_values = F(translateDNA(pop))    # compute function value by extracting DNA
59 | 
60 |     # replace the previous generation's scatter (if one was drawn) with the current population
61 |     if 'sca' in globals(): sca.remove()
62 |     sca = plt.scatter(translateDNA(pop), F_values, s=200, lw=0, c='red', alpha=0.5); plt.pause(0.05)
63 | 
64 |     # GA part (evolution)
65 |     fitness = get_fitness(F_values)
66 |     print("Most fitted DNA: ", pop[np.argmax(fitness), :])
67 |     pop = select(pop, fitness)
68 |     pop_copy = pop.copy()
69 |     for parent in pop:
70 |         child = crossover(parent, pop_copy)
71 |         child = mutate(child)
72 |         parent[:] = child       # parent is replaced by its child
73 | 
74 | plt.ioff()
75 | plt.show()
76 | 
77 | 
78 | 


--------------------------------------------------------------------------------
/Python+Algorithm/Geometric/point_oblique_straight_point.py:
--------------------------------------------------------------------------------
 1 | # 解算出两线段的交点
 2 | # 采用 一点和斜率，然后两直线求解
 3 | 
 4 | 
 5 | class Point(object):
 6 |     def __init__(self, x=0, y=0):
 7 |         self.x = x
 8 |         self.y = y
 9 | 
10 | 
11 | # 用"一点和斜率"定义的直线
12 | class Line(object):
13 |     def __init__(self, p, k):
14 |         self.p = p
15 |         self.k = k
16 | 
17 | 
18 | # 解两直线的交点
19 | def get_cross_point(l1, l2):
20 |     cross_point = Point()
21 |     cross_point.x = (l2.p.y + l1.p.y - l2.k * l2.p.x - l1.k * l1.p.x) * 1.0 / (
22 |         l1.k - l2.k)
23 |     cross_point.y = (l1.k * (l2.p.y - l2.k * l2.p.x) - l2.k *
24 |                      (l1.p.y - l1.k * l1.p.x)) * 1.0 / (l1.k - l2.k)
25 |     return cross_point
26 | 
27 | 
28 | if __name__ == '__main__':
29 |     p1 = Point(1, 1)
30 |     k1 = 1
31 |     line1 = Line(p1, k1)
32 | 
33 |     p2 = Point(1, 1)
34 |     k2 = -1
35 |     line2 = Line(p2, k2)
36 | 
37 |     cross_point = get_cross_point(line1, line2)
38 |     print("Cross point:", cross_point.x, cross_point.y)
39 | 


--------------------------------------------------------------------------------
/Python+Algorithm/Geometric/two_point_straight_point.py:
--------------------------------------------------------------------------------
 1 | # 解算出两线段的交点
 2 | # 采用 两点确定一条直线，然后两直线求解
 3 | 
 4 | 
 5 | class Point(object):
 6 |     def __init__(self, x=0, y=0):
 7 |         self.x = x
 8 |         self.y = y
 9 | 
10 | 
11 | # 用"两点"定义的直线
12 | class Line(object):
13 |     def __init__(self, p1, p2):
14 |         self.p1 = p1
15 |         self.p2 = p2
16 | 
17 | 
18 | # 求直线的参数，
19 | def get_line_parameter(line):
20 |     line.a = line.p1.y - line.p2.y
21 |     line.b = line.p2.x - line.p1.x
22 |     line.c = line.p1.x * line.p2.y - line.p2.x * line.p1.y
23 | 
24 | 
25 | # 解两直线的交点
26 | def get_cross_point(l1, l2):
27 |     get_line_parameter(l1)
28 |     get_line_parameter(l2)
29 |     d = l1.a * l2.b - l2.a * l1.b
30 |     cross_point = Point()
31 |     cross_point.x = (l1.b * l2.c - l2.b * l1.c) * 1.0 / d
32 |     cross_point.y = (l1.c * l2.a - l2.c * l1.a) * 1.0 / d
33 |     return cross_point
34 | 
35 | 
36 | if __name__ == '__main__':
37 |     p1 = Point(0, 1)
38 |     p2 = Point(1, 1)
39 |     line1 = Line(p1, p2)
40 | 
41 |     p3 = Point(1, 1)
42 |     p4 = Point(1, 0)
43 |     line2 = Line(p3, p4)
44 | 
45 |     cross_point = get_cross_point(line1, line2)
46 |     print("Cross point:", cross_point.x, cross_point.y)
47 | 


--------------------------------------------------------------------------------
/Python+Algorithm/Least-Squares/Least squares.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | 
 3 | sales=pd.read_csv('train_data.csv',sep='\s*,\s*',engine='python')  #读取CSV
 4 | X=sales['X'].values    #存csv的第一列
 5 | Y=sales['Y'].values    #存csv的第二列
 6 | 
 7 | #初始化赋值
 8 | s1 = 0
 9 | s2 = 0
10 | s3 = 0
11 | s4 = 0
12 | n = 4       ####你需要根据的数据量进行修改
13 | 
14 | #循环累加
15 | for i in range(n):
16 | 	s1 = s1 + X[i]*Y[i]
17 | 	s2 = s2 + X[i]
18 | 	s3 = s3 + Y[i]
19 | 	s4 = s4 + X[i]*X[i]
20 | 	
21 | #计算斜率和截距
22 | b = (s2*s3-n*s1)/(s2*s2-s4*n)
23 | a = (s3 - b*s2)/n
24 | print("Coeff: {} Intercept: {}".format(b, a))
25 | 
26 | 


--------------------------------------------------------------------------------
/Python+Algorithm/Least-Squares/README.md:
--------------------------------------------------------------------------------
1 | # Least-squares
2 | 
3 | 最小二乘法的 Python 实现：拟合计算本身不依赖第三方库，仅用 pandas 读取 CSV 数据
4 | 
5 | 数据是train_data文件，也可以你手动输入
6 | 
7 | 参考我的[CSDN说明](https://blog.csdn.net/yzy_1996/article/details/81064140)


--------------------------------------------------------------------------------
/Python+Algorithm/Least-Squares/train_data.csv:
--------------------------------------------------------------------------------
1 | X,Y
2 | 1,6
3 | 2,5
4 | 3,7
5 | 4,10
6 | 


--------------------------------------------------------------------------------
/Python+Algorithm/Math/Kalman/Kalman_2D.py:
--------------------------------------------------------------------------------
 1 | import cv2
 2 | import numpy as np
 3 | import matplotlib.pyplot as plt
 4 | 
 5 | pos = np.array([
 6 |         [10,    50],
 7 |         [12,    49],    
 8 |         [11,    52],     
 9 |         [13,    52.2],     
10 |         [12.9,  50]], np.float32)    
11 | 
12 | '''
13 | 它有3个输入参数，dynam_params：状态空间的维数，这里为2；measure_param：测量值的维数，这里也为2; control_params：控制向量的维数，默认为0。由于这里该模型中并没有控制变量，因此也为0。
14 | '''
15 | kalman = cv2.KalmanFilter(2,2)  # 2 state dimensions, 2 measurement dimensions
16 | 
17 | kalman.measurementMatrix = np.array([[1,0],[0,1]],np.float32)  # identity measurement matrix
18 | kalman.transitionMatrix = np.array([[1,0],[0,1]], np.float32)  # identity transition matrix
19 | kalman.processNoiseCov = np.array([[1,0],[0,1]], np.float32) * 1e-3  # process noise covariance
20 | kalman.measurementNoiseCov = np.array([[1,0],[0,1]], np.float32) * 0.01  # measurement noise covariance
21 | '''
22 | kalman.measurementNoiseCov为测量系统的协方差矩阵，方差越小，预测结果越接近测量值，kalman.processNoiseCov为模型系统的噪声，噪声越大，预测结果越不稳定，越容易接近模型系统预测值，且单步变化越大，相反，若噪声小，则预测结果与上个计算结果相差不大。
23 | '''
24 | 
25 | kalman.statePre =  np.array([[6],[6]],np.float32)  # starting value of the predicted state
26 | 
27 | for i in range(len(pos)):
28 |     mes = np.reshape(pos[i,:],(2,1))  # i-th row of pos as a 2x1 column vector
29 | 
30 |     x = kalman.correct(mes)  # correct the state with this measurement
31 | 
32 |     y = kalman.predict()  # then predict the next state
33 |     print (kalman.statePost[0],kalman.statePost[1])
34 |     print (kalman.statePre[0],kalman.statePre[1])
35 |     print ('measurement:\t',mes[0],mes[1])  
36 |     print ('correct:\t',x[0],x[1])
37 |     print ('predict:\t',y[0],y[1])     
38 |     print ('='*30)  


--------------------------------------------------------------------------------
/Python+Algorithm/Math/Kalman/Kalman_3D.py:
--------------------------------------------------------------------------------
 1 | import cv2
 2 | import numpy as np
 3 | #创建一个大小800*800的空帧
 4 | frame = np.zeros((800,800,3),np.uint8)
 5 | #初始化测量坐标和鼠标运动预测的数组
 6 | last_measurement = current_measurement = np.array((2,1),np.float32)
 7 | print(last_measurement)
 8 | last_predicition = current_prediction = np.zeros((2,1),np.float32)
 9 | '''
10 |     mousemove()函数在这里的作用就是传递X,Y的坐标值，便于对轨迹进行卡尔曼滤波
11 | '''
12 | def mousemove(event,x,y,s,p):
13 |     #定义全局变量
14 |     global frame,current_measurement,measurements,last_measurement,current_prediction,last_prediction
15 |     #初始化
16 |     last_measurement = current_measurement
17 |     last_prediction = current_prediction
18 |     #传递当前测量坐标值
19 |     current_measurement = np.array([[np.float32(x)],[np.float32(y)]])
20 |     #用来修正卡尔曼滤波的预测结果
21 |     kalman.correct(current_measurement)
22 |     # 调用kalman这个类的predict方法得到状态的预测值矩阵，用来估算目标位置
23 |     current_prediction = kalman.predict()
24 |     #上一次测量值
25 |     lmx,lmy = last_measurement[0],last_measurement[1]
26 |     #当前测量值
27 |     cmx,cmy = current_measurement[0],current_measurement[1]
28 |     #上一次预测值
29 |     lpx,lpy = last_prediction[0],last_prediction[1]
30 |     #当前预测值
31 |     cpx,cpy = current_prediction[0],current_prediction[1]
32 |     #绘制测量值轨迹（绿色）
33 |     cv2.line(frame,(lmx,lmy),(cmx,cmy),(0,100,0))
34 |     #绘制预测值轨迹（红色）
35 |     cv2.line(frame,(lpx,lpy),(cpx,cpy),(0,0,200))
36 | 
37 | cv2.namedWindow("kalman_tracker")
38 | 
39 | cv2.setMouseCallback("kalman_tracker",mousemove)
40 | 
41 | kalman = cv2.KalmanFilter(4,2)  # state-space dimension and measurement dimension
42 | # measurement matrix
43 | kalman.measurementMatrix = np.array([[1,0,0,0],[0,1,0,0]],np.float32)
44 | # transition matrix
45 | kalman.transitionMatrix = np.array([[1,0,1,0],[0,1,0,1],[0,0,1,0],[0,0,0,1]],np.float32)
46 | # process noise covariance matrix
47 | kalman.processNoiseCov = np.array([[1,0,0,0],[0,1,0,0],[0,0,1,0],[0,0,0,1]],np.float32)*0.03
48 | 
49 | while True:
50 |     cv2.imshow("kalman_tracker",frame)
51 |     if cv2.waitKey(1) & 0xFF == ord('q'):
52 |         break
53 | 
54 | cv2.destroyAllWindows()
55 | 


--------------------------------------------------------------------------------
/Python+Algorithm/Math/Kalman/README.md:
--------------------------------------------------------------------------------
 1 | # Kalman滤波器代码
 2 | 
 3 | ## 1、Python
 4 | 
 5 | You need to install `numpy` and `pylab` (part of matplotlib) first.
 6 | 
 7 | Example 1: see the file named my_kalman_carmove.py
 8 | 
 9 | 
10 | 
11 | ## 2、Matlab
12 | 
13 | 


--------------------------------------------------------------------------------
/Python+Algorithm/Math/Kalman/kalamn_unc.py:
--------------------------------------------------------------------------------
 1 | # -*- coding=utf-8 -*-
 2 | # Kalman filter example demo in Python
 3 |  
 4 | # A Python implementation of the example given in pages 11-15 of "An
 5 | # Introduction to the Kalman Filter" by Greg Welch and Gary Bishop,
 6 | # University of North Carolina at Chapel Hill, Department of Computer
 7 | # Science, TR 95-041,
 8 | # http://www.cs.unc.edu/~welch/kalman/kalmanIntro.html
 9 |  
10 | # by Andrew D. Straw
11 | #coding:utf-8
12 | import numpy
13 | import pylab
14 |  
15 | # this script assumes A = 1 and H = 1
16 |  
17 | # initial parameters
18 | n_iter = 50
19 | sz = (n_iter,) # size of array
20 | x = -0.37727 # truth value (typo in example at top of p. 13 calls this z)
21 | z = numpy.random.normal(x,0.1,size=sz) # observations (normal about x, sigma=0.1)
22 |  
23 | Q = 1e-5 # process variance
24 |  
25 | # allocate space for arrays
26 | xhat=numpy.zeros(sz)      # a posteriori estimate of x
27 | P=numpy.zeros(sz)         # a posteriori error estimate
28 | xhatminus=numpy.zeros(sz) # a priori estimate of x
29 | Pminus=numpy.zeros(sz)    # a priori error estimate
30 | K=numpy.zeros(sz)         # gain or blending factor
31 |  
32 | R = 0.1**2 # estimate of measurement variance, change to see effect
33 |  
34 | # initial guesses
35 | xhat[0] = 0.0
36 | P[0] = 1.0
37 |  
38 | for k in range(1,n_iter):
39 |     # time update
40 |     xhatminus[k] = xhat[k-1]  #X(k|k-1) = AX(k-1|k-1) + BU(k) + W(k),A=1,BU(k) = 0
41 |     Pminus[k] = P[k-1]+Q      #P(k|k-1) = AP(k-1|k-1)A' + Q(k) ,A=1
42 |  
43 |     # measurement update
44 |     K[k] = Pminus[k]/( Pminus[k]+R ) #Kg(k)=P(k|k-1)H'/[HP(k|k-1)H' + R],H=1
45 |     xhat[k] = xhatminus[k]+K[k]*(z[k]-xhatminus[k]) #X(k|k) = X(k|k-1) + Kg(k)[Z(k) - HX(k|k-1)], H=1
46 |     P[k] = (1-K[k])*Pminus[k] #P(k|k) = (1 - Kg(k)H)P(k|k-1), H=1
47 |  
48 | pylab.figure()
49 | pylab.plot(z,'k+',label='noisy measurements')     # measured values
50 | pylab.plot(xhat,'b-',label='a posteri estimate')  # filtered values
51 | pylab.axhline(x,color='g',label='truth value')    # true value
52 | pylab.legend()
53 | pylab.xlabel('Iteration')
54 | pylab.ylabel('Voltage')
55 |  
56 | pylab.figure()
57 | valid_iter = range(1,n_iter) # Pminus not valid at step 0
58 | pylab.plot(valid_iter,Pminus[valid_iter],label='a priori error estimate')
59 | pylab.xlabel('Iteration')
60 | pylab.ylabel('$(Voltage)^2$')
61 | pylab.setp(pylab.gca(),'ylim',[0,.01])
62 | pylab.show()


--------------------------------------------------------------------------------
/Python+Algorithm/Math/Kalman/kalman_1.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 |  
  3 | '''
  4 | kalman_mousetracker.py - OpenCV mouse-tracking demo using 2D Kalman filter 
  5 | Adapted from
  6 |    http://www.morethantechnical.com/2011/06/17/simple-kalman-filter-for-tracking-using-opencv-2-2-w-code/
  7 |     
  8 | Copyright (C) 2014 Simon D. Levy
  9 | This code is free software: you can redistribute it and/or modify
 10 | it under the terms of the GNU Lesser General Public License as
 11 | published by the Free Software Foundation, either version 3 of the
 12 | License, or (at your option) any later version.
 13 | This code is distributed in the hope that it will be useful,
 14 | but WITHOUT ANY WARRANTY without even the implied warranty of
 15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 16 | GNU General Public License for more details.
 17 | You should have received a copy of the GNU Lesser General Public License
 18 | along with this code. If not, see <http://www.gnu.org/licenses/>.
 19 | '''
 20 |  
 21 | # This delay will affect the Kalman update rate
 22 | DELAY_MSEC = 20
 23 |  
 24 | # Arbitrary display params
 25 | WINDOW_NAME = 'Kalman Mousetracker [ESC to quit]'
 26 | WINDOW_SIZE = 500
 27 |  
 28 | import cv2
 29 | import numpy as np
 30 | from sys import exit
 31 |  
 32 | from kalman2d import Kalman2D
 33 |  
 34 | class MouseInfo(object):
 35 |     '''
 36 |     A class to store X,Y points
 37 |     '''
 38 |  
 39 |     def __init__(self):
 40 |  
 41 |         self.x, self.y = -1, -1
 42 |  
 43 |     def __str__(self):
 44 |  
 45 |         return '%4d %4d' % (self.x, self.y)
 46 |  
 47 | def mouseCallback(event, x, y, flags, mouse_info):
 48 |     '''
 49 |     Callback to update a MouseInfo object with new X,Y coordinates
 50 |     '''
 51 |  
 52 |     mouse_info.x = x
 53 |     mouse_info.y = y
 54 |  
 55 |  
 56 | def drawCross(img, center, r, g, b):
 57 |     '''
 58 |     Draws a cross a the specified X,Y coordinates with color RGB
 59 |     '''
 60 |  
 61 |     d = 5
 62 |     t = 2
 63 |  
 64 |     color = (r, g, b)
 65 |  
 66 |     ctrx = center[0]
 67 |     ctry = center[1]
 68 |  
 69 |     cv2.line(img, (ctrx - d, ctry - d), (ctrx + d, ctry + d), color, t, cv2.CV_AA)
 70 |     cv2.line(img, (ctrx + d, ctry - d), (ctrx - d, ctry + d), color, t, cv2.CV_AA)
 71 |  
 72 |  
 73 | def drawLines(img, points, r, g, b):
 74 |     '''
 75 |     Draws lines 
 76 |     '''
 77 |  
 78 |     cv2.polylines(img, [np.int32(points)], isClosed=False, color=(r, g, b))
 79 |  
 80 |  
 81 | def newImage():
 82 |     '''
 83 |     Returns a new image
 84 |     '''
 85 |  
 86 |     return np.zeros((500,500,3), np.uint8) 
 87 |  
 88 |  
 89 | if __name__ == '__main__':
 90 |  
 91 |  
 92 |     # Create a new image in a named window
 93 |     img = newImage()
 94 |     cv2.namedWindow(WINDOW_NAME)
 95 |  
 96 |     # Create an X,Y mouse info object and set the window's mouse callback to modify it
 97 |     mouse_info = MouseInfo()
 98 |     cv2.setMouseCallback(WINDOW_NAME, mouseCallback, mouse_info)
 99 |  
100 |     # Loop until the callback has reported a position with both coordinates > 0
101 |     while True:
102 |  
103 |         if mouse_info.x > 0 and mouse_info.y > 0:
104 |             break
105 |  
106 |         cv2.imshow(WINDOW_NAME, img)
107 |         if cv2.waitKey(1) == 27:  # 27 is the ESC key code
108 |             exit(0)
109 |  
110 |  
111 |     # These will hold the trajectories of the mouse location and of the Kalman estimate
112 |     measured_points = []
113 |     kalman_points = []
114 |  
115 |     # Create a new Kalman2D filter (NOTE(review): no starting location is passed here - confirm Kalman2D's defaults)
116 |     kalman2d = Kalman2D()
117 |  
118 |     # Loop till user hits escape
119 |     while True:
120 |  
121 |         # Serve up a fresh image
122 |         img = newImage()
123 |  
124 |         # Grab current mouse position and add it to the trajectory
125 |         measured = (mouse_info.x, mouse_info.y)
126 |         measured_points.append(measured)
127 |  
128 |         # Update the Kalman filter with the mouse point
129 |         kalman2d.update(mouse_info.x, mouse_info.y)
130 |  
131 |         # Get the current Kalman estimate and add it to the trajectory
132 |         estimated = [int (c) for c in kalman2d.getEstimate()]
133 |         kalman_points.append(estimated)
134 |  
135 |         # Display the trajectories and current points
136 |         drawLines(img, kalman_points,   0,   255, 0)
137 |         drawCross(img, estimated,       255, 255, 255)
138 |         drawLines(img, measured_points, 255, 255, 0)
139 |         drawCross(img, measured, 0,   0,   255)
140 |  
141 |         # Delay for specified interval, quitting on ESC
142 |         cv2.imshow(WINDOW_NAME, img)
143 |         if cv2.waitKey(DELAY_MSEC) == 27:
144 |             break


--------------------------------------------------------------------------------
/Python+Algorithm/Math/Kalman/my_kalman_carmove.py:
--------------------------------------------------------------------------------
 1 | '''本例说明
 2 | 小车匀加速运动，对小车进行位移预测
 3 | '''
 4 | 
 5 | import numpy as np
 6 | import pylab
 7 | 
 8 | # 初始化参数
 9 | delta_t = 0.1
10 | t = np.arange(0, 5, delta_t)  # 时间序列
11 | N = len(t)  # 序列长度
12 | sz = (2, N)  # 数据量
13 | a = 10  # 真实加速度
14 | x = 1 / 2 * a * t**2  # 真实位移
15 | z = x + np.random.normal(0, 10, size=N)  # 观测值，在真实值上加入了白噪声，服从高斯分布
16 | 
17 | Q = [[0, 0], [0, 0.01]]
18 | R = 10
19 | 
20 | A = np.array([[1, delta_t], [0, 1]])
21 | B = np.array([1 / 2 * delta_t**2, delta_t])
22 | H = np.array([1, 0])
23 | 
24 | # 分配空间
25 | x_predict = np.zeros(sz)  # x的先验估计，也就是预测值
26 | P_predict = np.zeros((2, 2))  # P的先验估计
27 | x_update = np.zeros(sz)  # x的后验估计，也就是最终的估计量
28 | P_update = np.zeros((2, 2))  # 协方差的后验估计
29 | K = np.zeros(sz)  # 卡尔曼增益
30 | I = np.eye(2)
31 | 
32 | for k in range(1, N):
33 |     # 预测过程
34 |     x_predict[:, k] = A.dot(x_update[:, k - 1]) + a * B
35 |     P_predict = A.dot(P_update).dot(A.T) + Q
36 | 
37 |     # 更新过程
38 |     K[:, k] = P_predict.dot(H.T) / (H.dot(P_predict).dot(H.T) + R)
39 |     x_update[:, k] = x_predict[:, k] + K[:, k].dot(
40 |         (z[k] - H.dot(x_predict[:, k])))
41 |     P_update = (I - K[:, k].dot(H)).dot(P_predict)
42 | 
43 | pylab.rcParams['font.sans-serif'] = ['FangSong']  # set the default sans-serif font
44 | pylab.rcParams['axes.unicode_minus'] = False  # keeps the minus sign '-' from showing as a box in saved figures
45 | 
46 | pylab.figure()
47 | pylab.plot(z, color='g', linestyle='--', label='观测值')  # observations
48 | pylab.plot(x_update[0], color='r', label='估计值')  # estimate
49 | pylab.plot(x, linestyle=':', label='真实值')  # true value
50 | pylab.xlabel('时间/s')
51 | pylab.ylabel('位移/m')
52 | pylab.legend()
53 | pylab.show()


--------------------------------------------------------------------------------
/Python+Algorithm/Math/Kalman/my_kalman_simple.py:
--------------------------------------------------------------------------------
 1 | '''本例说明
 2 | 是Kalman滤波器的一种特殊情况，去掉了
 3 | 设定真实值，按高斯分布，针对真实值随机生成【sz】个观测数据，然后进行卡尔曼滤波估计
 4 | '''
 5 | 
 6 | import numpy as np
 7 | import pylab
 8 | 
 9 | # 初始化参数
10 | sz = 50  # 数据量
11 | 
12 | x = 0.1  # 真实值
13 | z = np.random.normal(x, 0.1, size=sz)  # 观测值，服从高斯分布
14 | 
15 | Q = 1e-5  # 过程噪声
16 | R = 1e-2  # 观测噪声
17 | 
18 | # 为变量分配空间
19 | x_predict = np.zeros(sz)  # x的先验估计，也就是预测值
20 | P_predict = np.zeros(sz)  # P的先验估计
21 | x_update = np.zeros(sz)  # x的后验估计，也就是最终的估计量
22 | P_update = np.zeros(sz)  # 协方差的后验估计
23 | K = np.zeros(sz)  # 卡尔曼增益
24 | 
25 | # 赋初值
26 | x_update[0] = 0.0
27 | P_update[0] = 1.0
28 | 
29 | for k in range(1, sz):
30 |     # 预测过程
31 |     x_predict[k] = x_update[k - 1]
32 |     P_predict[k] = P_update[k - 1] + Q
33 | 
34 |     # 更新过程
35 |     K[k] = P_predict[k] / (P_predict[k] + R)
36 |     x_update[k] = x_predict[k] + K[k] * (z[k] - x_predict[k])
37 |     P_update[k] = (1 - K[k]) * P_predict[k]
38 | 
39 | pylab.rcParams['font.sans-serif'] = ['FangSong']  # set the default sans-serif font
40 | pylab.rcParams['axes.unicode_minus'] = False  # keeps the minus sign '-' from showing as a box in saved figures
41 | 
42 | pylab.figure()
43 | pylab.plot(z, 'k+', label='观测值')  # observations
44 | pylab.plot(x_update, 'b-', label='估计值')  # estimate
45 | pylab.axhline(x, color='g', label='真实值')  # true value
46 | pylab.legend()
47 | pylab.show()
48 | 


--------------------------------------------------------------------------------
/Python+Algorithm/Math/gram_schmidt.py:
--------------------------------------------------------------------------------
 1 | # 施密特正交化 Gram-Schmidt
 2 | # ！注意 例子中 输入向量为  (3, 1)T  (2, 2)T
 3 | # 输出向量为 (0.9., 0.3.)T (-0.3., 0.9.)T
 4 | 
 5 | import numpy as np
 6 | 
 7 | def myGS(V):
 8 |     u = V.copy().transpose()
 9 |     E = []
10 |     for i in range(len(u)):
11 |         for j in range(i):
12 |             u[i] = (V[i] @ u[j]) / (u[j] @ u[j]) * (u[j])
13 |         E.append(u[i] / np.linalg.norm(u[i]))
14 |     return np.array(E)
15 | 
16 | if __name__ == "__main__":
17 |     # 输入矩阵  
18 |     # V = np.array([[3., 1.], [2., 2.]])
19 |     V = np.array([[1., 0, -1, 4], [-1, 2, 2, -1], [2, 3, 1, -3]]).transpose()
20 |     print(V)
21 | 
22 |     # # 输出施密特正交化结果
23 |     E = myGS(V)
24 |     print(E)
25 |     
26 |     # # 验证是否为单位阵
27 |     print(E.transpose() @ E)


--------------------------------------------------------------------------------
/Python+Algorithm/Math/math_base.py:
--------------------------------------------------------------------------------
 1 | '''
 2 |     在这里将用代码实现一些数学中常用的概念，方便理解
 3 |     1、均方误差
 4 | '''
 5 | import numpy as np
 6 | import matplotlib.pyplot as plt
 7 | 
 8 | # generate a few groups of random numbers
 9 | x1 = np.random.rand(10) 
10 | x2 = np.random.randn(100) # standard normal distribution
11 | x3 = np.random.randint(1, 10, 10) # integers from 1 to 9 (the upper bound 10 is exclusive)
12 | x4 = np.random.normal(size=10)
13 | 
14 | # 生成一组真实值和预测值
15 | target = [1, 2, 3, 4, 5, 6]
16 | prediction = [1, 1, 2, 4, 4, 7]
17 | 
18 | 
19 | ## 对单组数据而言
20 | # 计算均值
21 | # mean = np.mean(target)
22 | # print('均值:', mean)
23 | 
24 | # # 计算方差
25 | # var = np.var(target)
26 | # print('方差：', var)
27 | 
28 | ## 对两组数据而言
29 | # 计算误差
30 | error = []
31 | for i in range(len(target)):
32 |     error.append(target[i] - prediction[i])
33 | 
34 | squaredError = []
35 | for val in error:
36 |     squaredError.append(val^2)
37 | 
38 | print('MSE:', sum(squaredError)/len(squaredError)) 
39 | 
40 | # 计算标准差
41 | std = np.std(target)
42 | print(std)
43 | # 画出随机数的图像
44 | # plt.figure(1)
45 | # plt.plot(x4)
46 | # # plt.figure(2)
47 | # # plt.hist(x4, 100)
48 | # plt.show()
49 | 
50 | # 1、均方误差(MSE)
51 | # print("MSE = ", sum(squaredError) / len(squaredError))
52 | 


--------------------------------------------------------------------------------
/Python+Algorithm/Math/matrix.ipynb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yzy1996/Python-Code/2abcaa6fbfa4a84aaffdf10d7bcc6b12649dd221/Python+Algorithm/Math/matrix.ipynb


--------------------------------------------------------------------------------
/Python+Algorithm/Math/matrix.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | '''
 4 | list列表 [1, 2]
 5 | np.array数组 np.array([1, 2])
 6 | np.mat矩阵 np.mat([1, 2])
 7 | 数组矩阵两者可以互相转换
 8 | 
 9 | !!!不要用列表来进行运算，下面只说数组和矩阵
10 | '''
11 | 
12 | # two 2-D arrays (2x2)
13 | a = np.array([[1, 2], [3, 4]])
14 | b = np.array([[2, 4], [1, 3]])
15 | 
16 | print(a.shape) # shape of the array: (2, 2)
17 | print(a.T @ b) # matrix product of a transposed with b
18 | print(a.dot(b)) # matrix product of a with b
19 | 
20 | 


--------------------------------------------------------------------------------
/Python+Algorithm/Math/pareto-front.py:
--------------------------------------------------------------------------------
 1 | import matplotlib.pyplot as plt
 2 | import numpy as np
 3 | 
 4 | 
 5 | def pareto_front(x, y):
 6 |     b = []
 7 |     i = 0
 8 |     while i < len(x):
 9 |         j = 0
10 |         while j < len(a):
11 |             if i != j:
12 |                 vj1 = a[j][0]
13 |                 vj2 = a[j][1]
14 |                 vi1 = a[i][0]
15 |                 vi2 = a[i][1]
16 | 
17 |                 if (vj1 >= vi1 and vj2 <= vi2) and (vj1 > vi1 or vj2 < vi2):
18 |                     i += 1
19 |                     break
20 |                 else:
21 |                     j += 1
22 |                 if j == len(a):
23 |                     print(a[i])
24 |                     i += 1
25 |                     break
26 |             else:
27 |                 j += 1
28 |                 if i == len(a)-1 and j == len(a):
29 |                     print(a[i])
30 |                     i += 1
31 | 
32 | 
33 | # 帕累托最优点需要满足左下区域没有其他点
34 | # 遍历Traversal
35 | # 数据点表示为 (x[i],y[i])
36 | for i = range(len(x)):
37 |     for j = range(len(x)):
38 |         if x[j]>=x[i] & y[j]>=y[i]:
39 |             pareto_optimality=[x[i],y[i]]
40 | 
41 | 
42 | 
43 | def plot_pareto():
44 |     plt.plot(p1, p2, 'ro')
45 | 
46 | 
47 | if __name__ == '__main__':
48 | 
49 |     x = np.array([2, 5, 1, 3, 2, 7])
50 |     y = np.array([9, 8, 12, 11, 16, 10])
51 |     plt.plot(x, y, 'ro')
52 |     plt.show()
53 | 
54 | 
55 | # 加上可视化
56 | 


--------------------------------------------------------------------------------
/Python+Algorithm/Optimization-Algorithm/Adam.py:
--------------------------------------------------------------------------------
 1 | # ADAM
 2 | # 以 y=x1+2*x2为例
 3 | import math
 4 | import numpy as np
 5 | 
 6 | 
 7 | def adam():
 8 |     # 训练集，每个样本有三个分量
 9 |     x = np.array([(1, 1), (1, 2), (2, 2), (3, 1), (1, 3), (2, 4), (2, 3), (3,
10 |                                                                            3)])
11 |     y = np.array([3, 5, 6, 5, 7, 10, 8, 9])
12 | 
13 |     # 初始化
14 |     m, dim = x.shape
15 |     theta = np.zeros(dim)  # 参数
16 |     alpha = 0.01  # 学习率
17 |     momentum = 0.1  # 冲量
18 |     threshold = 0.0001  # 停止迭代的错误阈值
19 |     iterations = 3000  # 迭代次数
20 |     error = 0  # 初始错误为0
21 | 
22 |     b1 = 0.9  # 算法作者建议的默认值
23 |     b2 = 0.999  # 算法作者建议的默认值
24 |     e = 0.00000001  #算法作者建议的默认值
25 |     mt = np.zeros(dim)
26 |     vt = np.zeros(dim)
27 | 
28 |     for i in range(iterations):
29 |         j = i % m
30 |         error = 1 / (2 * m) * np.dot((np.dot(x, theta) - y).T,
31 |                                      (np.dot(x, theta) - y))
32 |         if abs(error) <= threshold:
33 |             break
34 | 
35 |         gradient = x[j] * (np.dot(x[j], theta) - y[j])
36 |         mt = b1 * mt + (1 - b1) * gradient
37 |         vt = b2 * vt + (1 - b2) * (gradient**2)
38 |         mtt = mt / (1 - (b1**(i + 1)))
39 |         vtt = vt / (1 - (b2**(i + 1)))
40 |         vtt_sqrt = np.array([math.sqrt(vtt[0]),
41 |                              math.sqrt(vtt[1])])  # 因为只能对标量进行开方
42 |         theta = theta - alpha * mtt / (vtt_sqrt + e)
43 | 
44 |     print('迭代次数：%d' % (i + 1), 'theta：', theta, 'error：%f' % error)
45 | 
46 | 
47 | if __name__ == '__main__':
48 |     adam()


--------------------------------------------------------------------------------
/Python+Algorithm/Optimization-Algorithm/BGD.py:
--------------------------------------------------------------------------------
 1 | # 批量梯度下降BGD
 2 | # 拟合函数为：y = theta * x
 3 | # 代价函数为：J = 1 / (2 * m) * ((theta * x) - y) * ((theta * x) - y).T;
 4 | # 梯度迭代为: theta = theta - alpha / m * (x * (theta * x - y).T);
 5 | import numpy as np
 6 | 
 7 | 
 8 | # 1、单元数据程序
 9 | # 以 y=x为例，所以正确的结果应该趋近于theta = 1
def bgd_single():
    """Batch gradient descent on the single-variable model ``y = theta * x``.

    The training data follow ``y = x``, so ``theta`` should approach 1.
    Prints the iteration count, the fitted ``theta`` and the final error.
    """
    # Training set, one feature per sample.
    x = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    y = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])

    # Initialisation
    m = len(y)
    theta = 0  # parameter being learned
    alpha = 0.01  # learning rate
    threshold = 0.0001  # stop once the error is at or below this value
    iterations = 1500  # maximum number of iterations
    error = 0  # error starts at 0

    for i in range(iterations):
        residual = (theta * x) - y
        error = 1 / (2 * m) * np.dot(residual.T, residual)
        # Converged: stop iterating.
        if abs(error) <= threshold:
            break

        # Full-batch gradient step over all m samples.
        theta -= alpha / m * (np.dot(x.T, residual))

    print('单变量：', '迭代次数： %d' % (i + 1), 'theta： %f' % theta,
          'error1： %f' % error)
35 | 
36 | 
37 | # 2、多元数据程序
38 | # 以 y=x1+2*x2为例，所以正确的结果应该趋近于theta = [1，2]
39 | 
40 | 
def bgd_multi():
    """Batch gradient descent on the two-variable model ``y = x . theta``.

    The training data follow ``y = x1 + 2 * x2``, so ``theta`` should
    approach ``[1, 2]``. Prints the iteration count, ``theta`` and the error.
    """
    # Training set: each sample has 2 components.
    x = np.array([(1, 1), (1, 2), (2, 2), (3, 1), (1, 3), (2, 4), (2, 3),
                  (3, 3)])
    y = np.array([3, 5, 6, 5, 7, 10, 8, 9])

    # Initialisation
    m, dim = x.shape
    theta = np.zeros(dim)  # parameters being learned
    alpha = 0.01  # learning rate
    threshold = 0.0001  # stop once the error is at or below this value
    iterations = 1500  # maximum number of iterations
    error = 0  # error starts at 0

    for i in range(iterations):
        residual = np.dot(x, theta) - y
        error = 1 / (2 * m) * np.dot(residual.T, residual)
        # Converged: stop iterating.
        if abs(error) <= threshold:
            break

        # Full-batch gradient step; in-place so theta stays the same array.
        theta -= alpha / m * (np.dot(x.T, (np.dot(x, theta) - y)))

    print('多元变量：', '迭代次数：%d' % (i + 1), 'theta：', theta, 'error：%f' % error)


if __name__ == '__main__':
    bgd_single()
    bgd_multi()


--------------------------------------------------------------------------------
/Python+Algorithm/Optimization-Algorithm/README.md:
--------------------------------------------------------------------------------
1 | # Optimization-Algorithm
2 | 
3 | 参考CSDN[链接](https://blog.csdn.net/yzy_1996/article/details/84618536)


--------------------------------------------------------------------------------
/Python+Algorithm/Optimization-Algorithm/SGD.py:
--------------------------------------------------------------------------------
 1 | # 随机梯度下降SGD
 2 | # 以 y=x1+2*x2为例
 3 | 
 4 | import numpy as np
 5 | 
 6 | 
 7 | # 多元数据
 8 | def sgd():
 9 |     # 训练集，每个样本有2个分量
10 |     x = np.array([(1, 1), (1, 2), (2, 2), (3, 1), (1, 3), (2, 4), (2, 3), (3, 3)])
11 |     y = np.array([3, 5, 6, 5, 7, 10, 8, 9])
12 | 
13 |     # 初始化
14 |     m, dim = x.shape
15 |     theta = np.zeros(dim)  # 参数
16 |     alpha = 0.01  # 学习率
17 |     threshold = 0.0001  # 停止迭代的错误阈值
18 |     iterations = 1500  # 迭代次数
19 |     error = 0  # 初始错误为0
20 | 
21 |     # 迭代开始
22 |     for i in range(iterations):
23 | 
24 |         error = 1 / (2 * m) * np.dot((np.dot(x, theta) - y).T, (np.dot(x, theta) - y))
25 |         # 迭代停止
26 |         if abs(error) <= threshold:
27 |             break
28 |         
29 |         j = np.random.randint(0, m)
30 | 
31 |         theta -= alpha * (x[j] * (np.dot(x[j], theta) - y[j]))
32 | 
33 |     print('迭代次数：%d' % (i + 1), 'theta：', theta, 'error：%f' % error)
34 | 
35 | 
36 | if __name__ == '__main__':
37 |     sgd()


--------------------------------------------------------------------------------
/Python+Algorithm/Optimization-Algorithm/SGD_momentum.py:
--------------------------------------------------------------------------------
 1 | # 带冲量的随机梯度下降SGD
 2 | # 以 y=x1+2*x2为例
 3 | 
 4 | import numpy as np
 5 | 
 6 | 
 7 | # 多元数据
 8 | def sgd():
 9 |     # 训练集，每个样本有三个分量
10 |     x = np.array([(1, 1), (1, 2), (2, 2), (3, 1), (1, 3), (2, 4), (2, 3), (3,
11 |                                                                            3)])
12 |     y = np.array([3, 5, 6, 5, 7, 10, 8, 9])
13 | 
14 |     # 初始化
15 |     m, dim = x.shape
16 |     theta = np.zeros(dim)  # 参数
17 |     alpha = 0.01  # 学习率
18 |     momentum = 0.1  # 冲量
19 |     threshold = 0.0001  # 停止迭代的错误阈值
20 |     iterations = 1500  # 迭代次数
21 |     error = 0  # 初始错误为0
22 |     gradient = 0  # 初始梯度为0
23 | 
24 |     # 迭代开始
25 |     for i in range(iterations):
26 |         j = i % m
27 |         error = 1 / (2 * m) * np.dot((np.dot(x, theta) - y).T,
28 |                                      (np.dot(x, theta) - y))
29 |         # 迭代停止
30 |         if abs(error) <= threshold:
31 |             break
32 | 
33 |         gradient = momentum * gradient + alpha * (x[j] *
34 |                                                   (np.dot(x[j], theta) - y[j]))
35 |         theta -= gradient
36 | 
37 |     print('迭代次数：%d' % (i + 1), 'theta：', theta, 'error：%f' % error)
38 | 
39 | 
40 | if __name__ == '__main__':
41 |     sgd()


--------------------------------------------------------------------------------
/Python+Algorithm/Optimization-Algorithm/test.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
# Scratch script: build the 8 x 2 sample matrix used by the optimizer demos,
# unpack its shape, and print it.
x = np.array([
    (1, 1), (1, 2), (2, 2), (3, 1),
    (1, 3), (2, 4), (2, 3), (3, 3),
])

# m = number of samples, dim = number of components per sample.
m, dim = x.shape

print(x)


--------------------------------------------------------------------------------
/Python+Algorithm/Search-Algorithm/BFS.py:
--------------------------------------------------------------------------------
 1 | # 广度优先算法
 2 | # 集合（set）是一个无序的不重复元素序列。
 3 | 
 4 | 
 5 | def walk(G, s, S=set()):
 6 |     P, Q = dict(), set()
 7 |     P[s] = None  # s节点没有前任节点
 8 |     Q.add(s)  # 从s开始搜索
 9 |     while Q:
10 |         u = Q.pop()  # 随机移除元素
11 |         for v in G[u].difference(P, S):  # 得到新节点 difference 返回集合的差集
12 |             Q.add(v)
13 |             P[v] = u  # 记录前任节点
14 |     return P
15 | 
16 | 
def components(G):
    """Return the connected components of ``G``.

    Args:
        G: sequence of neighbour sets; node ``u`` is index ``u`` of ``G``.

    Returns:
        list with one predecessor dict (as produced by ``walk``) per
        component, in order of the smallest node index of each component.
    """
    comp = []
    seen = set()
    # Iterate over the actual number of nodes instead of a hard-coded 9.
    for u in range(len(G)):
        if u in seen:
            continue
        C = walk(G, u)
        seen.update(C)  # mark every node of this component as visited
        comp.append(C)
    return comp


if __name__ == "__main__":
    a, b, c, d, e, f, g, h, i = range(9)
    N = [
        {b, c, d},  # a
        {a, d},  # b
        {a, d},  # c
        {a, c, d},  # d
        {g, f},  # e
        {e, g},  # f
        {e, f},  # g
        {i},  # h
        {h}  # i
    ]
    comp = components(N)
    print(comp)


--------------------------------------------------------------------------------
/Python+Algorithm/Search-Algorithm/DFS.py:
--------------------------------------------------------------------------------
 1 | '''深度优先算法
 2 | 算法逻辑：
 3 | 从初始点开始，向子节点搜索，
 4 | '''
 5 | 
 6 | 
 7 | def iter_dfs(G, s):  # G是整个图， s是起点
 8 |     S, Q = set(), []  # S是存放具体的访问路径
 9 |     Q.append(s)  # Q是用来存放需要进行遍历的数据
10 |     while Q:  # 只要不是空
11 |         u = Q.pop()  # 删除并返回末尾元素
12 |         if u in S:
13 |             continue
14 |         S.add(u)
15 |         Q.extend(G[u])  # 在末尾追加
16 |         yield u
17 | 
18 | 
19 | if __name__ == "__main__":
20 |     a, b, c, d, e, f, g, h, i = range(9)
21 |     G = [
22 |         {b, c, d, e, f},  # a
23 |         {c, e},  # b
24 |         {d},  # c
25 |         {e},  # d
26 |         {f},  # e
27 |         {c, g, h},  # f
28 |         {f, h},  # g
29 |         {f, g}  # h
30 |     ]
31 |     print(list(iter_dfs(G, a)))  # [0, 5, 7, 6, 2, 3, 4, 1]
32 | 


--------------------------------------------------------------------------------
/Python+Algorithm/Search-Algorithm/README.md:
--------------------------------------------------------------------------------
 1 | # Search Algorithm
 2 | 
 3 | problem 1:
 4 | 
 5 |  ![img](./fig1.png)
 6 | 
 7 | problem 2: fig 2
 8 | 
 9 |  
10 | 
11 | ##  depth-first search 
12 | 
13 | [python code](./DFS.py)
14 | 
15 | [matlab code]()
16 | 
17 | ##  breadth-first search 
18 | 
19 | [python code](./BFS.py)
20 | 
21 | [matlab code]()
22 | 
23 | 
24 | 
25 | 
26 | 
27 | Travelling Salesman Problem (TSP)，它寻求的是旅行者由起点出发，通过所有给定的需求点后，再次返回起点所花费的最小路径成本
28 | 
29 | 
30 | 
31 |  动态规划算法（Dynamic Programming，简称DP）通常用于求解具有某种最优性质的问题，其基本思想是将待求解问题分解成若干个子问题，先求解子问题，然后由这些子问题的解再得到原问题的解。
32 | 
33 | 
34 | 
35 | 代码
36 | 
37 | 
38 | 
39 | BFS DFS
40 | 
41 | 广度优先和深度优先
42 | 
43 | 
44 | 
45 |  ![img](./fig1.png) 
46 | 
47 | 
48 | 
49 | 深度优先遍历顺序为：1->2->4->8->5->3->6->7
50 | 
51 | 广度优先遍历顺序为：1->2->3->4->5->6->7->8 
52 | 
53 | 
54 | 
55 | 代码
56 | 
57 | python 和 matlab
58 | 
59 | 
60 | 
61 | 我们会用到python中的集合（set），它是一个无序的不重复元素序列
62 | 
63 | 


--------------------------------------------------------------------------------
/Python+Algorithm/Search-Algorithm/fig1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yzy1996/Python-Code/2abcaa6fbfa4a84aaffdf10d7bcc6b12649dd221/Python+Algorithm/Search-Algorithm/fig1.png


--------------------------------------------------------------------------------
/Python+Algorithm/Sorting-Algorithm/README.md:
--------------------------------------------------------------------------------
1 | # Sorting-Algorithm
2 | 
3 | 参考CSDN[链接](https://blog.csdn.net/yzy_1996/article/details/85318705)


--------------------------------------------------------------------------------
/Python+Algorithm/Sorting-Algorithm/bubble_sort.py:
--------------------------------------------------------------------------------
 1 | # Bubble Sort 冒泡排序
 2 | # 冒泡排序只会操作相邻的两个数据。每次冒泡操作都会对相邻的两个元素进行比较，交换为正确的顺序，重复N次，时间复杂度为O(n^2)
 3 | # 优化的地方在于：当某次排序已经没有数据可以交换，就可以停止了
 4 | import time
 5 | 
 6 | 
 7 | def bubble_sort(array):
 8 |     length = len(array)
 9 |     if length <= 1:
10 |         return
11 | 
12 |     for i in range(length):
13 |         made_swap = False
14 |         for j in range(length - 1 - i):
15 |             if array[j] > array[j + 1]:
16 |                 array[j], array[j + 1] = array[j + 1], array[j]
17 |                 made_swap = True
18 |         if not made_swap:
19 |             break
20 | 
21 | 
if __name__ == '__main__':
    array = [5, 6, -1, 4, 2, 8, 10, 7, 6]
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # recommended clock for measuring short durations.
    start = time.perf_counter()
    bubble_sort(array)
    print(array)
    end = time.perf_counter()
    print('用时：', str(end - start))
29 | 


--------------------------------------------------------------------------------
/Python+Algorithm/Sorting-Algorithm/counting_sort.py:
--------------------------------------------------------------------------------
 1 | # counting_sort 计数排序
 2 | 
 3 | import time
 4 | 
 5 | 
 6 | def counting_sort(array):
 7 |     if len(array) <= 1:
 8 |         return
 9 | 
10 |     counts = [0] * (max(array) + 1)
11 |     for num in array:
12 |         counts[num] += 1
13 | 
14 |     # 临时数组，储存排序之后的结果
15 |     array_sorted = []
16 |     for i in range(max(array) + 1):
17 |         array_sorted += [i] * counts[i]
18 |     array[:] = array_sorted
19 | 
20 | 
if __name__ == '__main__':
    array = [5, 6, 1, 4, 2, 8, 10, 7, 6]
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # recommended clock for measuring short durations.
    start = time.perf_counter()
    counting_sort(array)
    print(array)
    end = time.perf_counter()
    print('用时：', str(end - start))


--------------------------------------------------------------------------------
/Python+Algorithm/Sorting-Algorithm/insertion_sort.py:
--------------------------------------------------------------------------------
 1 | # insertion_sort 插入排序
 2 | # 取未排序区间中的元素，在已排序区间中找到合适的插入位置将其插入，并保证已排序区间数据一直有序。重复这个过程，直到未排序区间中元素为空，算法结束。
 3 | 
 4 | import time
 5 | 
 6 | 
 7 | def insertion_sort(array):
 8 |     length = len(array)
 9 |     if length <= 1:
10 |         return
11 | 
12 | # 比较一下以下两种方法
13 | 
14 |     # for i in range(length - 1):
15 |     #     while i >= 0 and array[i] > array[i + 1]:
16 |     #         array[i + 1], array[i] = array[i], array[i + 1]
17 |     #         i -= 1
18 | 
19 |     for i in range(1, length):
20 |         value = array[i]
21 |         j = i - 1
22 |         while j >= 0 and array[j] > value:
23 |             array[j + 1] = array[j]
24 |             j -= 1
25 |         array[j + 1] = value
26 | 
27 | 
if __name__ == '__main__':
    array = [5, 6, -1, 4, 2, 8, 10, 7, 6]
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # recommended clock for measuring short durations.
    start = time.perf_counter()
    insertion_sort(array)
    print(array)
    end = time.perf_counter()
    print('用时：', str(end - start))
35 | 


--------------------------------------------------------------------------------
/Python+Algorithm/Sorting-Algorithm/merge_sort.py:
--------------------------------------------------------------------------------
 1 | # merge_sort 归并排序
 2 | 
 3 | import time
 4 | 
 5 | 
 6 | def merge_sort(array):
 7 |     merge_out(array, 0, len(array) - 1)
 8 | 
 9 | 
10 | def merge_out(array, low, high):
11 |     if low < high:
12 |         mid = low + (high - low) // 2
13 |         merge_out(array, low, mid)
14 |         merge_out(array, mid + 1, high)
15 |         merge_in(array, low, mid, high)
16 | 
17 | 
18 | def merge_in(array, low, mid, high):
19 |     # a[low:mid], a[mid+1, high] are sorted.
20 |     i, j = low, mid + 1
21 |     tmp = []
22 |     while i <= mid and j <= high:
23 |         if array[i] <= array[j]:
24 |             tmp.append(array[i])
25 |             i += 1
26 |         else:
27 |             tmp.append(array[j])
28 |             j += 1
29 |     # 将超出索引未添加进tmp的添加进去
30 |     if i <= mid:  # 如果mid右边已添加，则需补充mid左边部分
31 |         start, end = i, mid
32 |     else:
33 |         start, end = j, high
34 |     tmp.extend(array[start:end + 1])  # 常规+1
35 |     array[low:high + 1] = tmp
36 | 
37 | 
if __name__ == '__main__':
    array = [5, 6, -1, 4, 2, 8, 10, 7, 6]
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # recommended clock for measuring short durations.
    start = time.perf_counter()
    merge_sort(array)
    print(array)
    end = time.perf_counter()
    print('用时：', str(end - start))


--------------------------------------------------------------------------------
/Python+Algorithm/Sorting-Algorithm/quick_sort.py:
--------------------------------------------------------------------------------
 1 | # quick_sort 快速排序
 2 | 
 3 | import time
 4 | import random
 5 | 
 6 | def quick_sort(array):
 7 |     quick_out(array, 0, len(array) - 1)
 8 | 
 9 | 
10 | def quick_out(array, low, high):
11 |     if low < high:
12 |         k = random.randint(low, high)
13 |         array[low], array[k] = array[k], array[low] # 将分区点换到首位，避免了K的传参
14 |         m = partition(array, low, high) 
15 |         quick_out(array, low, m - 1)
16 |         quick_out(array, m + 1, high)
17 | 
18 | # 返回pivot正确的位置索引（它的左边是比它小的，右边是比它大的）
19 | def partition(array, low, high):
20 |     pivot, j = array[low], low  # j指向pivot在的位置
21 |     for i in range(low + 1, high + 1):  # i指向待比较元素的位置，从pivot后一位开始，因为pivot在首位
22 |         if array[i] <= pivot:
23 |             j += 1  # 此时j指向pivot应该在的位置
24 |             array[j], array[i] = array[i], array[j]  # 先让待比较的元素交换位置
25 |     array[low], array[j] = array[j], array[low]  # 真实交换pivot到它正确的位置
26 |     return j
27 | 
28 | 
29 | 
if __name__ == '__main__':
    array = [5, 6, -1, 4, 2, 8, 10, 7, 6]
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # recommended clock for measuring short durations.
    start = time.perf_counter()
    quick_sort(array)
    print(array)
    end = time.perf_counter()
    print('用时：', str(end - start))


--------------------------------------------------------------------------------
/Python+Algorithm/Sorting-Algorithm/selection_sort.py:
--------------------------------------------------------------------------------
 1 | # selection_sort 选择排序
 2 | # 选择排序算法的实现思路有点类似插入排序，也分已排序区间和未排序区间。但是选择排序每次会从未排序区间中找到最小的元素，将其放到已排序区间的末尾。
 3 | 
 4 | import time
 5 | 
 6 | 
 7 | def insertion_sort(array):
 8 |     length = len(array)
 9 |     if length <= 1:
10 |         return
11 | 
12 |     for i in range(length):
13 |         min_index = i
14 |         min_val = array[i]
15 |         for j in range(i, length):
16 |             if array[j] < min_val:
17 |                 min_val = array[j]
18 |                 min_index = j
19 |         array[i], array[min_index] = array[min_index], array[i]
20 | 
21 | 
if __name__ == '__main__':
    array = [5, 6, -1, 4, 2, 8, 10, 7, 6]
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # recommended clock for measuring short durations.
    start = time.perf_counter()
    insertion_sort(array)
    print(array)
    end = time.perf_counter()
    print('用时：', str(end - start))


--------------------------------------------------------------------------------
/Python+Algorithm/Sorting-Algorithm/sleep_sort.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | import threading
 3 | 
 4 | # 你需要排序的序列（可以包含负数）
 5 | num = [-5, 3, 9, 11, -1, 3, 12, 0, 8, -3, 23, 5, 19]
 6 | 
 7 | 
 8 | # 睡眠的方法
 9 | def doSleep(func):
10 |     co = 0.02  # 添加系数让睡眠时间短一些
11 |     time.sleep(co * pow(1.1, float(func)))  # 使用幂函数就不怕负数排序了
12 |     print(func)
13 | 
14 | 
15 | # 将多个线程存在一个数组中
# One (not yet started) thread per value to sort.
thread_list = [
    threading.Thread(target=doSleep, args=(str(value), )) for value in num
]

if __name__ == '__main__':
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # recommended clock for measuring elapsed wall time.
    start = time.perf_counter()
    for t in thread_list:
        t.start()  # start every thread
    for t in thread_list:
        t.join()  # wait for all worker threads before finishing
    end = time.perf_counter()
    print('用时：', str(end - start))
29 | 


--------------------------------------------------------------------------------
/Python+Algorithm/Uncategorized/pyramid.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | 一个生成金字塔的代码，根据你输入的层数显示出来
 3 | '''
 4 | 
 5 | a=input("Enter N = ")
 6 | b=int(a)
 7 | for i in range(b-1):
 8 |     print(" ",end='')
 9 | print("*",end='')
10 | print("\n")
11 | 
12 | for i in range(1,b-1):
13 |     for j in range(b-i-1):
14 |         print(" ",end='')
15 |     print("*",end='')
16 |     for l in range(2*i-1):
17 |         print("#",end='')
18 |     print("*",end='')
19 |     print("\n")
20 | 	
21 | if b>1:
22 |     for i in range(2*b-1):
23 |         print("*",end='')


--------------------------------------------------------------------------------
/Python+Algorithm/Uncategorized/xingxingdiandeng.py:
--------------------------------------------------------------------------------
 1 | 
 2 | #coding: utf-8
 3 | def xingxing(N):
 4 |     for i in range(N):
 5 |         j=i+1
 6 |         if j==1:
 7 |             print(' '*(N-1)+'*'+' '*(N-1))
 8 |         elif j<N:
 9 |             print(' '*(N-j)+'*'+(2*j-3)*'#'+'*'+' '*(N-j))
10 |         else:
11 |             print((2*N-1)*'*')
12 | 
13 | 
14 | xingxing(10)
15 | 


--------------------------------------------------------------------------------
/Python+Algorithm/kalman.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import numpy as np
 3 | import cv2 as cv
 4 | 
 5 | from math import cos, sin, sqrt
 6 | import numpy as np
 7 | 
def main():
    """Interactive demo: track a point moving on a circle with a Kalman filter.

    The simulated state is a 2x1 vector whose first entry is used as the
    angle of the point; the transition matrix adds the second entry to the
    first on every step. Each frame draws the true, measured and predicted
    positions in the "Kalman" window. Pressing any key restarts the
    simulation with a new random state; Esc, 'q' or 'Q' quits.
    """
    img_height = 500
    img_width = 500
    # Arguments are (2, 1, 0) — per the OpenCV API these are the state,
    # measurement and control dimensions.
    kalman = cv.KalmanFilter(2, 1, 0)

    code = -1

    cv.namedWindow("Kalman")

    while True:
        # Fresh random true state for each simulation run.
        state = 0.1 * np.random.randn(2, 1)

        # (Re)initialise the filter for this run.
        kalman.transitionMatrix = np.array([[1., 1.], [0., 1.]])
        kalman.measurementMatrix = 1. * np.ones((1, 2))
        kalman.processNoiseCov = 1e-5 * np.eye(2)
        kalman.measurementNoiseCov = 1e-1 * np.ones((1, 1))
        kalman.errorCovPost = 1. * np.ones((2, 2))
        kalman.statePost = 0.1 * np.random.randn(2, 1)

        while True:
            def calc_point(angle):
                # Map an angle to pixel coordinates on a circle of radius
                # img_width/3 centred in the image (y axis points down).
                return (np.around(img_width/2 + img_width/3*cos(angle), 0).astype(int),
                        np.around(img_height/2 - img_width/3*sin(angle), 1).astype(int))

            state_angle = state[0, 0]
            state_pt = calc_point(state_angle)

            prediction = kalman.predict()
            predict_angle = prediction[0, 0]
            predict_pt = calc_point(predict_angle)

            # Random measurement noise scaled by measurementNoiseCov.
            measurement = kalman.measurementNoiseCov * np.random.randn(1, 1)

            # generate measurement
            measurement = np.dot(kalman.measurementMatrix, state) + measurement

            measurement_angle = measurement[0, 0]
            measurement_pt = calc_point(measurement_angle)

            # plot points
            def draw_cross(center, color, d):
                # Draw an 'x' of half-size d centred on `center`; `img` is
                # resolved from the enclosing scope at call time.
                cv.line(img,
                         (center[0] - d, center[1] - d), (center[0] + d, center[1] + d),
                         color, 1, cv.LINE_AA, 0)
                cv.line(img,
                         (center[0] + d, center[1] - d), (center[0] - d, center[1] + d),
                         color, 1, cv.LINE_AA, 0)

            # Fresh black canvas for every frame.
            img = np.zeros((img_height, img_width, 3), np.uint8)
            draw_cross(np.int32(state_pt), (255, 255, 255), 3)
            draw_cross(np.int32(measurement_pt), (0, 0, 255), 3)
            draw_cross(np.int32(predict_pt), (0, 255, 0), 3)

            # Lines from the true position to the measured and predicted ones.
            cv.line(img, state_pt, measurement_pt, (0, 0, 255), 3, cv.LINE_AA, 0)
            cv.line(img, state_pt, predict_pt, (0, 255, 255), 3, cv.LINE_AA, 0)

            # Feed the measurement back into the filter.
            kalman.correct(measurement)

            # Advance the simulated true state by one step, with process noise.
            process_noise = sqrt(kalman.processNoiseCov[0,0]) * np.random.randn(2, 1)
            state = np.dot(kalman.transitionMatrix, state) + process_noise

            cv.imshow("Kalman", img)

            # Wait up to 100 ms for a key; any key ends the current run.
            code = cv.waitKey(100)
            if code != -1:
                break

        # Esc (27), 'q' or 'Q' quits; any other key restarts the simulation.
        if code in [27, ord('q'), ord('Q')]:
            break

    print('Done')


if __name__ == '__main__':
    # NOTE(review): no module docstring is visible in this file, so this
    # presumably prints "None".
    print(__doc__)
    main()
    cv.destroyAllWindows()


--------------------------------------------------------------------------------
/Python+Crawler/DoubanTop250.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | from bs4 import BeautifulSoup
 3 | import xlwt
 4 |  
 5 | def request_douban(url):
 6 |     try:
 7 |         response = requests.get(url)
 8 |         if response.status_code == 200:
 9 |             return response.text
10 |     except requests.RequestException:
11 |         return None
12 |  
13 |  
# Workbook and sheet that save_to_excel() fills in.
book = xlwt.Workbook(encoding='utf-8', style_compression=0)
sheet = book.add_sheet('豆瓣电影Top250', cell_overwrite_ok=True)

# Header row: name, image, rank, score, author, introduction.
for column, title in enumerate(('名称', '图片', '排名', '评分', '作者', '简介')):
    sheet.write(0, column, title)

# Next sheet row to write; row 0 holds the header.
n = 1
25 |  
26 |  
def save_to_excel(soup):
    """Extract every movie entry from a parsed Top-250 page into the sheet.

    Reads the ``<li>`` items under the element with class ``grid_view`` and
    writes one row per item to the module-level ``sheet``, advancing the
    module-level row counter ``n``.

    Args:
        soup: parsed page (BeautifulSoup object) containing the movie list.
    """
    global n

    items = soup.find(class_='grid_view').find_all('li')

    for item in items:
        item_name = item.find(class_='title').string
        item_img = item.find('a').find('img').get('src')
        item_index = item.find(class_='').string
        item_score = item.find(class_='rating_num').string
        item_author = item.find('p').text

        # Not every entry has a one-line quote. Reset the value per item so a
        # missing quote neither raises UnboundLocalError (first item) nor
        # silently reuses the previous movie's quote.
        quote = item.find(class_='inq')
        item_intr = (quote.string or '') if quote is not None else ''

        # print('爬取电影：' + item_index + ' | ' + item_name +' | ' + item_img +' | ' + item_score +' | ' + item_author +' | ' + item_intr )
        print('爬取电影：' + item_index + ' | ' + item_name  +' | ' + item_score  +' | ' + item_intr )

        sheet.write(n, 0, item_name)
        sheet.write(n, 1, item_img)
        sheet.write(n, 2, item_index)
        sheet.write(n, 3, item_score)
        sheet.write(n, 4, item_author)
        sheet.write(n, 5, item_intr)

        n = n + 1
52 |  
53 |  
def main(page):
    """Fetch one page (25 movies) of the Douban Top 250 list and store it.

    Args:
        page: zero-based page index; the list offset is ``page * 25``.
    """
    url = 'https://movie.douban.com/top250?start='+ str(page*25)+'&filter='
    html = request_douban(url)
    if html is None:
        # request_douban() returns None on a network error or a non-200
        # status; passing None to BeautifulSoup would raise, so skip the page.
        print('页面获取失败，已跳过：' + url)
        return
    soup = BeautifulSoup(html, 'lxml')
    save_to_excel(soup)


if __name__ == '__main__':

    for i in range(0, 10):
        main(i)

    # NOTE(review): xlwt writes the legacy .xls format; the .xlsx extension
    # here may make Excel warn about a format mismatch — confirm and consider
    # renaming the output file.
    book.save('豆瓣最受欢迎的250部电影.xlsx')


--------------------------------------------------------------------------------
/Python+Crawler/README.md:
--------------------------------------------------------------------------------
1 | # Crawler
2 | 
3 | 
4 | 利用关键词爬谷歌的图片
5 | 
6 | 修改keyword即可


--------------------------------------------------------------------------------
/Python+Crawler/Web/README.md:
--------------------------------------------------------------------------------
 1 | # Add_webpage_view
 2 | 
 3 | 这是一个用来刷网页访问量的程序，自娱自乐使用，切勿用来违法犯罪。
 4 | 
 5 | 
 6 | 
 7 | ## 文件说明
 8 | 
 9 | - `view_ua.py`：刷访问量核心程序
10 | - `ip_get.py`：从网上获取免费ip
11 | - `ip_test.py`：验证ip是否可用
12 | 


--------------------------------------------------------------------------------
/Python+Crawler/Web/fake_uragent.py:
--------------------------------------------------------------------------------
1 | from fake_useragent import UserAgent
# Build a user-agent generator and print the value of its `chrome` attribute.
ua = UserAgent()
chrome_user_agent = ua.chrome
print(chrome_user_agent)


--------------------------------------------------------------------------------
/Python+Crawler/Web/html+save.py:
--------------------------------------------------------------------------------
 1 | # -*-coding:utf8-*-
 2 | 
 3 | import requests
 4 | 
 5 | req = requests.get("http://news.sina.com.cn/")
 6 | 
 7 | if req.encoding == 'ISO-8859-1':
 8 |     encodings = requests.utils.get_encodings_from_content(req.text)
 9 |     if encodings:
10 |         encoding = encodings[0]
11 |     else:
12 |         encoding = req.apparent_encoding
13 | 
14 |     # encode_content = req.content.decode(encoding, 'replace').encode('utf-8', 'replace')
15 |     global encode_content
16 |     encode_content = req.content.decode(encoding, 'replace') #如果设置为replace，则会用?取代非法字符；
17 | 
18 | 
19 | print(encode_content)
20 | 
21 | with open('test.html','w',encoding='utf-8') as f:
22 |     f.write(encode_content)


--------------------------------------------------------------------------------
/Python+Crawler/Web/ip_get.py:
--------------------------------------------------------------------------------
 1 | import urllib.request
 2 | import re
 3 | import time
 4 | f = open('ip.txt', 'a')
 5 | for i in range(100):
 6 |     url_ip = 'http://webapi.http.zhimacangku.com/getip?num=20&type=1&pro=&city=0&yys=0&port=1&pack=35845&ts=0&ys=0&cs=0&lb=4&sb=0&pb=4&mr=2&regions='
 7 |     response = urllib.request.urlopen(url_ip)
 8 |     ip=response.read()
 9 |     ip=ip.decode("utf-8")
10 |     print(ip)
11 |     # ip = re.split('\n', ip)
12 |     # ip = '\n'.join(ip)
13 |     # # ip = '\n' + ip  
14 |     f.write(ip)
15 |     time.sleep(2)
16 | f.close()
17 | 


--------------------------------------------------------------------------------
/Python+Crawler/Web/ip_test.py:
--------------------------------------------------------------------------------
 1 | import telnetlib
 2 |  
 3 | print('------------------------connect---------------------------')
 4 | # 连接Telnet服务器
 5 | try:
 6 |     tn = telnetlib.Telnet('111.20.226.13',port='9808',timeout=10)
 7 | except:
 8 |     print('该代理IP  无效')
 9 | else:
10 |     print('该代理IP  有效')
11 |  
12 | print('-------------------------end----------------------------')


--------------------------------------------------------------------------------
/Python+Crawler/Web/key_ua+ip.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | import random
 3 | import time
 4 | 
 5 | url = 'http://www.baidu.com'
 6 | 
 7 | with open('ip.txt') as f:
 8 |     lines = (line.strip() for line in f)
 9 |     ip = list(lines)
10 | 
11 | with open('user_agent.txt') as f:
12 |     lines = (line.strip() for line in f)
13 |     user_agent = list(lines)
14 |     
15 | count = 0
16 | count1 = 0
17 | 
18 | while True: 
19 |     # 访问次数设置
20 |     if count < 1000:
21 | 
22 |         headers = {'User-Agent': random.choice(user_agent)}  # 随机选择浏览器标识
23 | 
24 |         payload = {'key': '如吉生物'}  # 检索关键词
25 | 
26 |         proxies = {'http': 'http://' + random.choice(ip)}  # 代理ip
27 | 
28 |         try:  # 正常运行
29 |             response = requests.get(url, params=payload, headers=headers, proxies=proxies, timeout=5)
30 |             if response.status_code == 200:
31 |                 count = count + 1
32 |                 print(count, 'times')
33 |             # time.sleep(60)
34 | 
35 |         except Exception:  # 异常
36 |             print('Retry')
37 |             # time.sleep(60)
38 |     else:
39 |         break


--------------------------------------------------------------------------------
/Python+Crawler/Web/test.py:
--------------------------------------------------------------------------------
 1 | # 测试网页爬虫的效果
 2 | 
 3 | import requests
 4 | import random
 5 | import time
 6 | 
 7 | with open('ip1.txt') as f:
 8 |     lines = (line.strip() for line in f)
 9 |     ip = list(lines)
10 | 
11 | url = 'http://httpbin.org/get'
12 | 
13 | headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36'}   
14 | 
15 | proxies = {'http':'http://117.191.11.105:8080'}
16 | 
17 | response = requests.get(url, headers=headers, proxies=proxies, timeout=5)
18 | 
19 | if response.status_code == 200:
20 |     with open('html.txt', 'w', encoding='utf-8') as f:
21 |         f.write(response.text)


--------------------------------------------------------------------------------
/Python+Crawler/Web/urlib.py:
--------------------------------------------------------------------------------
 1 | from urllib import request
 2 | 
 3 | if __name__ == "__main__":
 4 |     #访问网址
 5 |     url = 'http://www.whatismyip.com.tw/'
 6 |     #这是代理IP
 7 |     proxy = {'http':'106.46.136.112:808'}
 8 |     #创建ProxyHandler
 9 |     proxy_support = request.ProxyHandler(proxy)
10 |     #创建Opener
11 |     opener = request.build_opener(proxy_support)
12 |     #添加User Angent
13 |     opener.addheaders = [('User-Agent','Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36')]
14 |     #安装OPener
15 |     request.install_opener(opener)
16 |     #使用自己安装好的Opener
17 |     response = request.urlopen(url)
18 |     #读取相应信息并解码
19 |     html = response.read().decode("utf-8")
20 |     #打印信息
21 |     print(html)
22 | 


--------------------------------------------------------------------------------
/Python+Crawler/Web/view_ua+ip.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | import random
 3 | import time
 4 | 
 5 | url = ['https://blog.csdn.net/yzy_1996/article/details/85318705',
 6 |        'https://blog.csdn.net/yzy_1996/article/details/86992770',
 7 |        'https://blog.csdn.net/yzy_1996/article/details/87917772',
 8 |        'https://blog.csdn.net/yzy_1996/article/details/85318179',
 9 |        'https://blog.csdn.net/yzy_1996/article/details/88383365',
10 |        'https://blog.csdn.net/yzy_1996/article/details/88896320',
11 |        'https://crazyang.blog.csdn.net/article/details/88960229',
12 |        'https://crazyang.blog.csdn.net/article/details/89139203',
13 |        'https://blog.csdn.net/yzy_1996/article/details/89351413',
14 |        'https://blog.csdn.net/yzy_1996/article/details/89308517',
15 |        'https://blog.csdn.net/yzy_1996/article/details/89139203',
16 |        'https://crazyang.blog.csdn.net/article/details/89321214',
17 |        'https://blog.csdn.net/yzy_1996/article/details/89452063',
18 |        'https://crazyang.blog.csdn.net/article/details/89556049',
19 |        'https://crazyang.blog.csdn.net/article/details/89527834',
20 |        'https://crazyang.blog.csdn.net/article/details/89452063',
21 |        'https://crazyang.blog.csdn.net/article/details/89164827',
22 |        'https://blog.csdn.net/yzy_1996/article/details/89519702',
23 |        'https://crazyang.blog.csdn.net/article/details/83756357'
24 |        ]
25 | 
def _read_entries(path):
    """Return the stripped, non-empty lines of the text file at `path`."""
    with open(path) as f:
        return [entry for entry in (line.strip() for line in f) if entry]


# Blank lines are dropped so that an empty entry never becomes the proxy 'http://'.
ip = _read_entries('ip1.txt')
user_agent = _read_entries('user_agent.txt')

count = 0
countUrl = len(url)

# Number of rounds; each round picks one User-Agent / proxy pair and visits every URL.
for round_index in range(1000):

    headers = {'User-Agent': random.choice(user_agent)}  # random browser identity

    proxies = {'http': 'http://' + random.choice(ip)}  # random proxy address

    print(proxies)
    try:
        # A separate loop variable is used so the round counter is not overwritten.
        for target in url:
            # Without a timeout an unresponsive proxy would block this loop
            # indefinitely; a timeout error falls through to the retry branch.
            response = requests.get(target, headers=headers, proxies=proxies, timeout=30)
            if response.status_code == 200:
                count = count + 1
                print('Success', count, 'times')
                time.sleep(2)
        time.sleep(70)

    except Exception:  # any failure: wait, then start the next round
        print('Retry')
        time.sleep(60)


--------------------------------------------------------------------------------
/Python+Crawler/Web/view_ua.py:
--------------------------------------------------------------------------------
 1 | # 该程序实现刷CSDN网页访问量，当访问被拒绝或者遇到其他异常时会自动重启，无限刷
 2 | # 经过测试发现大概间隔70秒访问一下，访问量才会增加1
 3 | # 只需要修改或者添加url的链接就可以了
 4 |  
 5 | import requests
 6 | import time
 7 | 
 8 | url = ['https://blog.csdn.net/yzy_1996/article/details/101563858',
 9 |        'https://blog.csdn.net/yzy_1996/article/details/102665398']
10 | 
11 | headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.70 Safari/537.36'}   
12 |  
13 | count = 0
14 | countUrl = len(url) 
15 | 
16 | # 访问次数设置
17 | while count < 10000:
18 |     try:  # 正常运行
19 |         for i in range(countUrl):
20 |             response = requests.get(url[i], headers=headers)
21 |             if response.status_code == 200:
22 |                 count = count + 1
23 |                 print('Success ' + str(count), 'times')
24 |         time.sleep(100)
25 | 
26 |     except Exception:  # 异常
27 |         print('Failed and Retry')
28 |         time.sleep(1)


--------------------------------------------------------------------------------
/Python+Crawler/Web/webpage_viewer.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | import urllib.request
 4 | import re
 5 | 
 6 | url = 'https://blog.csdn.net/yzy_1996/article/details/82916940'
 7 | 
 8 | headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36'}
 9 | 
10 | req = urllib.request.Request(url,headers=headers)
11 | response = urllib.request.urlopen(req)
12 | page = response.read()
13 | print(page.decode('utf-8'))
14 | #利用正则表达式获取博客的访问量
15 | view = re.findall('<p class="link_view".*?><a href=".*?" title="阅读次数">阅读</a>\((.*?)\)</p>', page, re.S)
16 | #将结果输出 
17 | print('访问量:%s' % (view.zfill(4)))
18 | 
19 | 
20 | # <p class="read">阅读数 <span>22828</span></p>


--------------------------------------------------------------------------------
/Python+Crawler/Web/刷网页.py:
--------------------------------------------------------------------------------
 1 | #coding: utf-8
 2 | 
 3 | from urllib import request
 4 | from urllib import parse
 5 | import urllib.error
 6 | import time
 7 | from http import cookiejar
 8 | import threading
 9 | import linecache
10 | 
11 | #cj = http.cookiejar.CookieJar()
12 | #opener = request.build_opener(request.HTTPCookieProcessor(cj), request.HTTPHandler)
13 | #request.install_opener(opener)
14 | 
# Number of worker threads and number of proxy lines shared between them.
THREAD_NUMBER = 2
IP_NUMBER = 20

# Pages requested by every call to brash().
url = ['http://yun.zjer.cn/index.php?r=space/person/show&sid=NID555912']

# Request headers sent with every request.
head = {
    'User-Agent':
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36'
}

# Shared counters: `count` is the number of attempts, `count1` the number of
# failed attempts. Functions that rebind them declare them `global` themselves.
count, count1 = 0, 0

# Lock shared by the worker threads.
lock = threading.Lock()
30 | 
31 | 
def brash(proxy_dict):
    """Request every entry of `url` once through the given HTTP proxy.

    Args:
        proxy_dict: despite the name, a proxy address string (one line of the
            proxy file); it is used as the proxy for the 'http' scheme.

    Side effects:
        Increments the module counter `count` for every attempt and `count1`
        for every attempt that ends in an unexpected exception. Once `count1`
        has reached 100 the function only prints 'much error'.
    """
    global count
    global count1
    # The counters are shared by all worker threads, so they are only read
    # and modified while holding the lock.
    with lock:
        too_many_errors = count1 >= 100
        if not too_many_errors:
            count = count + 1
            attempt = count
    if too_many_errors:
        print('much error')
        return

    try:
        print(attempt, 'times')  # progress indicator: number of attempts so far
        proxy_handler = request.ProxyHandler({'http': proxy_dict})
        # The opener is used directly instead of install_opener(): the
        # installed opener is process-wide, so concurrent threads would
        # overwrite each other's proxy.
        opener = request.build_opener(proxy_handler)
        for target in url:
            req = request.Request(target, headers=head, method='POST')
            try:
                with opener.open(req) as response:
                    html = response.read().decode('utf-8')
                print(html)
            except urllib.error.URLError as e:
                print(e.reason)
                print("EEEEEE")

    except Exception:  # anything unexpected counts against the error budget
        print('Retry')
        with lock:
            count1 = count1 + 1
        time.sleep(1)
63 | 
64 | 
def ReadSpecialLine(ipfilename, linenumber):
    """Return line `linenumber` (1-based) of `ipfilename` without its newline.

    linecache.getline returns '' when the file or the line does not exist, so
    the result may be an empty string. The line number is printed while
    holding the shared lock.
    """
    proxy_dict = linecache.getline(ipfilename, linenumber).strip('\n')
    # `with` guarantees the lock is released even if print() raises.
    with lock:
        print(linenumber)
    return proxy_dict
72 | 
73 | 
74 | #while True:  #让程序一直执行
def For_EveryThread(Thread_i):
    """Worker loop for one thread; never returns.

    Thread number `Thread_i` repeatedly walks the proxy-file lines
    Thread_i, Thread_i + THREAD_NUMBER, Thread_i + 2 * THREAD_NUMBER, ...
    (IP_NUMBER / THREAD_NUMBER lines per pass) and calls brash() with each.
    """
    while True:
        lines_per_pass = int(IP_NUMBER / THREAD_NUMBER)
        for step in range(lines_per_pass):
            proxy_dict = ReadSpecialLine('ip.txt', THREAD_NUMBER * step + Thread_i)
            brash(proxy_dict)
82 | 
83 | 
if __name__ == '__main__':
    count = 0
    count1 = 0
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # replacement for measuring elapsed time.
    start = time.perf_counter()
    # Thread numbers are 1-based because they double as the first line number
    # each worker reads from the proxy file.
    thread_list = [
        threading.Thread(target=For_EveryThread, args=(Thread_i + 1, ))
        for Thread_i in range(THREAD_NUMBER)
    ]
    for t in thread_list:
        t.start()
    for t in thread_list:
        t.join()  # the main thread waits for every worker
    end = time.perf_counter()
    print('用时：', str(end - start))
98 | 


--------------------------------------------------------------------------------
/Python+Crawler/caixukun.py:
--------------------------------------------------------------------------------
  1 | from selenium import webdriver
  2 | from selenium.common.exceptions import TimeoutException
  3 | from selenium.webdriver.common.by import By
  4 | from selenium.webdriver.support.ui import WebDriverWait
  5 | from selenium.webdriver.support import expected_conditions as EC
  6 | from bs4 import BeautifulSoup
  7 | import xlwt
  8 |  
  9 |  
 10 | browser = webdriver.PhantomJS()
 11 | WAIT = WebDriverWait(browser, 10)
 12 | browser.set_window_size(1400,900)
 13 |  
 14 |  
 15 | book=xlwt.Workbook(encoding='utf-8',style_compression=0)
 16 |  
 17 | sheet=book.add_sheet('蔡徐坤篮球',cell_overwrite_ok=True)
 18 | sheet.write(0,0,'名称')
 19 | sheet.write(0,1,'地址')
 20 | sheet.write(0,2,'描述')
 21 | sheet.write(0,3,'观看次数')
 22 | sheet.write(0,4,'弹幕数')
 23 | sheet.write(0,5,'发布时间')
 24 |  
 25 | n=1
 26 |  
 27 | def search():
 28 |  
 29 |     try:
 30 |         print('开始访问b站....')
 31 |         browser.get("https://www.bilibili.com/")
 32 |  
 33 |         # 被那个破登录遮住了
 34 |         index = WAIT.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#primary_menu > ul > li.home > a")))
 35 |         index.click()
 36 |  
 37 |         input = WAIT.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#banner_link > div > div > form > input")))
 38 |         submit = WAIT.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="banner_link"]/div/div/form/button')))
 39 |  
 40 |         input.send_keys('蔡徐坤 篮球')
 41 |         submit.click()
 42 |  
 43 |         # 跳转到新的窗口
 44 |         print('跳转到新窗口')
 45 |         all_h = browser.window_handles
 46 |         browser.switch_to.window(all_h[1])
 47 |  
 48 |         get_source()
 49 |         total = WAIT.until(EC.presence_of_element_located((By.CSS_SELECTOR, "#server-search-app > div.contain > div.body-contain > div > div.page-wrap > div > ul > li.page-item.last > button")))
 50 |         return int(total.text)
 51 |     except TimeoutException:
 52 |         return search()
 53 |  
 54 |  
 55 | def next_page(page_num):
 56 |     try:
 57 |         print('获取下一页数据')
 58 |         next_btn = WAIT.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '#server-search-app > div.contain > div.body-contain > div > div.page-wrap > div > ul > li.page-item.next > button')))
 59 |         next_btn.click()
 60 |         WAIT.until(EC.text_to_be_present_in_element((By.CSS_SELECTOR, '#server-search-app > div.contain > div.body-contain > div > div.page-wrap > div > ul > li.page-item.active > button'),str(page_num)))
 61 |         get_source()
 62 |     except TimeoutException:
 63 |         browser.refresh()
 64 |         return next_page(page_num)
 65 |  
 66 |  
 67 | def save_to_excel(soup):
 68 |     list = soup.find(class_='all-contain').find_all(class_='info')
 69 |  
 70 |     for item in list:
 71 |         item_title = item.find('a').get('title')
 72 |         item_link = item.find('a').get('href')
 73 |         item_dec = item.find(class_='des hide').text
 74 |         item_view = item.find(class_='so-icon watch-num').text
 75 |         item_biubiu = item.find(class_='so-icon hide').text
 76 |         item_date = item.find(class_='so-icon time').text
 77 |  
 78 |         print('爬取：' + item_title)
 79 |  
 80 |         global n
 81 |  
 82 |         sheet.write(n, 0, item_title)
 83 |         sheet.write(n, 1, item_link)
 84 |         sheet.write(n, 2, item_dec)
 85 |         sheet.write(n, 3, item_view)
 86 |         sheet.write(n, 4, item_biubiu)
 87 |         sheet.write(n, 5, item_date)
 88 |  
 89 |         n = n + 1
 90 |  
 91 |  
 92 | def get_source():
 93 |     WAIT.until(EC.presence_of_element_located((By.CSS_SELECTOR,'#server-search-app > div.contain > div.body-contain > div > div.result-wrap.clearfix')))
 94 |     html = browser.page_source
 95 |     soup = BeautifulSoup(html,'lxml')
 96 |     save_to_excel(soup)
 97 |  
 98 | def main():
 99 |  
100 |     try:
101 |         total = search()
102 |         print(total)
103 |  
104 |         for i in range(2,int(total+1)):
105 |             next_page(i)
106 |  
107 |     finally:
108 |         browser.close()
109 |  
110 |  
111 | if __name__ == '__main__':
112 |     main()
113 |     book.save(u'蔡徐坤篮球.xlsx')


--------------------------------------------------------------------------------
/Python+Crawler/crawler1.py:
--------------------------------------------------------------------------------
 1 | # 1、爬虫起步
 2 | 
 3 | import urllib.request
 4 | import re 
 5 | 
 6 | response = urllib.request.urlopen('http://www.baidu.com')
 7 | 
 8 | content = response.read().decode('utf-8')
 9 | 
10 | print(content)
11 | 
12 | 


--------------------------------------------------------------------------------
/Python+Crawler/crawler2.py:
--------------------------------------------------------------------------------
 1 | # 2、爬虫进阶
 2 | 
 3 | from urllib import request,parse
 4 | import ssl
 5 | context = ssl._create_unverified_context()
 6 | url = 'https://biihu.cc//account/ajax/login_process/'
 7 | headers = {'User-Agent':' Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'}
 8 | dict = {
 9 |     'return_url':'https://biihu.cc/',
10 |     'user_name':'xiaoshuaib@gmail.com',
11 |     'password':'123456789',
12 |     '_post_type':'ajax',
13 | }
14 | data = bytes(parse.urlencode(dict),'utf-8')
15 | 
16 | req = request.Request(url, data=data, headers=headers, method='POST')
17 | response = request.urlopen(req, context=context)
18 | print(response.read().decode('utf-8'))


--------------------------------------------------------------------------------
/Python+Crawler/crawler3.py:
--------------------------------------------------------------------------------
 1 | # 使用requests库
 2 | 
 3 | import requests
 4 | 
 5 | url = 'http://www.baidu.com'
 6 | 
 7 | headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36'}
 8 | 
 9 | response = requests.get(url, headers=headers)
10 | 
11 | content = response.content.decode('utf-8')
12 | 
13 | print(content)
14 | 
15 | 


--------------------------------------------------------------------------------
/Python+Crawler/debug.log:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yzy1996/Python-Code/2abcaa6fbfa4a84aaffdf10d7bcc6b12649dd221/Python+Crawler/debug.log


--------------------------------------------------------------------------------
/Python+Crawler/form_test.py:
--------------------------------------------------------------------------------
 1 | import random
 2 | 
 3 | with open('ip.txt') as f:
 4 |     lines = (line.strip() for line in f)
 5 |     ip = list(lines)
 6 | 
 7 | with open('user_agent.txt') as f:
 8 |     lines = (line.strip() for line in f)
 9 |     user_agent = list(lines)
10 | 
11 | # user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36
12 | # proxy-server=http://117.191.11.105:8080
13 | headers = 'User-Agent=' + random.choice(user_agent)  # 随机选择浏览器标识
14 | proxies = 'proxy-server=http://' + random.choice(ip)  # 代理ip
15 | print(headers)
16 | print(proxies)


--------------------------------------------------------------------------------
/Python+Crawler/ip.txt:
--------------------------------------------------------------------------------
 1 | 120.234.138.102:53779
 2 | 120.198.230.15:8080
 3 | 117.135.77.30:8060
 4 | 111.26.9.26:80
 5 | 120.234.138.99:53779
 6 | 117.191.11.77:8080
 7 | 117.191.11.102:8080
 8 | 39.137.168.230:80
 9 | 117.191.11.103:80
10 | 117.191.11.104:8080
11 | 117.191.11.113:80
12 | 117.191.11.73:8080
13 | 117.131.119.98:80
14 | 117.191.11.75:80
15 | 117.191.11.108:8080
16 | 211.136.127.125:80
17 | 223.82.247.122:80
18 | 223.82.247.121:80
19 | 183.245.98.6:8118
20 | 117.158.189.238:9999
21 | 223.114.75.157:9999
22 | 120.213.176.255:9999
23 | 39.150.84.98:9999
24 | 39.163.49.244:9999
25 | 117.163.247.119:9999
26 | 120.214.30.168:9999
27 | 120.219.249.86:9999
28 | 120.215.204.168:9999
29 | 183.216.175.237:9999
30 | 39.164.222.116:9999
31 | 39.163.47.161:9999
32 | 223.96.95.229:3128
33 | 223.85.196.75:9797
34 | 39.137.77.66:8080
35 | 39.137.77.67:8080
36 | 223.93.145.186:8060
37 | 39.137.77.68:8080
38 | 39.137.77.68:80
39 | 39.137.77.66:80
40 | 39.137.77.67:80
41 | 120.234.138.99:53779
42 | 117.191.11.77:8080
43 | 117.191.11.102:8080
44 | 39.137.168.230:80
45 | 117.191.11.103:80
46 | 117.191.11.104:8080
47 | 117.191.11.113:80
48 | 117.191.11.73:8080
49 | 117.131.119.98:80
50 | 117.191.11.75:80
51 | 117.191.11.108:8080
52 | 211.136.127.125:80
53 | 223.82.247.122:80
54 | 223.82.247.121:80
55 | 183.245.98.6:8118
56 | 117.158.189.238:9999
57 | 112.35.56.134:80
58 | 183.230.157.236:8088
59 | 223.68.190.130:8181
60 | 112.12.37.196:53281
61 | 117.131.75.134:80
62 | 39.137.107.98:8080
63 | 183.230.179.157:8060
64 | 183.230.179.164:8060
65 | 223.85.196.75:9999
66 | 183.215.206.39:53281
67 | 117.191.11.106:8080
68 | 117.191.11.74:8080
69 | 117.191.11.111:8080
70 | 117.191.11.105:8080
71 | 117.191.11.71:80
72 | 117.186.214.74:9999
73 | 117.191.11.107:8080
74 | 117.191.11.101:8080
75 | 117.191.11.80:8080
76 | 117.191.11.106:80
77 | 117.191.11.105:80
78 | 117.191.11.71:8080
79 | 117.191.11.107:80
80 | 117.191.11.74:80


--------------------------------------------------------------------------------
/Python+Crawler/ip1.txt:
--------------------------------------------------------------------------------
  1 | 5.202.74.51:80
  2 | 5.202.101.141:80
  3 | 93.126.32.92:80
  4 | 5.202.148.98:80
  5 | 109.122.242.198:80
  6 | 5.202.47.43:80
  7 | 5.202.44.228:80
  8 | 5.202.148.185:80
  9 | 5.202.159.47:80
 10 | 5.202.101.226:80
 11 | 47.107.163.15:3128
 12 | 110.52.235.30:9999
 13 | 109.60.140.89:59901
 14 | 110.52.235.204:9999
 15 | 5.202.77.91:80
 16 | 5.202.101.146:80
 17 | 93.126.37.59:80
 18 | 5.202.147.38:80
 19 | 5.202.68.221:80
 20 | 5.202.101.175:80
 21 | 5.202.151.43:80
 22 | 5.202.220.64:80
 23 | 5.202.77.112:80
 24 | 5.202.78.123:80
 25 | 5.202.151.102:80
 26 | 5.202.44.165:80
 27 | 5.202.127.46:80
 28 | 5.202.74.106:80
 29 | 5.202.94.6:80
 30 | 31.184.143.206:80
 31 | 5.202.157.166:80
 32 | 5.202.151.92:80
 33 | 91.237.254.19:80
 34 | 5.202.69.51:80
 35 | 5.202.159.79:80
 36 | 5.202.94.192:80
 37 | 5.202.151.59:80
 38 | 93.126.59.74:80
 39 | 5.202.77.201:80
 40 | 5.202.157.245:80
 41 | 5.202.157.203:80
 42 | 5.202.93.60:80
 43 | 5.202.148.105:80
 44 | 5.202.148.124:80
 45 | 5.202.151.69:80
 46 | 5.202.101.208:80
 47 | 5.202.149.171:80
 48 | 5.202.47.68:80
 49 | 5.202.101.222:80
 50 | 5.202.44.74:80
 51 | 50.112.52.37:80
 52 | 95.38.64.3:8080
 53 | 36.67.8.27:53281
 54 | 85.192.166.189:8080
 55 | 137.74.254.242:3128
 56 | 218.214.29.92:8080
 57 | 94.23.159.76:9999
 58 | 218.214.29.92:80
 59 | 35.204.39.83:3128
 60 | 185.238.239.40:8090
 61 | 43.229.85.226:8080
 62 | 195.122.185.95:3128
 63 | 94.74.191.251:8088
 64 | 94.74.154.190:80
 65 | 187.28.39.155:8080
 66 | 41.242.166.233:53112
 67 | 185.212.127.32:41452
 68 | 89.22.255.82:36693
 69 | 178.134.71.138:47621
 70 | 88.148.183.139:3128
 71 | 187.28.39.156:8080
 72 | 125.162.136.91:80
 73 | 190.14.252.107:8080
 74 | 212.233.114.46:3128
 75 | 170.79.88.116:8080
 76 | 52.14.34.225:80
 77 | 103.224.101.155:46759
 78 | 202.153.231.147:3128
 79 | 202.153.231.147:80
 80 | 185.15.108.152:8080
 81 | 185.186.81.50:8080
 82 | 181.188.187.141:46664
 83 | 210.11.189.43:58993
 84 | 187.87.76.251:3128
 85 | 104.236.248.219:3128
 86 | 61.8.66.178:80
 87 | 202.146.2.131:47217
 88 | 180.250.219.58:53281
 89 | 182.253.60.114:53586
 90 | 202.146.0.219:47217
 91 | 88.255.101.247:8080
 92 | 187.102.48.202:8080
 93 | 187.102.48.193:8080
 94 | 111.198.154.116:8888
 95 | 180.180.123.68:8080
 96 | 123.200.20.6:8080
 97 | 187.45.54.81:8080
 98 | 177.75.4.34:80
 99 | 203.189.159.181:8080
100 | 202.169.238.82:8080
101 | 175.30.124.96:80
102 | 117.163.247.119:9999
103 | 35.185.22.15:80
104 | 112.245.171.227:9999
105 | 120.214.30.168:9999
106 | 177.54.144.160:3128
107 | 1.70.44.237:9999
108 | 223.245.168.45:9999
109 | 112.233.19.219:9999
110 | 115.229.131.80:8998
111 | 195.98.191.102:8081
112 | 119.184.142.202:9999
113 | 124.88.67.34:83
114 | 88.147.189.62:8081
115 | 42.230.149.23:9999
116 | 203.90.144.145:82
117 | 110.157.171.119:9000
118 | 112.233.249.106:9999
119 | 183.153.115.19:9999
120 | 115.46.199.2:9999
121 | 139.59.104.254:80
122 | 46.44.60.71:8081
123 | 5.2.75.15:1080
124 | 61.234.76.28:9999
125 | 120.219.249.86:9999
126 | 104.196.114.98:80
127 | 124.88.67.18:80
128 | 120.215.204.168:9999
129 | 116.116.112.139:9999
130 | 27.206.37.23:9999
131 | 92.208.138.179:80
132 | 115.216.168.55:9999
133 | 154.46.204.36:80
134 | 110.155.142.174:9999
135 | 183.216.175.237:9999
136 | 39.164.222.116:9999
137 | 60.162.51.77:8888
138 | 60.208.182.59:9999
139 | 115.55.152.132:9999
140 | 39.163.47.161:9999
141 | 185.148.218.246:8081
142 | 59.127.38.117:8080
143 | 212.22.86.114:3130
144 | 101.4.136.34:81
145 | 101.4.136.34:80
146 | 101.4.136.34:8080
147 | 178.169.64.76:8081
148 | 5.167.96.238:3128
149 | 47.94.230.42:9999
150 | 211.159.171.58:80
151 | 5.202.47.152:80
152 | 5.202.149.252:80
153 | 5.202.44.103:80
154 | 5.202.101.217:80
155 | 121.17.174.121:9797
156 | 5.202.148.113:80
157 | 5.202.220.21:80
158 | 5.202.151.58:80
159 | 5.202.151.77:80
160 | 5.202.109.18:80
161 | 5.202.68.82:80
162 | 5.202.76.226:80
163 | 103.111.56.69:50148
164 | 5.202.46.130:80
165 | 103.248.219.172:8080
166 | 206.81.11.75:80
167 | 5.202.44.170:80
168 | 140.227.64.94:3128
169 | 5.202.149.30:80
170 | 5.202.192.147:80
171 | 5.202.44.38:80
172 | 5.202.38.168:80
173 | 217.69.10.147:80
174 | 5.202.68.87:80
175 | 5.202.138.76:80
176 | 5.202.149.43:80
177 | 5.202.47.155:80
178 | 5.202.67.247:80
179 | 5.202.158.151:80
180 | 5.202.218.73:80
181 | 5.202.38.4:80
182 | 187.4.249.6:80
183 | 5.190.166.242:80
184 | 51.77.223.95:80
185 | 5.202.74.23:80
186 | 5.202.67.121:80
187 | 5.202.101.179:80
188 | 5.202.151.70:80
189 | 5.202.94.68:80
190 | 36.91.203.207:8080
191 | 189.204.158.161:8080
192 | 5.202.215.13:80
193 | 5.202.78.116:80
194 | 212.33.202.98:80
195 | 5.202.242.42:80
196 | 92.126.203.59:8080
197 | 111.177.183.168:9999
198 | 1.20.96.24:57132
199 | 93.171.156.209:51016
200 | 5.128.73.158:8081


--------------------------------------------------------------------------------
/Python+Crawler/sample1.py:
--------------------------------------------------------------------------------
 1 | # 爬虫1，抓取网页内容
 2 | 
 3 | '''
 4 | 第一个示例：简单的网页爬虫
 5 | 爬取豆瓣首页
 6 | '''
 7 |  
 8 | import urllib.request
 9 |  
# Target address
url = "http://www.douban.com/"

# Build the request object and fetch it
request = urllib.request.Request(url)
response = urllib.request.urlopen(request)

# Read the raw body and decode it as UTF-8
data = response.read().decode('utf-8')

# Print the page
print(data)

# Print details about the response: its type, final URL, headers and status code
for detail in (type(response), response.geturl(), response.info(), response.getcode()):
    print(detail)


--------------------------------------------------------------------------------
/Python+Crawler/selenium_first.py:
--------------------------------------------------------------------------------
 1 | from selenium import webdriver
 2 | import time
 3 | 
 4 | # http://httpbin.org/get
 5 | with open('ip.txt') as f:
 6 |     lines = (line.strip() for line in f)
 7 |     ip = list(lines)
 8 | 
 9 | with open('user_agent.txt') as f:
10 |     lines = (line.strip() for line in f)
11 |     user_agent = list(lines)
12 | 
13 | # 添加配置
14 | options = webdriver.ChromeOptions()
15 | options.add_argument("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36")
16 | options.add_argument("proxy-server=http://117.191.11.105:8080")
17 | 
18 | # 创建浏览器驱动
19 | driver = webdriver.Chrome(chrome_options=options)
20 | driver.get("http://httpbin.org/get")
21 | 
22 | # input = driver.find_element_by_css_selector('#kw')
23 | # input.send_keys("如吉生物")
24 | 
25 | # button = driver.find_element_by_css_selector('#su')
26 | # button.click()
27 | 
28 | print(driver.page_source)
29 | time.sleep(10)
30 | driver.quit()


--------------------------------------------------------------------------------
/Python+Crawler/spider-google.py:
--------------------------------------------------------------------------------
1 | # 爬谷歌的图片
2 | # keyword是关键词
3 | 
4 | from icrawler.builtin import GoogleImageCrawler
5 | 
# Images are stored under the directory '111'; at most 10 are fetched for the keyword.
storage_config = {'root_dir': '111'}
google_crawler = GoogleImageCrawler(storage=storage_config)
google_crawler.crawl(keyword='cat', max_num=10)


--------------------------------------------------------------------------------
/Python+Crawler/test.py:
--------------------------------------------------------------------------------
 1 | from selenium import webdriver
 2 | import time
 3 | import random
 4 | 
 5 | with open('ip.txt') as f:
 6 |     lines = (line.strip() for line in f)
 7 |     ip = list(lines)
 8 | 
 9 | with open('user_agent.txt') as f:
10 |     lines = (line.strip() for line in f)
11 |     user_agent = list(lines)
12 | 
13 | # http://httpbin.org/get
14 | count = 0
15 | for i in range(len(ip)):
16 |     try:
17 |         # 添加配置
18 |         options = webdriver.ChromeOptions()
19 |         options.add_argument('User-Agent=' + random.choice(user_agent))
20 |         options.add_argument('proxy-server=http://' + ip[i])
21 | 
22 |         # 创建浏览器驱动
23 |         driver = webdriver.Chrome(chrome_options=options)
24 |         driver.get("http://www.baidu.com")
25 | 
26 |         input = driver.find_element_by_css_selector('#kw')
27 |         input.send_keys("如吉生物")
28 | 
29 |         button = driver.find_element_by_css_selector('#su')
30 |         button.click()
31 | 
32 |         # print(driver.page_source)
33 |         time.sleep(10)
34 |         print(str(count) + 'times')
35 |         count = count + 1
36 |         driver.quit()
37 | 
38 |     except Exception:  # 其他异常
39 |         driver.quit()
40 |         print('Retry')
41 | 
42 | 
43 |         


--------------------------------------------------------------------------------
/Python+Crawler/豆瓣最受欢迎的250部电影.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yzy1996/Python-Code/2abcaa6fbfa4a84aaffdf10d7bcc6b12649dd221/Python+Crawler/豆瓣最受欢迎的250部电影.xlsx


--------------------------------------------------------------------------------
/Python+HTML/README.md:
--------------------------------------------------------------------------------
1 | # Python+HTML
2 | 
3 | 使用方法参见：[CSDN博客](https://blog.csdn.net/yzy_1996/article/details/80223053)
4 | 


--------------------------------------------------------------------------------
/Python+HTML/test1.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>  
 2 | <html>  
 3 |   <head>  
 4 |     <meta charset="utf-8">  
 5 | 	<script language="javascript">     
 6 | 	function exec1(command) 
 7 | 	{     
 8 | 	  var ws = new ActiveXObject("WScript.Shell");      
 9 | 	  ws.run(command);
10 | 	}     
11 | 	</script>   
12 |   </head>  
13 |   
14 | <body>
15 | 	<div id="header">
16 | 	<h1>打开python命令行</h1>
17 | 	</div>
18 |  
19 | 	<div id="nav1">
20 | 		打开python命令行（方式1）
21 |         <button οnclick="exec1('python')">运行 python</button>
22 | 	</div>
23 |     
24 | 	<div id="nav2">
25 | 		打开python命令行（方式2）
26 |         <input type="button" value="运行 python" οnclick="exec1('python')" />
27 | 	</div>
28 |  
29 | </body> 
30 | </html> 


--------------------------------------------------------------------------------
/Python+HTML/test2.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>  
 2 | <html>  
 3 |   <head>  
 4 |     <meta charset="utf-8">  
 5 | 	<script language="javascript">     
 6 | 	function exec1(command) 
 7 | 	{     
 8 | 	  var ws = new ActiveXObject("WScript.Shell");      
 9 | 	  ws.run(command);
10 | 	}     
11 | 	</script>   
12 |   </head>  
13 |   
14 | <body>
15 | 	<div id="header">
16 | 	<h1>运行python</h1>
17 | 	</div>
18 | 
19 | 	<div id="nav1">
20 | 		执行test2程序（方式1）
21 |         <button onclick="exec1('python test2.py')">运行 python</button>
22 | 	</div>
23 |     
24 | 	<div id="nav2">
25 | 		执行test2程序（方式2）
26 |         <input type="button" value="运行 python" onclick="exec1('python test2.py')" />
27 | 	</div>
28 | 
29 | </body> 
30 | </html> 


--------------------------------------------------------------------------------
/Python+HTML/test2.py:
--------------------------------------------------------------------------------
# Create (or truncate) new_file.txt and close it again without writing anything.
with open('new_file' + '.txt','w') as file:
    pass


--------------------------------------------------------------------------------
/Python+HTML/test3.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | 
 3 | <head>
 4 |     <meta charset="UTF-8">
 5 |     <script language="javascript">
 6 |         function exec1(command) {
 7 |             var ws = new ActiveXObject("WScript.Shell");
 8 |             ws.run(command);
 9 |         }
10 |     </script>
11 | </head>
12 | 
13 | <body>
14 |     <div id="header">
15 |         <h1>读取python运行的结果</h1>
16 |     </div>
17 | 
18 |     <!-- 按键开始执行python程序 -->
19 | 	
20 | 	<div id="program1">
21 | 		执行test3程序（方式1）
22 |         <button onclick="exec1('python test3.py')">运行 python</button>
23 | 	</div>
24 |     
25 | 	<div id="program2">
26 | 		执行test3程序（方式2）
27 |         <input type="button" value="运行 python" onclick="exec1('python test3.py')" />
28 | 	</div>
29 | 
30 |     <!-- 读取python运行结果 -->
31 |     <!-- 链接的 target 属性必须引用 iframe 的 name 属性 -->
32 |     <div id="result">
33 |         <p><a href="number.txt" target="result">结果</a></p>
34 |         <iframe name="result" width="100" height="30" scrolling="no" frameborder="3"></iframe>
35 |     </div>
36 | 
37 | </body>
38 | 
39 | </html>


--------------------------------------------------------------------------------
/Python+HTML/test3.py:
--------------------------------------------------------------------------------
1 | # 示例代码：生成一个随机数并写入（每次重写）txt
2 | 
3 | import random
4 |  
# Pick a random integer from 1 to 10 (both ends included).
number = random.randint(1,10)

# Overwrite number.txt with the value; the with-block closes the file even if
# the write fails.
with open('number.txt','w') as f:
    f.write(str(number))


--------------------------------------------------------------------------------
/Python+HTML/test4.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 | 
 4 | <head>
 5 |     <meta charset="UTF-8">
 6 |     <script language="javascript">
 7 |         function exec1(command) {
 8 |             var ws = new ActiveXObject("WScript.Shell");
 9 |             ws.run(command);
10 |         }
11 | 
12 |         function input() {
13 |             text = document.getElementById("data").value;  //获得网页文本框的值
14 |             var strFile = "input.txt";	//定义文件保存的路径
15 |             var objFSO = new ActiveXObject("Scripting.FileSystemObject");
16 |             var objStream = objFSO.CreateTextFile(strFile, true);
17 |             objStream.Write(text);  //写值
18 |             objStream.Close();
19 |         }
20 |     </script>
21 | </head>
22 | 
23 | <body>
24 |     <div id="header">
25 |         <h1>对输入值进行平方计算</h1>
26 |     </div>
27 | 
28 |     <!-- 在文本框内输入一个数字，点击输入完毕按钮，将在本地生成一个input.txt，内容为输入的数字 -->
29 |     <div id="input">
30 |         <input id="data" type="text" value="22" />
31 |         <p></p>
32 |         <input type="button" value="输入完毕" onclick="input()" />
33 |     </div>
34 |     <p></p>
35 | 	
36 |     <!-- 点击运行Python按钮，将本地执行test4.py程序 -->
37 | 	<div id="program1">
38 | 		执行test3程序（方式1）
39 |         <button onclick="exec1('python test4.py')">运行 python</button>
40 | 	</div>
41 |     
42 | 	<div id="program2">
43 | 		执行test3程序（方式2）
44 |         <input type="button" value="运行 python" onclick="exec1('python test4.py')" />
45 | 	</div>
46 | 
47 |     <!-- 点击结果, 将在框内显示出计算的结果-->
48 |     <div id="result">	
49 |         <p><a href="result.txt" target="result">显示结果</a></p>
50 |         <iframe name="result" width="100" height="30" scrolling="no" frameborder="3"></iframe>	
51 |     </div>
52 | 
53 |     <p></p>
54 |     <div id="result">
55 |         使用说明：文本框输入一个数字，然后依次点击 [输入完毕] [运行python] [显示结果]
56 |     </div>
57 | </body>
58 | 
59 | </html>


--------------------------------------------------------------------------------
/Python+HTML/test4.py:
--------------------------------------------------------------------------------
 1 | # 读取生成的input.txt内容
 2 | f1 = open('input.txt') # 读取的数据类型为str
 3 | number1 = int(f1.read())
 4 | 
 5 | # 执行你要执行的程序（例子为计算平方）
 6 | number2 = number1 * number1
 7 | 
 8 | # 把运行的结果写入result.txt中
 9 | f2 = open('result.txt', 'w')
10 | f2.write((str(number2)))
11 | 
12 | f1.close()
13 | f2.close()


--------------------------------------------------------------------------------
/Python+Media/发邮件/163.py:
--------------------------------------------------------------------------------
 1 | import smtplib
 2 | from email.mime.text import MIMEText
 3 | from email.mime.multipart import MIMEMultipart
 4 | from email.header import Header
 5 | 
 6 | 
 7 | smtp_server = 'smtp.163.com'
 8 | smtp_username = '*********'
 9 | smtp_password = '*********'
10 | 
11 | receivers = '*********'
12 | 
13 | # Set up the email message
14 | msg = MIMEMultipart()
15 | msg['From'] = smtp_username
16 | msg['To'] = '*********'
17 | msg["Subject"] = Header('邮件发送测试', 'utf-8')
18 | 
19 | 
20 | body = '邮件发送测试'
21 | msg.attach(MIMEText(body, 'plain', 'utf-8'))
22 | 
23 | try:
24 |     with smtplib.SMTP_SSL(smtp_server) as server:
25 |         server.login(smtp_username, smtp_password)
26 |         server.sendmail(smtp_username, receivers, msg.as_string())
27 |         print("发送成功")
28 | 
29 | except smtplib.SMTPException as e:
30 |     print("无法发送邮", e)


--------------------------------------------------------------------------------
/Python+Media/发邮件/README.md:
--------------------------------------------------------------------------------
 1 | smtplib 是 SMTP protocol client
 2 | 
 3 | **smtplib模块主要负责发送邮件的动作，email模块主要负责构造邮件**
 4 | 
 5 | 
 6 | 
 7 | 
 8 | ## 提前准备
 9 | 
10 | 在邮箱中开启SMTP服务
11 | 
12 | 
13 | smtplib.SMTP(smtp_server) as server:
14 | server.starttls()
15 | 


--------------------------------------------------------------------------------
/Python+Media/发邮件/content.txt:
--------------------------------------------------------------------------------
1 | 测试邮件


--------------------------------------------------------------------------------
/Python+Media/发邮件/outlook.py:
--------------------------------------------------------------------------------
 1 | import smtplib
 2 | from email.mime.text import MIMEText
 3 | from email.mime.multipart import MIMEMultipart
 4 | 
 5 | 
 6 | from email.header import Header
 7 | 
 8 | 
 9 | # Set up the SMTP server
10 | smtp_server = 'smtp.office365.com'
11 | smtp_port = 587
12 | smtp_username = '*********'
13 | smtp_password = '*********'
14 | 
15 | receivers = '*********'
16 | 
17 | # Set up the email message
18 | msg = MIMEMultipart()
19 | msg['From'] = smtp_username
20 | msg['To'] = receivers
21 | msg["Subject"] = Header('Python自动发送的邮件','utf-8')
22 | 
23 | 
24 | with open('content.txt', 'r', encoding='utf-8') as f:
25 |     body = f.read()
26 | msg.attach(MIMEText(body, 'html', 'utf-8'))
27 | 
28 | # Send the email
29 | try:
30 |     with smtplib.SMTP(smtp_server, smtp_port) as server:
31 |         server.starttls()
32 |         server.login(smtp_username, smtp_password)
33 |         server.sendmail(smtp_username, receivers, msg.as_string())
34 |         print("发送成功") 
35 | 
36 | except smtplib.SMTPException as e:
37 |     print("无法发送邮", e)


--------------------------------------------------------------------------------
/Python+Media/文字转音频/demo.pcm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yzy1996/Python-Code/2abcaa6fbfa4a84aaffdf10d7bcc6b12649dd221/Python+Media/文字转音频/demo.pcm


--------------------------------------------------------------------------------
/Python+Media/文字转音频/pcm2wav.py:
--------------------------------------------------------------------------------
1 | import os
2 | 
3 | 
4 | command = 'ffmpeg ' + '-y -f s16le -ar 16000 -i ' + 'demo.pcm' + ' aa' + '.wav'
5 | os.system(command)


--------------------------------------------------------------------------------
/Python+Media/文字转音频/test_webtts.py:
--------------------------------------------------------------------------------
 1 | #-*- coding: utf-8 -*-
 2 | #  语音合成 WebAPI 接口调用示例 接口文档（必看）：https://doc.xfyun.cn/msc_android/%E8%AF%AD%E9%9F%B3%E5%90%88%E6%88%90.html
 3 | #  webapi 合成服务参考帖子：http://bbs.xfyun.cn/forum.php?mod=viewthread&tid=38997&extra=
 4 | #  webapi是单次只支持1000个字节，具体看您的编码格式，计算一下具体支持多少文字
 5 | # （Very Important）创建完webapi应用添加合成服务之后一定要设置ip白名单，找到控制台--我的应用--设置ip白名单，如何设置参考：http://bbs.xfyun.cn/forum.php?mod=viewthread&tid=41891
 6 | #  合成发音人自动添加获取测试权限使用方法：登陆开放平台https://www.xfyun.cn/后--我的应用（必须为webapi类型应用）--添加在线语音合成（已添加的不用添加）--发音人管理---添加发音人--测试代码里需修改发音人参数
 7 | #  错误码链接：https://www.xfyun.cn/document/error-code （code返回错误码时必看）
 8 | #  @author iflytek
 9 | import requests
10 | import time
11 | import hashlib
12 | import base64
13 | #  合成webapi接口地址
14 | URL = "http://api.xfyun.cn/v1/service/v1/tts"
15 | #  音频编码(raw合成的音频格式pcm、wav,lame合成的音频格式MP3)
16 | AUE = "raw"
17 | #  应用APPID（必须为webapi类型应用，并开通语音合成服务，参考帖子如何创建一个webapi应用：http://bbs.xfyun.cn/forum.php?mod=viewthread&tid=36481
18 | APPID = "5d3abe07"
19 | #  接口密钥（webapi类型应用开通合成服务后，控制台--我的应用---语音合成---相应服务的apikey）
20 | API_KEY = "e4a5650869b3a64e1fdb8aaaf47bc056"
21 | 
22 | # 组装http请求头
23 | def getHeader():
24 |     curTime = str(int(time.time()))
25 |     # ttp=ssml
26 |     param = "{\"aue\":\"" + AUE + "\",\"auf\":\"audio/L16;rate=16000\",\"voice_name\":\"aisjinger\",\"speed\":\"30\",\"volume\":\"100\",\"engine_type\":\"intp65\"}"
27 |     print("param:{}".format(param))
28 | 
29 |     paramBase64 = str(base64.b64encode(param.encode('utf-8')), 'utf-8')
30 |     print("x_param:{}".format(paramBase64))
31 | 
32 |     m2 = hashlib.md5()
33 |     m2.update((API_KEY + curTime + paramBase64).encode('utf-8'))
34 | 
35 |     checkSum = m2.hexdigest()
36 |     print('checkSum:{}'.format(checkSum))
37 | 
38 |     header = {
39 |         'X-CurTime': curTime,
40 |         'X-Param': paramBase64,
41 |         'X-Appid': APPID,
42 |         'X-CheckSum': checkSum,
43 |         'X-Real-Ip': '127.0.0.1',
44 |         'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8',
45 |     }
46 |     print(header)
47 |     return header
48 | 
49 | 
50 | def getBody(text):
51 |     data = {'text': text}
52 |     return data
53 | 
54 | 
55 | def writeFile(file, content):
56 |     with open(file, 'wb') as f:
57 |         f.write(content)
58 |     f.close()
59 | 
60 | #  待合成文本内容
61 | r = requests.post(URL, headers=getHeader(), data=getBody("沸腾炉出口负压异常   "))
62 | 
63 | contentType = r.headers['Content-Type']
64 | if contentType == "audio/mpeg":
65 |     sid = r.headers['sid']
66 |     if AUE == "raw":
67 |         print(r.content)
68 | #   合成音频格式为pcm、wav并保存在audio目录下
69 |         writeFile("audio/" + sid + ".wav", r.content)
70 |     else:
71 |         print(r.content)
72 | #   合成音频格式为mp3并保存在audio目录下
73 |         writeFile("audio/" + "xiaoyan" + ".mp3", r.content)
74 |     print("success, sid = " + sid)
75 | else:
76 | #   错误码链接：https://www.xfyun.cn/document/error-code （code返回错误码时必看）
77 |     print(r.text)


--------------------------------------------------------------------------------
/Python+Media/文字转音频/text.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yzy1996/Python-Code/2abcaa6fbfa4a84aaffdf10d7bcc6b12649dd221/Python+Media/文字转音频/text.txt


--------------------------------------------------------------------------------
/Python+Media/文字转音频/text2audio.py:
--------------------------------------------------------------------------------
 1 | #-*- coding: utf-8 -*-
 2 | #  语音合成 WebAPI 接口调用示例 接口文档（必看）：https://doc.xfyun.cn/msc_android/%E8%AF%AD%E9%9F%B3%E5%90%88%E6%88%90.html
 3 | #  webapi 合成服务参考帖子：http://bbs.xfyun.cn/forum.php?mod=viewthread&tid=38997&extra=
 4 | #  webapi是单次只支持1000个字节，具体看您的编码格式，计算一下具体支持多少文字
 5 | # （Very Important）创建完webapi应用添加合成服务之后一定要设置ip白名单，找到控制台--我的应用--设置ip白名单，如何设置参考：http://bbs.xfyun.cn/forum.php?mod=viewthread&tid=41891
 6 | #  合成发音人自动添加获取测试权限使用方法：登陆开放平台https://www.xfyun.cn/后--我的应用（必须为webapi类型应用）--添加在线语音合成（已添加的不用添加）--发音人管理---添加发音人--测试代码里需修改发音人参数
 7 | #  错误码链接：https://www.xfyun.cn/document/error-code （code返回错误码时必看）
 8 | #  @author iflytek
 9 | import requests
10 | import time
11 | import hashlib
12 | import base64
13 | #  合成webapi接口地址
14 | URL = "http://api.xfyun.cn/v1/service/v1/tts"
15 | #  音频编码(raw合成的音频格式pcm、wav,lame合成的音频格式MP3)
16 | AUE = "raw"
17 | #  应用APPID（必须为webapi类型应用，并开通语音合成服务，参考帖子如何创建一个webapi应用：http://bbs.xfyun.cn/forum.php?mod=viewthread&tid=36481
18 | APPID = "5d3abe07"
19 | #  接口密钥（webapi类型应用开通合成服务后，控制台--我的应用---语音合成---相应服务的apikey）
20 | API_KEY = "e4a5650869b3a64e1fdb8aaaf47bc056"
21 | 
22 | # 组装http请求头
23 | def getHeader():
24 |     curTime = str(int(time.time()))
25 |     # ttp=ssml
26 |     param = "{\"aue\":\"" + AUE + "\",\"auf\":\"audio/L16;rate=16000\",\"voice_name\":\"aisjinger\",\"speed\":\"30\",\"engine_type\":\"intp65\"}"
27 |     # print("param:{}".format(param))
28 | 
29 |     paramBase64 = str(base64.b64encode(param.encode('utf-8')), 'utf-8')
30 |     # print("x_param:{}".format(paramBase64))
31 | 
32 |     m2 = hashlib.md5()
33 |     m2.update((API_KEY + curTime + paramBase64).encode('utf-8'))
34 | 
35 |     checkSum = m2.hexdigest()
36 |     # print('checkSum:{}'.format(checkSum))
37 | 
38 |     header = {
39 |         'X-CurTime': curTime,
40 |         'X-Param': paramBase64,
41 |         'X-Appid': APPID,
42 |         'X-CheckSum': checkSum,
43 |         'X-Real-Ip': '127.0.0.1',
44 |         'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8',
45 |     }
46 |     # print(header)
47 |     return header
48 | 
49 | 
50 | def getBody(text):
51 |     data = {'text': text}
52 |     return data
53 | 
54 | 
55 | def writeFile(file, content):
56 |     with open(file, 'wb') as f:
57 |         f.write(content)
58 |     f.close()
59 | 
60 | 
61 | with open(r'text.txt', 'rt') as f:
62 |     lines = (line.strip() for line in f)
63 |     for line in lines:
64 | 
65 |         r = requests.post(URL, headers=getHeader(), data=getBody(line))
66 | 
67 |         contentType = r.headers['Content-Type']
68 |         if contentType == "audio/mpeg":
69 |             sid = r.headers['sid']
70 |             if AUE == "raw":
71 |         #   合成音频格式为pcm、wav并保存在audio目录下
72 |                 writeFile("C:/Users/Jerry/Desktop/audio/" + line + ".wav", r.content)
73 | 
74 | 
75 | ffmpeg -y -f s16le -ar 11025 -i demo.pcm demo.wav


--------------------------------------------------------------------------------
/Python+Media/文字转音频/use_old.py:
--------------------------------------------------------------------------------
 1 | #-*- coding: utf-8 -*-
 2 | #  语音合成 WebAPI 接口调用示例 接口文档（必看）：https://doc.xfyun.cn/msc_android/%E8%AF%AD%E9%9F%B3%E5%90%88%E6%88%90.html
 3 | #  webapi 合成服务参考帖子：http://bbs.xfyun.cn/forum.php?mod=viewthread&tid=38997&extra=
 4 | #  webapi是单次只支持1000个字节，具体看您的编码格式，计算一下具体支持多少文字
 5 | # （Very Important）创建完webapi应用添加合成服务之后一定要设置ip白名单，找到控制台--我的应用--设置ip白名单，如何设置参考：http://bbs.xfyun.cn/forum.php?mod=viewthread&tid=41891
 6 | #  合成发音人自动添加获取测试权限使用方法：登陆开放平台https://www.xfyun.cn/后--我的应用（必须为webapi类型应用）--添加在线语音合成（已添加的不用添加）--发音人管理---添加发音人--测试代码里需修改发音人参数
 7 | #  错误码链接：https://www.xfyun.cn/document/error-code （code返回错误码时必看）
 8 | #  @author iflytek
 9 | import requests
10 | import time
11 | import hashlib
12 | import base64
13 | #  合成webapi接口地址
14 | URL = "http://api.xfyun.cn/v1/service/v1/tts"
15 | #  音频编码(raw合成的音频格式pcm、wav,lame合成的音频格式MP3)
16 | AUE = "raw"
17 | #  应用APPID（必须为webapi类型应用，并开通语音合成服务，参考帖子如何创建一个webapi应用：http://bbs.xfyun.cn/forum.php?mod=viewthread&tid=36481
18 | APPID = "5d3abe07"
19 | #  接口密钥（webapi类型应用开通合成服务后，控制台--我的应用---语音合成---相应服务的apikey）
20 | API_KEY = "e4a5650869b3a64e1fdb8aaaf47bc056"
21 | 
22 | # 组装http请求头
23 | def getHeader():
24 |     curTime = str(int(time.time()))
25 |     # ttp=ssml
26 |     param = "{\"aue\":\"" + AUE + "\",\"auf\":\"audio/L16;rate=16000\",\"voice_name\":\"aisjinger\",\"speed\":\"30\",\"volume\":\"100\",\"engine_type\":\"intp65\"}"
27 |     # print("param:{}".format(param))
28 | 
29 |     paramBase64 = str(base64.b64encode(param.encode('utf-8')), 'utf-8')
30 |     # print("x_param:{}".format(paramBase64))
31 | 
32 |     m2 = hashlib.md5()
33 |     m2.update((API_KEY + curTime + paramBase64).encode('utf-8'))
34 | 
35 |     checkSum = m2.hexdigest()
36 |     # print('checkSum:{}'.format(checkSum))
37 | 
38 |     header = {
39 |         'X-CurTime': curTime,
40 |         'X-Param': paramBase64,
41 |         'X-Appid': APPID,
42 |         'X-CheckSum': checkSum,
43 |         'X-Real-Ip': '127.0.0.1',
44 |         'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8',
45 |     }
46 |     # print(header)
47 |     return header
48 | 
49 | 
50 | def getBody(text):
51 |     data = {'text': text}
52 |     return data
53 | 
54 | 
55 | def writeFile(file, content):
56 |     with open(file, 'wb') as f:
57 |         f.write(content)
58 |     f.close()
59 | 
60 | 
61 | with open(r'text.txt', 'rt') as f:
62 |     lines = (line.strip() for line in f)
63 |     for line in lines:
64 | 
65 |         r = requests.post(URL, headers=getHeader(), data=getBody(line))
66 | 
67 |         contentType = r.headers['Content-Type']
68 |         if contentType == "audio/mpeg":
69 |             sid = r.headers['sid']
70 |             if AUE == "raw":
71 |                 # print(r.content)
72 |           # 合成音频格式为pcm、wav并保存在audio目录下
73 |                 writeFile("audio/" + line + ".wav", r.content)
74 |             else:
75 |                 print(r.content)
76 |           # 合成音频格式为mp3并保存在audio目录下
77 |                 writeFile("audio/" + "xiaoyan" + ".mp3", r.content)
78 |             print("success, sid = " + sid)
79 |         else:
80 |           # 错误码链接：https://www.xfyun.cn/document/error-code （code返回错误码时必看）
81 |             print(r.text)


--------------------------------------------------------------------------------
/Python+Opencv/README.md:
--------------------------------------------------------------------------------
 1 | # OpenCV-python
 2 | 
 3 | # 拾色器
 4 | 
 5 | 顾名思义，判断一张图片中的像素点的颜色值，实现的效果是给出一张图片，鼠标任意指一个地方，会输出那个地方的RGB值。并且同时编写了交互版和非交互版，非交互版是在代码中给出图片地址，交互版是在命令行按提示输入图片地址。
 6 | 
 7 | # 图像处理
 8 | 
 9 | 实现最基本的一些操作，包括了二值化…
10 | 
11 | # 相机
12 | 
13 | 与相机相关的一些操作
14 | 
15 | 
16 | 
17 | 
18 | 
19 | 


--------------------------------------------------------------------------------
/Python+Opencv/opencv_draw.py:
--------------------------------------------------------------------------------
 1 | import cv2
 2 | import numpy as np
 3 | 
 4 | drawing = False # true if mouse is pressed
 5 | mode = True # if True, draw rectangle. Press 'm' to toggle to curve
 6 | ix,iy = -1,-1
 7 | 
 8 | # mouse callback function
 9 | def draw_circle(event,x,y,flags,param):
10 |     global ix,iy,drawing,mode
11 | 
12 |     if event == cv2.EVENT_LBUTTONDOWN:
13 |         drawing = True
14 |         ix,iy = x,y
15 | 
16 |     elif event == cv2.EVENT_MOUSEMOVE:
17 |         if drawing == True:
18 |             if mode == True:
19 |                 cv2.rectangle(img,(ix,iy),(x,y),(0,255,0),-1)
20 |             else:
21 |                 cv2.circle(img,(x,y),5,(0,0,255),-1)
22 | 
23 |     elif event == cv2.EVENT_LBUTTONUP:
24 |         drawing = False
25 |         if mode == True:
26 |             cv2.rectangle(img,(ix,iy),(x,y),(0,255,0),-1)
27 |         else:
28 |             cv2.circle(img,(x,y),5,(0,0,255),-1)
29 | 
30 | img = np.zeros((512,512,3), np.uint8)
31 | cv2.namedWindow('image')
32 | cv2.setMouseCallback('image',draw_circle)
33 | 
34 | while(1):
35 |     cv2.imshow('image',img)
36 |     k = cv2.waitKey(1) & 0xFF
37 |     if k == ord('m'):
38 |         mode = not mode
39 |     elif k == 27:
40 |         break
41 | 
42 | cv2.destroyAllWindows()


--------------------------------------------------------------------------------
/Python+Opencv/template_matching.py:
--------------------------------------------------------------------------------
 1 | # 2D图片的匹配
 2 | 
 3 | import cv2
 4 | import numpy as np
 5 | import math
 6 | 
 7 | image = cv2.imread("0.jpg")
 8 | 
 9 | image2 = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
10 | 
11 | comple = cv2.imread("1.jpg",0)
12 | # shape()得到的是行*列，所以需要倒序转换一下
13 | w,h = comple.shape[::-1]
14 | # 匹配比较
15 | res = cv2.matchTemplate(image2, comple, cv2.TM_CCOEFF_NORMED)
16 | # 设置阈值
17 | threshold = 0.39
18 | # 找到大于阈值的部分，返回值是坐标
19 | loc = np.where(res >= threshold)
20 | # *号是一个逆操作，二维[::-1]是将 行 倒序（最后一行变成了第一行）
21 | for pt in zip(*loc[::-1]):
22 |     cv2.rectangle(image, pt, (pt[0] + w, pt[1] + h), (255, 0, 255), 2)
23 | 
24 | cv2.imshow('result', image)
25 | 
26 | cv2.waitKey(0)
27 | cv2.destroyAllWindows()


--------------------------------------------------------------------------------
/Python+Opencv/图像处理/binarization.py:
--------------------------------------------------------------------------------
 1 | import cv2
 2 | import numpy as np
 3 | 
 4 | # 读取图像
 5 | img = cv2.imread('black-white.jpg')
 6 | 
 7 | # 变微灰度图
 8 | gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
 9 | 
10 | # 大津法二值化
11 | retval, dst = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU)
12 | 
13 | # 腐蚀和膨胀是对白色部分而言的，膨胀，白区域变大，最后的参数为迭代次数
14 | # dst = cv2.dilate(dst, None, iterations=2)
15 | 
16 | # # 腐蚀，白区域变小
17 | dst = cv2.erode(dst, None, iterations=2)
18 | 
19 | cv2.imshow('dst', dst)
20 | 
21 | cv2.waitKey(0)


--------------------------------------------------------------------------------
/Python+Opencv/图像处理/black-white.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yzy1996/Python-Code/2abcaa6fbfa4a84aaffdf10d7bcc6b12649dd221/Python+Opencv/图像处理/black-white.jpg


--------------------------------------------------------------------------------
/Python+Opencv/基于颜色的物体追踪.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yzy1996/Python-Code/2abcaa6fbfa4a84aaffdf10d7bcc6b12649dd221/Python+Opencv/基于颜色的物体追踪.py


--------------------------------------------------------------------------------
/Python+Opencv/拾色器/0.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yzy1996/Python-Code/2abcaa6fbfa4a84aaffdf10d7bcc6b12649dd221/Python+Opencv/拾色器/0.jpg


--------------------------------------------------------------------------------
/Python+Opencv/拾色器/1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yzy1996/Python-Code/2abcaa6fbfa4a84aaffdf10d7bcc6b12649dd221/Python+Opencv/拾色器/1.jpg


--------------------------------------------------------------------------------
/Python+Opencv/拾色器/2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yzy1996/Python-Code/2abcaa6fbfa4a84aaffdf10d7bcc6b12649dd221/Python+Opencv/拾色器/2.png


--------------------------------------------------------------------------------
/Python+Opencv/拾色器/color_picker(取色器交互版).py:
--------------------------------------------------------------------------------
 1 | # 拾色器
 2 | # 该程序是为了拾取颜色，可以输出BGR、HSV、GRAY等格式
 3 | 
 4 | import cv2
 5 | 
 6 | 
 7 | # 定义鼠标交互函数
 8 | def mouseColor(event, x, y, flags, param):
 9 |     if event == cv2.EVENT_LBUTTONDOWN:
10 |         print(str.upper(out), color[y, x])  #输出图像坐标(x,y)处的HSV的值
11 | 
12 | 
13 | path, out = input('请输入图片名称或路径，再空格输入选择的颜色格式（bgr/gray/hsv）\n').split()
14 | img = cv2.imread(path)  #读进来是BGR格式
15 | # 进行颜色格式的转换
16 | gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  #变成灰度图
17 | hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)  #变成HSV格式
18 | if out == 'bgr':
19 |     color = img
20 | if out == 'gray':
21 |     color = gray
22 | if out == 'hsv':
23 |     color = hsv
24 | cv2.namedWindow("Color Picker")
25 | cv2.setMouseCallback("Color Picker", mouseColor)
26 | cv2.imshow("Color Picker", img)
27 | if cv2.waitKey(0):
28 |     cv2.destroyAllWindows()
29 | 
30 | 


--------------------------------------------------------------------------------
/Python+Opencv/拾色器/color_picker(取色器无交互版).py:
--------------------------------------------------------------------------------
 1 | # 拾色器
 2 | # 该程序是为了拾取颜色，可以输出BGR、HSV、GRAY等格式
 3 | 
 4 | import cv2
 5 | 
 6 | # 定义鼠标交互函数
 7 | def mouseColor(event, x, y, flags, param):
 8 |     if event == cv2.EVENT_LBUTTONDOWN:
 9 |         print('HSV:', hsv[y, x])  #输出图像坐标(x,y)处的HSV的值
10 | 
11 | img = cv2.imread('0.jpg')  #读进来是BGR格式
12 | hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)  #变成HSV格式
13 | cv2.namedWindow("Color Picker")
14 | cv2.setMouseCallback("Color Picker", mouseColor)
15 | cv2.imshow("Color Picker", img)
16 | if cv2.waitKey(0):
17 |     cv2.destroyAllWindows()
18 | 
19 | 
20 | 
21 | 


--------------------------------------------------------------------------------
/Python+Opencv/相机/camera_photo.py:
--------------------------------------------------------------------------------
 1 | # 该程序打开摄像头，显示视频，按q拍摄一张照片并储存下来
 2 | 
 3 | import cv2
 4 | 
 5 | cap = cv2.VideoCapture(0)
 6 | 
 7 | while(1):
 8 |     # 获得图片
 9 |     ret, frame = cap.read()
10 |     # 展示图片
11 |     cv2.imshow("capture", frame)
12 |     if cv2.waitKey(1) & 0xFF == ord('q'):
13 |         # 存储图片
14 |         cv2.imwrite("camera.jpg", frame)
15 |         break
16 | 
17 | cap.release()
18 | cv2.destroyAllWindows()
19 | 
20 | 


--------------------------------------------------------------------------------
/Python+Opencv/颜色_圆_相关/图片/color_filtering.py:
--------------------------------------------------------------------------------
 1 | # 按照选择的颜色对图片进行过滤
 2 | 
 3 | import cv2
 4 | import numpy as np
 5 | import color_list
 6 | 
 7 | 
 8 | def get_color(frame, color):
 9 |     hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
10 |     color_dict = color_list.getColorList()
11 |     mask = cv2.inRange(hsv, color_dict[color][0], color_dict[color][1])
12 |     res = cv2.bitwise_and(frame, frame, mask=mask)
13 |     # 存储一张图片
14 |     cv2.imwrite(filename + color + '.jpg', mask)
15 |     # 展示一张图片
16 |     cv2.imshow('Result', res)
17 |     cv2.waitKey(0)
18 |     
19 | 
20 | if __name__ == '__main__':
21 |     filename = '33.bmp'
22 |     color = 'black'
23 |     frame = cv2.imread(filename)
24 |     get_color(frame, color)
25 | 


--------------------------------------------------------------------------------
/Python+Opencv/颜色_圆_相关/图片/color_list.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import collections
  3 |  
  4 | #定义字典存放颜色分量上下限
  5 | #例如：{颜色: [min分量, max分量]}
  6 | #{'red': [array([160,  43,  46]), array([179, 255, 255])]}
  7 |  
  8 | def getColorList():
  9 |     dict = collections.defaultdict(list)
 10 |  
 11 |     # 黑色
 12 |     lower_black = np.array([0, 0, 0])
 13 |     upper_black = np.array([180, 255, 46])
 14 |     color_list = []
 15 |     color_list.append(lower_black)
 16 |     color_list.append(upper_black)
 17 |     dict['black'] = color_list
 18 |  
 19 |     #灰色
 20 |     lower_gray = np.array([0, 0, 46])
 21 |     upper_gray = np.array([180, 43, 220])
 22 |     color_list = []
 23 |     color_list.append(lower_gray)
 24 |     color_list.append(upper_gray)
 25 |     dict['gray']=color_list
 26 |  
 27 |     # 白色
 28 |     lower_white = np.array([0, 0, 221])
 29 |     upper_white = np.array([180, 30, 255])
 30 |     color_list = []
 31 |     color_list.append(lower_white)
 32 |     color_list.append(upper_white)
 33 |     dict['white'] = color_list
 34 |  
 35 |     #红色
 36 |     lower_red = np.array([156, 43, 46])
 37 |     upper_red = np.array([180, 255, 255])
 38 |     color_list = []
 39 |     color_list.append(lower_red)
 40 |     color_list.append(upper_red)
 41 |     dict['red']=color_list
 42 |  
 43 |     # 红色2
 44 |     lower_red = np.array([0, 43, 46])
 45 |     upper_red = np.array([10, 255, 255])
 46 |     color_list = []
 47 |     color_list.append(lower_red)
 48 |     color_list.append(upper_red)
 49 |     dict['red2'] = color_list
 50 |  
 51 |     #橙色
 52 |     lower_orange = np.array([11, 43, 46])
 53 |     upper_orange = np.array([25, 255, 255])
 54 |     color_list = []
 55 |     color_list.append(lower_orange)
 56 |     color_list.append(upper_orange)
 57 |     dict['orange'] = color_list
 58 |  
 59 |     #黄色
 60 |     lower_yellow = np.array([26, 43, 46])
 61 |     upper_yellow = np.array([34, 255, 255])
 62 |     color_list = []
 63 |     color_list.append(lower_yellow)
 64 |     color_list.append(upper_yellow)
 65 |     dict['yellow'] = color_list
 66 |  
 67 |     #绿色
 68 |     lower_green = np.array([35, 43, 46])
 69 |     upper_green = np.array([77, 255, 255])
 70 |     color_list = []
 71 |     color_list.append(lower_green)
 72 |     color_list.append(upper_green)
 73 |     dict['green'] = color_list
 74 |  
 75 |     #青色
 76 |     lower_cyan = np.array([78, 43, 46])
 77 |     upper_cyan = np.array([99, 255, 255])
 78 |     color_list = []
 79 |     color_list.append(lower_cyan)
 80 |     color_list.append(upper_cyan)
 81 |     dict['cyan'] = color_list
 82 |  
 83 |     #蓝色
 84 |     lower_blue = np.array([100, 43, 46])
 85 |     upper_blue = np.array([124, 255, 255])
 86 |     color_list = []
 87 |     color_list.append(lower_blue)
 88 |     color_list.append(upper_blue)
 89 |     dict['blue'] = color_list
 90 |  
 91 |     # 紫色
 92 |     lower_purple = np.array([125, 43, 46])
 93 |     upper_purple = np.array([155, 255, 255])
 94 |     color_list = []
 95 |     color_list.append(lower_purple)
 96 |     color_list.append(upper_purple)
 97 |     dict['purple'] = color_list
 98 |  
 99 |     return dict
100 |  
101 |  
102 | if __name__ == '__main__':
103 |     color_dict = getColorList()
104 |     print(color_dict)
105 |  
106 |     num = len(color_dict)
107 |     print('num=',num)
108 |  
109 |     for d in color_dict:
110 |         print('key=',d)
111 |         print('value=',color_dict[d][1])
112 | 


--------------------------------------------------------------------------------
/Python+Opencv/颜色_圆_相关/图片/detect_picture_color.py:
--------------------------------------------------------------------------------
 1 | # 为了找到图像中带有特定颜色的区域，画出区域外接圆和质心
 2 | 
 3 | import numpy as np
 4 | import cv2
 5 | import color_list
 6 | 
 7 | filename = '22.bmp'
 8 | color = 'black'
 9 | image = cv2.imread(filename)
10 | 
11 | #转到HSV空间
12 | hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
13 | #根据阈值构建掩膜
14 | color_dict = color_list.getColorList()
15 | mask = cv2.inRange(hsv, color_dict[color][0], color_dict[color][1])
16 | #腐蚀操作
17 | mask = cv2.erode(mask, None, iterations=2)
18 | #膨胀操作，其实先腐蚀再膨胀的效果是开运算，去除噪点
19 | mask = cv2.dilate(mask, None, iterations=2)
20 | #轮廓检测
21 | cnts = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
22 | #初始化瓶盖圆形轮廓质心
23 | center = None
24 | #如果存在轮廓
25 | if len(cnts) > 0:
26 | 	#找到面积最大的轮廓
27 | 	c = max(cnts, key = cv2.contourArea)
28 | 	#确定面积最大的轮廓的外接圆
29 | 	((x, y), radius) = cv2.minEnclosingCircle(c)
30 | 	#计算轮廓的矩
31 | 	M = cv2.moments(c)
32 | 	#计算质心
33 | 	center = (int(M["m10"]/M["m00"]), int(M["m01"]/M["m00"]))
34 | 	#只有当半径大于10时，才执行画图
35 | 	print(center)
36 | 	if radius > 10:
37 | 		cv2.circle(image, (int(x), int(y)), int(radius), (0, 255, 255), 2)
38 | 		cv2.circle(image, center, 5, (0, 0, 255), -1)
39 | 
40 | cv2.imshow('Image', image)
41 | #键盘检测，检按q退出
42 | cv2.waitKey(0)
43 | 


--------------------------------------------------------------------------------
/Python+Opencv/颜色_圆_相关/图片/detect_picture_color_circle.py:
--------------------------------------------------------------------------------
 1 | # 该程序为了找到图片中特定颜色的圆，返回圆心坐标，并标出圆
 2 | # 通过的是霍夫圆
 3 | 
 4 | import cv2
 5 | import numpy as np
 6 | import time
 7 | import color_list
 8 | 
 9 | def findPiccircle(frame, color):
10 | 
11 | 	hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)	
12 | 	color_dict = color_list.getColorList()
13 | 	mask = cv2.inRange(hsv, color_dict[color][0], color_dict[color][1])
14 | 	dilated = cv2.dilate(mask, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3)), iterations=2)
15 | 	## 需要修改minRadius以及maxRadius，用来限制识别圆的大小，排除其他的干扰
16 | 	circles = cv2.HoughCircles(dilated, cv2.HOUGH_GRADIENT, 1, 1000, param1=15, param2=10, minRadius=15, maxRadius=50)
17 | 	
18 | 	center = None
19 | 	if circles is not None:
20 | 		x, y, radius = circles[0][0]
21 | 		center = (x, y)
22 | 		cv2.circle(frame, center, radius, (0, 255, 0), 2)
23 | 		cv2.circle(frame, center, 2, (0,255,0), -1, 8, 0 );
24 | 		print('圆心：{}, {}'.format(x, y))
25 | 		
26 | 	cv2.imshow('result', frame)	
27 | 	
28 | 	if center != None:
29 | 		return center
30 | 	
31 | 			
32 | if __name__ == '__main__':
33 | 	filename ='22.bmp'   ##修改为图片
34 | 	color = 'black'      ##修改颜色
35 | 	frame = cv2.imread(filename)
36 | 	findPiccircle(frame, color)
37 | 	cv2.waitKey(0)
38 | 


--------------------------------------------------------------------------------
/Python+Opencv/颜色_圆_相关/图片/judge_color_center.py:
--------------------------------------------------------------------------------
 1 | # 该程序是为了判断给定图片的主要颜色，并找到主要颜色的质心
 2 | # 思路为：用不同颜色进行过滤，求出每种颜色滤波后的面积，找到最大面积的滤波颜色为目标颜色，然后找到质心
 3 | 
 4 | import cv2
 5 | import colorList
 6 | 
 7 | def getColor(frame):
 8 | 	hsv = cv2.cvtColor(frame,cv2.COLOR_BGR2HSV)
 9 | 	maxsum = 0
10 | 	color = None
11 | 	color_dict = colorList.getColorList()
12 | 
13 | 	# 对每个颜色进行判断
14 | 	for d in color_dict:
15 | 		# 根据阈值构建掩膜
16 | 		mask = cv2.inRange(hsv, color_dict[d][0], color_dict[d][1])
17 | 		# 腐蚀操作
18 | 		mask = cv2.erode(mask, None, iterations=2)
19 | 		# 膨胀操作，其实先腐蚀再膨胀的效果是开运算，去除噪点
20 | 		mask = cv2.dilate(mask, None, iterations=2)	
21 | 		img, cnts, hiera = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
22 | 		
23 | 		# 有轮廓才进行后面的判断
24 | 		if len(cnts) > 0:	
25 | 			# 计算识别区域的面积
26 | 			sum = 0
27 | 			for c in cnts:
28 | 				sum += cv2.contourArea(c)
29 | 			
30 | 			# 找到最大面积并找到质心
31 | 			if sum > maxsum :
32 | 				maxsum = sum	
33 | 				if maxsum != 0:
34 | 					color = d
35 | 				else:
36 | 					color = None
37 | 				# 找到面积最大的轮廓
38 | 				c = max(cnts, key = cv2.contourArea)
39 | 				# 确定面积最大的轮廓的外接圆
40 | 				((x, y), radius) = cv2.minEnclosingCircle(c)
41 | 				# 计算轮廓的矩
42 | 				M = cv2.moments(c)
43 | 				# 计算质心
44 | 				center = (int(M["m10"]/M["m00"]), int(M["m01"]/M["m00"]))
45 |  
46 | 	return color, center
47 |  
48 | if __name__ == '__main__':
49 | 	filename = '22.bmp' 
50 | 	frame = cv2.imread(filename)
51 | 	print('判断主要颜色为：', getColor(frame)[0], '圆心为：', getColor(frame)[1])


--------------------------------------------------------------------------------
/Python+Opencv/颜色_圆_相关/图片/judge_multi_color.py:
--------------------------------------------------------------------------------
 1 | # 该程序是为了判断出图片中的主要颜色（不只一种）
 2 | # 思路为：用不同颜色进行过滤，求出每种颜色滤波后的面积，找到最大面积的几个确定为主要颜色
 3 | 
 4 | import cv2
 5 | import numpy as np
 6 | import color_list
 7 | import heapq
 8 | 
 9 | 
10 | def getColor(frame, count_color):
11 | 
12 |     hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
13 | 
14 |     color = []
15 | 
16 |     color_dict = color_list.getColorList()
17 | 
18 |     # 颜色字典的颜色数量
19 |     num_color = len(color_dict)
20 |     sum = [0] * num_color
21 |     search_color_list = []
22 |     # 对每个颜色进行判断，d是颜色字符串（如：red）
23 |     for (d, i) in zip(color_dict, range(num_color)):
24 |         
25 |         search_color_list.append(d)
26 |         # 根据阈值构建掩膜
27 |         mask = cv2.inRange(hsv, color_dict[d][0], color_dict[d][1])
28 |         # 腐蚀操作
29 |         mask = cv2.erode(mask, None, iterations=2)
30 |         # 膨胀操作，其实先腐蚀再膨胀的效果是开运算，去除噪点
31 |         mask = cv2.dilate(mask, None, iterations=2)
32 |         img, cnts, hiera = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL,
33 |                                             cv2.CHAIN_APPROX_SIMPLE)
34 |         # 有轮廓才进行后面的判断
35 |         
36 |         if len(cnts) > 0:
37 |             for c in cnts:
38 |                 sum[i] += cv2.contourArea(c)
39 | 
40 |     find_color_list = heapq.nlargest(count_color, sum)
41 |     for j in range(count_color):     
42 |         color.append(search_color_list[sum.index(find_color_list[j])])
43 | 
44 |     return color
45 | 
46 | 
47 | if __name__ == '__main__':
48 | 
49 |     # 设定要检测的图片
50 |     filename = '111.jpg'
51 |     # 设定要检测的颜色数量
52 |     count_color = 3
53 | 
54 |     frame = cv2.imread(filename)
55 | 
56 |     print('判断主要的' + str(count_color) + '个颜色为：', ' '.join(getColor(frame, count_color)))
57 | 


--------------------------------------------------------------------------------
/Python+Opencv/颜色_圆_相关/图片/judge_single_color.py:
--------------------------------------------------------------------------------
 1 | # 该程序是为了判断给定图片的主要颜色
 2 | # 思路为：用不同颜色进行过滤，求出每种颜色滤波后的面积，找到最大面积的滤波颜色为目标颜色
 3 | 
 4 | import cv2
 5 | import numpy as np
 6 | import color_list
 7 | 
 8 | def getColor(frame):
 9 | 	hsv = cv2.cvtColor(frame,cv2.COLOR_BGR2HSV)
10 | 	maxsum = 0
11 | 	color = None
12 | 	color_dict = color_list.getColorList()
13 | 
14 | 	# 对每个颜色进行判断
15 | 	for d in color_dict:
16 | 		# 根据阈值构建掩膜
17 | 		mask = cv2.inRange(hsv, color_dict[d][0], color_dict[d][1])
18 | 		# 腐蚀操作
19 | 		mask = cv2.erode(mask, None, iterations=2)
20 | 		# 膨胀操作，其实先腐蚀再膨胀的效果是开运算，去除噪点
21 | 		mask = cv2.dilate(mask, None, iterations=2)	
22 | 		img, cnts, hiera = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
23 | 		
24 | 		# 有轮廓才进行后面的判断
25 | 		if len(cnts) > 0:	
26 | 			# 计算识别区域的面积
27 | 			sum = 0
28 | 			for c in cnts:
29 | 				sum += cv2.contourArea(c)
30 | 			
31 | 			# 找到最大面积并找到质心
32 | 			if sum > maxsum :
33 | 				maxsum = sum	
34 | 				if maxsum != 0:
35 | 					color = d
36 | 				else:
37 | 					color = None
38 |  
39 | 	return color
40 |  
if __name__ == '__main__':
    filename = '22.bmp'
    frame = cv2.imread(filename)
    # cv2.imread signals an unreadable/missing file by returning None
    if frame is None:
        raise SystemExit('Cannot read image: ' + filename)
    print('判断主要颜色为：', getColor(frame))


--------------------------------------------------------------------------------
/Python+Opencv/颜色_圆_相关/图片/multi_color_filtering.py:
--------------------------------------------------------------------------------
 1 | # 按照选择的多种颜色对图片进行过滤
 2 | 
 3 | import  cv2
 4 | import numpy as np
 5 | import colorList
 6 | 
 7 | def get_color(frame, color1, color2):
 8 | 	hsv = cv2.cvtColor(frame,cv2.COLOR_BGR2HSV)
 9 | 	color_dict = colorList.getColorList()
10 | 	mask1 = cv2.inRange(hsv, color_dict[color1][0], color_dict[color1][1])
11 | 	mask2 = cv2.inRange(hsv, color_dict[color2][0], color_dict[color2][1])
12 | 	mask = mask1 + mask2
13 | 	cv2.imwrite(filename + color1 + color2 + '.jpg', mask)
14 | 
if __name__ == '__main__':
    # Input image; get_color() also reads this module-level name for its output path
    filename = '222.jpg'
    # The two color-table keys to keep
    color1, color2 = 'blue', 'red'
    frame = cv2.imread(filename)
    get_color(frame, color1, color2)


--------------------------------------------------------------------------------
/Python+Opencv/颜色_圆_相关/视频/color_list.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import collections
  3 |  
  4 | #定义字典存放颜色分量上下限
  5 | #例如：{颜色: [min分量, max分量]}
  6 | #{'red': [array([160,  43,  46]), array([179, 255, 255])]}
  7 |  
  8 | def getColorList():
  9 |     dict = collections.defaultdict(list)
 10 |  
 11 |     # 黑色
 12 |     lower_black = np.array([0, 0, 0])
 13 |     upper_black = np.array([180, 255, 46])
 14 |     color_list = []
 15 |     color_list.append(lower_black)
 16 |     color_list.append(upper_black)
 17 |     dict['black'] = color_list
 18 |  
 19 |     #灰色
 20 |     lower_gray = np.array([0, 0, 46])
 21 |     upper_gray = np.array([180, 43, 220])
 22 |     color_list = []
 23 |     color_list.append(lower_gray)
 24 |     color_list.append(upper_gray)
 25 |     dict['gray']=color_list
 26 |  
 27 |     # 白色
 28 |     lower_white = np.array([0, 0, 221])
 29 |     upper_white = np.array([180, 30, 255])
 30 |     color_list = []
 31 |     color_list.append(lower_white)
 32 |     color_list.append(upper_white)
 33 |     dict['white'] = color_list
 34 |  
 35 |     #红色
 36 |     lower_red = np.array([156, 43, 46])
 37 |     upper_red = np.array([180, 255, 255])
 38 |     color_list = []
 39 |     color_list.append(lower_red)
 40 |     color_list.append(upper_red)
 41 |     dict['red']=color_list
 42 |  
 43 |     # 红色2
 44 |     lower_red = np.array([0, 43, 46])
 45 |     upper_red = np.array([10, 255, 255])
 46 |     color_list = []
 47 |     color_list.append(lower_red)
 48 |     color_list.append(upper_red)
 49 |     dict['red2'] = color_list
 50 |  
 51 |     #橙色
 52 |     lower_orange = np.array([11, 43, 46])
 53 |     upper_orange = np.array([25, 255, 255])
 54 |     color_list = []
 55 |     color_list.append(lower_orange)
 56 |     color_list.append(upper_orange)
 57 |     dict['orange'] = color_list
 58 |  
 59 |     #黄色
 60 |     lower_yellow = np.array([26, 43, 46])
 61 |     upper_yellow = np.array([34, 255, 255])
 62 |     color_list = []
 63 |     color_list.append(lower_yellow)
 64 |     color_list.append(upper_yellow)
 65 |     dict['yellow'] = color_list
 66 |  
 67 |     #绿色
 68 |     lower_green = np.array([35, 43, 46])
 69 |     upper_green = np.array([77, 255, 255])
 70 |     color_list = []
 71 |     color_list.append(lower_green)
 72 |     color_list.append(upper_green)
 73 |     dict['green'] = color_list
 74 |  
 75 |     #青色
 76 |     lower_cyan = np.array([78, 43, 46])
 77 |     upper_cyan = np.array([99, 255, 255])
 78 |     color_list = []
 79 |     color_list.append(lower_cyan)
 80 |     color_list.append(upper_cyan)
 81 |     dict['cyan'] = color_list
 82 |  
 83 |     #蓝色
 84 |     lower_blue = np.array([100, 43, 46])
 85 |     upper_blue = np.array([124, 255, 255])
 86 |     color_list = []
 87 |     color_list.append(lower_blue)
 88 |     color_list.append(upper_blue)
 89 |     dict['blue'] = color_list
 90 |  
 91 |     # 紫色
 92 |     lower_purple = np.array([125, 43, 46])
 93 |     upper_purple = np.array([155, 255, 255])
 94 |     color_list = []
 95 |     color_list.append(lower_purple)
 96 |     color_list.append(upper_purple)
 97 |     dict['purple'] = color_list
 98 |  
 99 |     return dict
100 |  
101 |  
if __name__ == '__main__':
    # Self-check: print the whole table, its size, then each upper bound
    color_dict = getColorList()
    print(color_dict)

    num = len(color_dict)
    print('num=', num)

    for d, bounds in color_dict.items():
        print('key=', d)
        print('value=', bounds[1])
112 | 


--------------------------------------------------------------------------------
/Python+Opencv/颜色_圆_相关/视频/detect_camera_color.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import cv2
 3 | 
 4 | #设定红色阈值，HSV空间
 5 | redLower = np.array([170, 100, 100])
 6 | redUpper = np.array([179, 255, 255])
 7 | 
 8 | #打开摄像头
 9 | camera = cv2.VideoCapture(0)
10 | 
11 | #遍历每一帧，检测红色瓶盖
12 | while True:
13 |     #读取帧
14 |     (ret, frame) = camera.read()
15 |     #判断是否成功打开摄像头
16 |     if not ret:
17 |         print('No Camera')
18 |         break
19 |     #转到HSV空间
20 |     hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
21 |     #根据阈值构建掩膜
22 |     mask = cv2.inRange(hsv, redLower, redUpper)
23 |     #腐蚀操作
24 |     mask = cv2.erode(mask, None, iterations=2)
25 |     #膨胀操作，其实先腐蚀再膨胀的效果是开运算，去除噪点
26 |     mask = cv2.dilate(mask, None, iterations=2)
27 |     #轮廓检测
28 |     cnts = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
29 |     #初始化瓶盖圆形轮廓质心
30 |     center = None
31 |     #如果存在轮廓
32 |     if len(cnts) > 0:
33 |         #找到面积最大的轮廓
34 |         c = max(cnts, key = cv2.contourArea)
35 |         #确定面积最大的轮廓的外接圆
36 |         ((x, y), radius) = cv2.minEnclosingCircle(c)
37 |         #计算轮廓的矩
38 |         M = cv2.moments(c)
39 |         #计算质心
40 |         center = (int(M["m10"]/M["m00"]), int(M["m01"]/M["m00"]))
41 |         #只有当半径大于10时，才执行画图
42 |         if radius > 10:
43 |             cv2.circle(frame, (int(x), int(y)), int(radius), (0, 255, 255), 2)
44 |             cv2.circle(frame, center, 5, (0, 0, 255), -1)
45 | 
46 |     cv2.imshow('Frame', frame)
47 |     #键盘检测，检测到esc键退出
48 |     if cv2.waitKey(1) & 0xFF == ord('q'):
49 |         break
50 | #摄像头释放
51 | camera.release()
52 | #销毁所有窗口
53 | cv2.destroyAllWindows()
54 | 


--------------------------------------------------------------------------------
/Python+Opencv/颜色_圆_相关/视频/detect_camera_color_circle.py:
--------------------------------------------------------------------------------
 1 | import cv2
 2 | import numpy as np
 3 | import time
 4 | 
 5 | cap = cv2.VideoCapture(3)
 6 | 
 7 | 
 8 | # color from website:https://blog.csdn.net/taily_duan/article/details/51506776
 9 | #black
10 | low_range = np.array([0, 0, 0])
11 | high_range = np.array([180, 255, 46])
12 | 
13 | #red
14 | 
15 | while(1):
16 |     # get a frame and show
17 |     ret, frame = cap.read()
18 | 
19 |     # change to hsv model
20 |     hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
21 | 
22 |     # get mask
23 |     mask = cv2.inRange(hsv, low_range, high_range)
24 |     dilated = cv2.dilate(mask, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3)), iterations=2)
25 | 
26 |     # detect red
27 |     circles = cv2.HoughCircles(dilated, cv2.HOUGH_GRADIENT, 1, 1000, param1=15, param2=10, minRadius=0, maxRadius=50)
28 | 
29 |     if circles is not None:
30 |         x, y, radius = circles[0][0]
31 |         center = (x, y)
32 |         cv2.circle(frame, center, radius, (0, 255, 0), 2)
33 |         cv2.circle(frame, center, 2, (0,255,0), -1, 8, 0 );
34 |         print('圆心：{}, {}'.format(x, y))
35 | 
36 |     cv2.imshow('result', frame)
37 | 
38 |     if cv2.waitKey(1) & 0xFF == ord('q'):
39 |         break
40 | 
41 | cap.release()
42 | cv2.destroyAllWindows()


--------------------------------------------------------------------------------
/Python+PC-Control/mouse_control.py:
--------------------------------------------------------------------------------
 1 | import cv2
 2 | import pyautogui
 3 | from pynput.mouse import Button, Controller
 4 | 
 5 | ## 使用pynput库进行控制
 6 | mouse = Controller()
 7 | print('鼠标位置', mouse.position)
 8 | 
 9 | #获取屏幕的尺寸
10 | print('屏幕尺寸', pyautogui.size())
11 | 
12 | #获取当前鼠标的位置
13 | 
14 | print('鼠标位置', pyautogui.position())
15 | 


--------------------------------------------------------------------------------
/Python+PC-Control/mouse_monitor.py:
--------------------------------------------------------------------------------
 1 | from pynput import mouse
 2 | 
 3 | 
 4 | def on_move(x, y):
 5 |     print('Pointer moved to {0}'.format((x, y)))
 6 | 
 7 | 
 8 | def on_click(x, y, button, pressed):
 9 |     print('{0} at {1}'.format('Pressed' if pressed else 'Released', (x, y)))
10 |     if not pressed:
11 |         # Stop listener
12 |         return False
13 | 
# Handle mouse events until a callback returns False
with mouse.Listener(on_click=on_click, on_move=on_move) as listener:
    listener.join()
17 | 


--------------------------------------------------------------------------------
/Python+PC-Control/moyu.py:
--------------------------------------------------------------------------------
 1 | # 摸鱼脚本：鼠标划拉划拉
 2 | 
 3 | from pynput import mouse, keyboard
 4 | from pynput.mouse import Button, Controller
 5 | import time
 6 | 
 7 | mouse_move = Controller()
 8 | 
 9 | def move(t=10):
10 |     mouse_move.move(10, 10)
11 |     time.sleep(t)
12 |     mouse_move.move(-10, -10)
13 |     time.sleep(t)
14 | 
def on_click(x, y, button, pressed):
    """Print a press/release event and request a stop on release.

    The original body began with a malformed ``while True:2e`` line that made
    the whole file a syntax error; it is removed here.
    NOTE(review): ``move()`` is never called anywhere in this script; confirm
    whether the broken line was meant to start the jiggle loop.

    Returns:
        False when the button was released (used to stop the listener),
        otherwise None.
    """
    print('{0} at {1}'.format('Pressed' if pressed else 'Released', (x, y)))
    if not pressed:
        # Stop listener
        return False
21 | 
# Handle click events until on_click returns False
click_listener = mouse.Listener(on_click=on_click)
with click_listener as listener:
    listener.join()


--------------------------------------------------------------------------------
/Python+Piano/read.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yzy1996/Python-Code/2abcaa6fbfa4a84aaffdf10d7bcc6b12649dd221/Python+Piano/read.py


--------------------------------------------------------------------------------
/Python+arXiv/README.md:
--------------------------------------------------------------------------------
 1 | # Paper info into MarkDown
 2 | 
 3 | A tool to help you transfer arXiv information into markdown format.
 4 | 
 5 | <div align=center><img width="600" src="https://raw.githubusercontent.com/yzy1996/Image-Hosting/master/202303282243237.png"/></div>
 6 | 
 7 | ## Installation
 8 | 
 9 | You can use these scripts from anywhere once the package is installed:
10 | 
11 | ```bash
12 | pip install -U yzy 
13 | ```
14 | 
15 | Or you can develop from the source code.
16 | 
17 | ## Usage (v1.1)
18 | 
19 | - **id2md.py**: input arxiv id and then get the markdown format output.
20 | - **file2md.py**: input the name of a folder containing papers and get the markdown-formatted output.
21 | - **checkv.py**: auto-update the pdf version.
22 | 
23 | ```bash
24 | python -m yzy.id2md
25 | 
26 | python -m yzy.file2md
27 | 
28 | python -m yzy.checkv
29 | ```
30 | 
31 | Then you can copy the text and (shift) paste to your markdown file and modify the information as you wish.
32 | 
33 | ## Development
34 | 
35 | You can add more conferences to [conf_list.txt](./conf_list.txt).
36 | 
37 | ## TODO
38 | 
39 | - [x] build pypi package
40 | - [x] check and update the local pdf version
41 | - [ ] add Internet conf search
42 | - [ ] build a website to use
43 | 
44 | ## 3rd-Party Usages
45 | 
46 | [arxiv_daily_tools](https://github.com/weihaox/arxiv_daily_tools)
47 | 
48 | ## Contact
49 | 
50 | Please email im.crazyang@gmail.com or open a GitHub issue here if you have any questions or suggestions.
51 | 
52 | ## Related
53 | 
54 | https://github.com/MLNLP-World/SimBiber
55 | 
56 | https://github.com/yuchenlin/rebiber
57 | 
58 | https://github.com/j3soon/arxiv-utils
59 | 
60 | https://github.com/vict0rsch/PaperMemory
61 | 
62 | https://www.mybib.com/#/projects/WZg97M/citations


--------------------------------------------------------------------------------
/Python+arXiv/check_update.py:
--------------------------------------------------------------------------------
 1 | from urllib import request
 2 | from urllib.error import URLError, HTTPError
 3 | import re
 4 | from pathlib import Path
 5 | from pypdf import PdfReader
 6 | import feedparser
 7 | import os
 8 | 
 9 | class Information():
10 |     def __init__(self, query_title) -> None:
11 | 
12 |         with open(query_title, 'rb') as f:
13 | 
14 |             pdf = PdfReader(f)
15 |             first_page = pdf.pages[0]
16 |             text_split = first_page.extract_text().split()
17 | 
18 | 
19 |             for page in pdf.pages:
20 |                 if "/Annots" in page:
21 |                     for annot in page["/Annots"]:
22 |                         if annot.get_object()["/Subtype"] != "/Link":
23 |                             self.flag_annot = True
24 |                             break
25 |                     else:
26 |                         continue
27 |                     break
28 |             else:
29 |                 self.flag_annot = False
30 | 
31 | 
32 |         str_id = text_split[-5]
33 |         self.id_version_local = re.findall(r'\d{4}\.\d{5}v\d{1}', str_id)[0]
34 |         query_url = f'http://export.arxiv.org/api/query?id_list={self.id_version_local[:-2]}'
35 | 
36 |         try:
37 |             response = request.urlopen(query_url)
38 |         except HTTPError as e:
39 |             print('Error code: ', e.code)
40 |         except URLError as e:
41 |             print('Reason: ', e.reason)
42 |         else:
43 |             export_arxiv = response.read().decode('utf-8')
44 | 
45 |         feed = feedparser.parse(export_arxiv)
46 | 
47 |         self.query_title = query_title
48 |         self.title = re.sub(r'[^\w\s-]', '', feed.entries[0].title).replace("\n", "").replace("  ", " ")
49 |         self.year = feed.entries[0].published[:4]
50 |         self.abs_url_version = feed.entries[0].id
51 |         self.abs_url = self.abs_url_version[:-2]
52 |         self.pdf_url = self.abs_url.replace('abs', 'pdf')
53 |         self.id_version_server = self.abs_url_version[-12:]
54 | 
55 |     def check_update(self):
56 |         if self.id_version_server != self.id_version_local:
57 | 
58 |             # 检查是否有笔记
59 |             if not self.flag_annot:
60 |                 print(f'>>> Downloading from {self.id_version_local} to {self.id_version_server}')
61 |                 request.urlretrieve(information.pdf_url, f'{information.year}_{information.title}.pdf')
62 |                 os.remove(self.query_title)  
63 | 
64 |             else:
65 |                 print(f'>>> 请注意：{information.title} 存在更新版，但旧版本有笔记标注。')
66 |                 request.urlretrieve(information.pdf_url, f'{information.year}_{information.title} (1).pdf')
67 | 
if __name__ == "__main__":

    # Every PDF in the current directory, in sorted order
    for file in sorted(Path('./').glob('*.pdf')):
        try:
            information = Information(query_title=file)
            information.check_update()
        except Exception as ex:
            # Report the failure and continue with the next file
            print('Error: ', ex)
81 | 
82 | 
83 | 


--------------------------------------------------------------------------------
/Python+arXiv/conf.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "CVPR": "{IEEE/CVF} Conference on Computer Vision and Pattern Recognition",
 3 |   "ICCV": "{IEEE/CVF} International Conference on Computer Vision",
 4 |   "ECCV": "European Conference on Computer Vision",
 5 | 
 6 |   "ICML": "International Conference on Machine Learning",
 7 |   "NeurIPS": "Advances in Neural Information Processing Systems",
 8 |   "ICLR": "International Conference on Learning Representations",
 9 |   
10 |   "AAAI": "{AAAI} Conference on Artificial Intelligence",
11 |   "IJCAI": "International Joint Conferences on Artificial Intelligence",
12 | 
13 |   "gecco": "Genetic and Evolutionary Computation Conference",
14 |   "cec": "{IEEE} Congress on Evolutionary Computation",
15 |   "wcci": "{IEEE} World Congress on Computational Intelligence",
16 | 
17 |   "uai": "Conference on Uncertainty in Artificial Intelligence",
18 |   "ppsn": "International Conference on Parallel Problem Solving from Nature",
19 |   "aistats": "International Conference on Artificial Intelligence and Statistics",
20 |   "emo": "International Conference on Evolutionary Multi-Criterion Optimization",
21 |   
22 |   "sac": "ACM Symposium on Applied Computing",
23 | 
24 |   "ICIP": "{IEEE} International Conference on Image Processing",
25 |   "ICPR": "International Conference on Pattern Recognition",
26 |   "3DV": "International Conference on 3D Vision",
27 |   "ACCV": "Asian Conference on Computer Vision",
28 |   "WACV": "{IEEE} Winter Conference on Applications of Computer Vision",
29 |   "ICME": "International Conference on Multimedia and Expo",
30 |   "BMVC": "British Machine Vision Conference",
31 |   "ACM MM": "{ACM} International Conference on Multimedia",
32 |   "SIGGRAPH": "{ACM SIGGRAPH} Annual Conference"
33 | }
34 | 


--------------------------------------------------------------------------------
/Python+arXiv/conf_list.txt:
--------------------------------------------------------------------------------
 1 | ICML
 2 | NeurIPS
 3 | NIPS
 4 | ICLR
 5 | CVPR
 6 | ICCV
 7 | ECCV
 8 | AAAI
 9 | IJCAI
10 | UAI
11 | WSDM
12 | WWW
13 | Webconf
14 | AAAI
15 | AISTATS
16 | ALENEX
17 | ASONAM
18 | BMVC
19 | CHI
20 | CIDR
21 | CIKM
22 | COLT
23 | ICASSP
24 | INTERSPEECH
25 | KDD
26 | MM
27 | RECSYS
28 | SDM
29 | SIGIR
30 | SIGMOD
31 | SODA
32 | STOC


--------------------------------------------------------------------------------
/Python+arXiv/file2md.py:
--------------------------------------------------------------------------------
 1 | from urllib import request
 2 | import re
 3 | from pathlib import Path
 4 | from pypdf import PdfReader
 5 | import feedparser
 6 | 
 7 | p = Path(__file__)
 8 | 
 9 | with open(p.parents[0] / 'conf_list.txt', 'r') as f:
10 |     data = f.read()
11 | CONF = data.split('\n')
12 | CONF = '|'.join(CONF)
13 | 
class Information():
    """Metadata of one local paper PDF, fetched from the arXiv API."""

    def __init__(self, query_title) -> None:
        """Query arXiv by the id found in the PDF, or by file name as a fallback.

        Args:
            query_title: path of the PDF; its name without the 4-character
                extension is used as the title when no id can be read.

        Raises:
            IndexError: the API answer contains no entry.
        """
        # read_pdf raises IndexError when page one holds no arXiv id; treat
        # that as "no id" so the title search below is actually reachable.
        try:
            query_id = read_pdf(query_title)
        except IndexError:
            query_id = ''

        if re.findall(r'\d{4}\.\d{5}', query_id):
            query_url = f'http://export.arxiv.org/api/query?id_list={query_id}'

        else:
            query_title1 = str(query_title)[:-4].replace(' ', '+').replace('-', '+')
            query_url = f'https://export.arxiv.org/api/query?search_query=ti:{query_title1}&max_results=1'

        # Kept as an attribute because the original stored it (in one branch only)
        self.query_url = query_url

        export_arxiv = request.urlopen(query_url).read().decode('utf-8')
        entry = feedparser.parse(export_arxiv).entries[0]

        self.title = re.sub(r'[^\w\s-]', '', entry.title)
        self.authors = [author.name for author in entry.authors]
        self.abs_url_version = entry.id
        # Drop the trailing version suffix, whatever its number of digits
        self.abs_url = re.sub(r'v\d+$', '', self.abs_url_version)
        self.pdf_url = self.abs_url.replace('abs', 'pdf')
        self.id_version = self.abs_url_version.rsplit('/', 1)[-1]
        self.year = entry.published[:4]
        self.summary = entry.summary

        try:
            # AttributeError: entry has no arxiv_comment;
            # IndexError: the comment names no known conference or no 4-digit year
            self.comment = entry.arxiv_comment
            self.conf = re.findall(rf'({CONF})', self.comment)[0]
            self.conf_year = re.findall(r'(\d{4})', self.comment)[0]
            self.publish = f'{self.conf} {self.conf_year}' if self.conf else f'arXiv {self.year}'
        except (AttributeError, IndexError):
            self.publish = f'arXiv {self.year}'

    def write_notes(self):
        """Print the paper as a two-line markdown list item, followed by a blank line."""

        title_url = f'[{self.title}]({self.abs_url})  '
        publish = f'**[`{self.publish}`]**'
        authors = ', '.join(self.authors)
        authors = f'*{authors}*'

        print('-', title_url)
        print(' ', publish, authors, '\n')
55 | 
56 | 
def read_pdf(filename, update=False):
    '''
    Return the arXiv id, without its version suffix, read from a PDF's first page.

    filename: path of the PDF file.
    update:   accepted but not used by this function.

    Raises IndexError when the first page has fewer than five text tokens or
    the inspected token contains no versioned id.
    '''

    with open(filename, 'rb') as handle:
        tokens = PdfReader(handle).pages[0].extract_text().split()

    # assumes the id stamp is the fifth token from the end of page one — TODO confirm
    versioned_ids = re.findall(r'\d{4}\.\d{5}v\d{1}', tokens[-5])
    # strip the two-character version suffix, e.g. "v2"
    return versioned_ids[0][:-2]
73 | 
74 | 
if __name__ == "__main__":

    # Every PDF in the current directory, in sorted order
    for file in sorted(Path('./').glob('*.pdf')):
        try:
            information = Information(query_title=file)
            information.write_notes()
        except Exception as ex:
            # Report the failure and continue with the next file
            print('Error: ', ex)
88 | 


--------------------------------------------------------------------------------
/Python+arXiv/id2md.py:
--------------------------------------------------------------------------------
 1 | from urllib import request
 2 | import re
 3 | import feedparser
 4 | from pathlib import Path
 5 | 
 6 | p = Path(__file__)
 7 | 
 8 | with open(p.parents[0] / 'conf_list.txt', 'r') as f:
 9 |     data = f.read()
10 | CONF = data.split('\n')
11 | CONF = '|'.join(CONF)
12 | 
class Information():
    '''
    extract information from arxiv api
    '''

    def __init__(self, query_id) -> None:
        '''
        Query the arXiv API for `query_id` and store the first entry's fields.

        Raises IndexError when the API answer contains no entry.
        '''

        query_url = f'http://export.arxiv.org/api/query?id_list={query_id}'
        export_arxiv = request.urlopen(query_url).read().decode('utf-8')
        entry = feedparser.parse(export_arxiv).entries[0]

        self.title = re.sub(r'[^\w\s-]', '', entry.title)
        self.authors = [author.name for author in entry.authors]
        self.abs_url_version = entry.id
        # Drop the trailing version suffix, whatever its number of digits
        self.abs_url = re.sub(r'v\d+$', '', self.abs_url_version)
        self.pdf_url = self.abs_url.replace('abs', 'pdf')
        self.id_version = self.abs_url_version.rsplit('/', 1)[-1]
        self.year = entry.published[:4]
        self.summary = entry.summary

        try:
            # AttributeError: entry has no arxiv_comment;
            # IndexError: the comment names no known conference or no 4-digit year
            self.comment = entry.arxiv_comment
            self.conf = re.findall(rf'({CONF})', self.comment)[0]
            self.conf_year = re.findall(r'(\d{4})', self.comment)[0]
            self.publish = f'{self.conf} {self.conf_year}' if self.conf else f'arXiv {self.year}'
        except (AttributeError, IndexError):
            self.publish = f'arXiv {self.year}'

    def write_notes(self):
        '''
        define the markdown format and write notes
        '''

        title_url = f'[{self.title}]({self.abs_url})  '
        publish = f'**[`{self.publish}`]**'
        authors = ', '.join(self.authors)
        authors = f'*{authors}*'

        print('-', title_url)
        print(' ', publish, authors)
53 | 
if __name__ == "__main__":

    # Keep prompting until the input stream ends or the user presses Ctrl-C
    while True:
        try:
            query_id = input("type id: ").strip()
        except (EOFError, KeyboardInterrupt):
            print()
            break

        # 2011.13126 2210.08823
        # fullmatch: reject trailing garbage; an explicit version suffix is allowed
        if not re.fullmatch(r'\d{4}\.\d{5}(v\d+)?', query_id):
            if query_id:
                print('Error: ', f'{query_id!r} is not an arXiv id such as 2011.13126')
            continue

        try:
            information = Information(query_id)
            information.write_notes()
        except Exception as ex:
            # Report the failure (network error, unknown id) and prompt again
            print('Error: ', ex)

        # >>>
        # - [Lifting 2D StyleGAN for 3D-Aware Face Generation](http://arxiv.org/abs/2011.13126v2)  
        #   **[`CVPR 2021`]** *Yichun Shi, Divyansh Aggarwal, Anil K. Jain*


--------------------------------------------------------------------------------
/Pythonic-Standard/Config文件编写/Argparse使用指南.md:
--------------------------------------------------------------------------------
  1 | # Argparse使用指南
  2 | 
  3 | > argument parser 参数 解析器
  4 | >
  5 | > [官方介绍](https://docs.python.org/zh-cn/3/library/argparse.html)，先介绍的是不常用的ArgumentParser对象参数设定，后面才是常用的add_argument参数设定
  6 | 
  7 | 
  8 | 
  9 | ## 基本使用只有四步：
 10 | 
 11 | - 创建解析
 12 | 
 13 | ```python
 14 | import argparse
 15 | parser = argparse.ArgumentParser()
 16 | ```
 17 | 
 18 | - 添加参数
 19 | 
 20 | ```python
 21 | parser.add_argument("square", help="display a square of a given number", type=int)
 22 | ```
 23 | 
 24 | - 解析参数
 25 | 
 26 | ```python
 27 | args = parser.parse_args()
 28 | ```
 29 | 
 30 | - 使用参数
 31 | 
 32 | ```python
 33 | print(args.square ** 2)
 34 | ```
 35 | 
 36 | 
 37 | 
 38 | ## 详细讲如何添加参数
 39 | 
 40 | 
 41 | 
 42 | `add_argument()` 可以设置的选项非常多
 43 | 
 44 | ```python
 45 | parser.add_argument([名称], action=[动作], nargs=[], const=[], default=[默认值], type=[参数类型], choices=[], required=[], help=[], metavar=[], dest=[])	
 46 | ```
 47 | 
 48 | 但最常用的是
 49 | 
 50 | ```python
 51 | parser.add_argument('--square', type=int, default=1, help='display a square of a given number')
 52 | ```
 53 | 
 54 | > 为了可读性，一般都是选择使用可选参数
 55 | 
 56 | 
 57 | 
 58 | ### name
 59 | 
 60 | > 参数名是为了在使用时可以直接调用 `args.name`
 61 | 
 62 | 有两种参数名的设置方式，差异体现在名称前有没有 `--` 或者 `-` (名称缩写的时候)
 63 | 
 64 | - 必写参数 (没有`-`)：按照设置的先后顺序对应读取，调用时不用写名称
 65 | - 可选参数 (有`-`)：可以无序读取，调用时要写名称
 66 | 
 67 | 
 68 | 
 69 | ### type
 70 | 
 71 | > 默认是 `str`，指定为其他可以充当类型转换器
 72 | 
 73 | 
 74 | 
 75 | ### default
 76 | 
 77 | > 当**可选参数**没有传入值时，使用默认值
 78 | 
 79 | ```python
 80 | import argparse
 81 | parser = argparse.ArgumentParser()
 82 | parser.add_argument("--square", type=int, default=1)
 83 | args = parser.parse_args()
 84 | print(args.square ** 2)
 85 | ```
 86 | 
 87 | 运行 `python use_argparse.py` 输出 `1`
 88 | 
 89 | 运行 `python use_argparse.py --square 2` 输出 `4`
 90 | 
 91 | 
 92 | 
 93 | ### help
 94 | 
 95 | > 用来描述这个参数的目的，执行 `-h` 或者 `--help` 时会显示
 96 | 
 97 | 
 98 | 
 99 | ### action
100 | 
101 | > 指定了这个命令行参数应当如何被处理
102 | 
103 | 
104 | 
105 | 支持的操作有（只介绍主要的，其余的看文档）：
106 | 
107 | - store
108 | 
109 | 这是默认操作，我们能够使用参数，就是因为输入值被 `store` 在这个参数名里
110 | 
111 | 
112 | 
113 | - store_true / store_false
114 | 
115 | store_const 的特殊用法，存储 `True` 和 `False`
116 | 
117 | 
118 | 
119 | - append
120 | 
121 | 储存一个列表，将每个参数值添加到列表中，一般是为了允许多次使用选项，只在可选参数时使用
122 | 
123 | ```python
124 | import argparse
125 | parser = argparse.ArgumentParser()
126 | parser.add_argument("--foo")
127 | args = parser.parse_args()
128 | print(args.foo)
129 | ```
130 | 
131 | 运行 `python use_argparse.py --foo 1 --foo 2` 输出 `2`
132 | 
133 | ```python
134 | import argparse
135 | parser = argparse.ArgumentParser()
136 | parser.add_argument("--foo", action='append')
137 | args = parser.parse_args()
138 | print(args.foo)
139 | ```
140 | 
141 | 运行 `python use_argparse.py --foo 1 --foo 2` 输出 `['1', '2']`
142 | 
143 | 
144 | 
145 | - count
146 | 
147 | > 计算一个关键字参数出现的次数
148 | 
149 | ```python
150 | import argparse
151 | parser = argparse.ArgumentParser()
152 | parser.add_argument('-v', action='count', default=0)
153 | args = parser.parse_args()
154 | print(args.v)
155 | ```
156 | 
157 | 运行 `python use_argparse.py -vvv` 输出 `3`
158 | 
159 | 运行 `python use_argparse.py -v -v -v` 输出 `3`
160 | 
161 | 
162 | 
163 | ### nargs
164 | 
165 | > 关联不同数目的命令行参数到单一动作，因为 `action` 是单一项目消耗单一命令行参数
166 | 
167 | - N：可以且必须传入 `N` 个参数，然后被聚集到一个列表中
168 | 
169 | ```python
170 | import argparse
171 | parser = argparse.ArgumentParser()
172 | parser.add_argument("foo", nargs=2)
173 | args = parser.parse_args()
174 | print(args.foo)
175 | ```
176 | 
177 | 运行 `python use_argparse.py 1` 输出 `error`
178 | 
179 | 运行 `python use_argparse.py 1 2` 输出 `['1', '2']`
180 | 
181 | 
182 | 
183 | - `?`：最多消耗一个命令行参数。命令行给了值就用该值；可选参数只写了选项名而没有跟值时取 `const`；该参数完全没有出现时取 `default`
184 | 
185 | 
186 | 
187 | - `*`、`+`：任意数量的参数，聚集到一个列表中；`*` 允许零个，`+` 至少需要一个
188 | 
189 | ```python
190 | import argparse
191 | parser = argparse.ArgumentParser()
192 | parser.add_argument("foo", nargs='+')
193 | args = parser.parse_args()
194 | print(args.foo)
195 | ```
196 | 
197 | 运行 `python use_argparse.py 1` 输出 `['1']`
198 | 
199 | 运行 `python use_argparse.py 1 2` 输出 `['1', '2']`
200 | 
201 | 
202 | 
203 | 
204 | 
205 | ---
206 | 
207 | 
208 | 
209 | 通常 `Argparse` 都是在命令行，如果想要在脚本文件里直接调试怎么办呢？
210 | 
211 | **解析的时候把参数传进去**，像这样：
212 | 
213 | ```python
214 | import argparse
215 | parser = argparse.ArgumentParser()
216 | parser.add_argument('echo')
217 | args = parser.parse_args(['hello world!'])
218 | print(args.echo)
219 | ```
220 | 
221 | 如果是可选参数
222 | 
223 | ```python
224 | import argparse
225 | parser = argparse.ArgumentParser()
226 | parser.add_argument('--echo')
227 | args = parser.parse_args(['--echo', 'hello world!'])
228 | print(args.echo)
229 | ```
230 | 
231 | 两者也可以结合起来


--------------------------------------------------------------------------------
/Pythonic-Standard/Config文件编写/Config文件编写.md:
--------------------------------------------------------------------------------
 1 | # Config文件编写
 2 | 
 3 | 
 4 | 
 5 | 主要是使用[Omegaconf](https://omegaconf.readthedocs.io/en/2.2_branch/) and addict 两个库
 6 | 
 7 | [hydra](https://hydra.cc/docs/intro/)库
 8 | 
 9 | 
10 | OmegaConf 是一个基于 YAML 的分层配置系统。
11 | 
12 | 
13 | ### addict
14 | 
15 | 像访问属性一样访问字典。
16 | 
17 | Omegaconf 也提供了addict一样的访问方式


--------------------------------------------------------------------------------
/Pythonic-Standard/Config文件编写/YAML使用指南.md:
--------------------------------------------------------------------------------
  1 | # YAML使用指南
  2 | 
  3 | > 配置文件
  4 | 
  5 | 
  6 | 
  7 | 安装并使用包
  8 | 
  9 | ```python
 10 | # 先在命令行执行：pip install pyyaml
 11 | import yaml
 12 | ```
 13 | 
 14 | 
 15 | 
 16 | 读取
 17 | 
 18 | ```python
 19 | import yaml
 20 | 
 21 | with open('config.yaml') as f:
 22 |     config = yaml.safe_load(f)
 23 |     
 24 | print(config)
 25 | ```
 26 | 
 27 | 
 28 | 
 29 | ## YAML文件规范
 30 | 
 31 | - 区分大小写；
 32 | - 使用缩进表示层级关系；
 33 | - 使用空格键缩进，而非Tab键缩进
 34 | - 缩进的空格数目不固定，只需要相同层级的元素左侧对齐；
 35 | - 文件中的字符串不需要使用引号标注，但若字符串包含有特殊字符则需用引号标注；
 36 | - 注释标识为#
 37 | 
 38 | 
 39 | 
 40 | - 对象：键值对的集合（简称 "映射或字典"）
 41 | - 键值对用冒号 “:” 结构表示，冒号与值之间需用空格分隔
 42 | - 数组：一组按序排列的值（简称 "序列或列表"）
 43 | - 数组前加有 “-” 符号，符号与值之间需用空格分隔
 44 | - 纯量(scalars)：单个的、不可再分的值（如：字符串、bool值、整数、浮点数、时间、日期、null等）
 45 | - None值可用 null 或 ~ 表示
 46 | 
 47 | 
 48 | 
 49 | ## 几种类型
 50 | 
 51 | **(1) 字典**
 52 | 
 53 | ```
 54 | usr: Jerry
 55 | psw: 123456
 56 | ```
 57 | 
 58 | 解析结果：
 59 | 
 60 | ```
 61 | {'usr': 'Jerry', 'psw': 123456}
 62 | ```
 63 | 
 64 | 
 65 | 
 66 | **(2) 数组**
 67 | 
 68 | ```
 69 | - 1
 70 | - 2
 71 | - 3
 72 | ```
 73 | 
 74 | 解析结果：
 75 | 
 76 | ```
 77 | [1, 2, 3]
 78 | ```
 79 | 
 80 | 
 81 | 
 82 | **(3) 字典 嵌套 字典**
 83 | 
 84 | ```
 85 | usr1:
 86 |   name: Tom
 87 |   psw: 123456
 88 | usr2:
 89 |   name: Jerry
 90 |   psw: 123456
 91 | ```
 92 | 
 93 | 解析结果：
 94 | 
 95 | ```
 96 | {'usr1': {'name': 'Tom', 'psw': 123456}, 'usr2': {'name': 'Jerry', 'psw': 123456}}
 97 | ```
 98 | 
 99 | 
100 | 
101 | **(4) 字典 嵌套 数组**
102 | 
103 | ```
104 | usr1:
105 |   - 1
106 |   - 2
107 | usr2:
108 |   - 1
109 |   - 2
110 | ```
111 | 
112 | 解析结果：
113 | 
114 | ```
115 | {'usr1': [1, 2], 'usr2': [1, 2]}
116 | ```
117 | 
118 | 
119 | 
120 | **(5) 数组 嵌套 字典**
121 | 
122 | ```
123 | - use1: Jerry
124 |   psw: 123456
125 | - use2: Tome
126 |   psw: 123456
127 | ```
128 | 
129 | 解析结果：
130 | 
131 | ```
132 | [{'use1': 'Jerry', 'psw': 123456}, {'use2': 'Tome', 'psw': 123456}]
133 | ```
134 | 
135 | 
136 | 
137 | 
138 | 
139 | ## 基本数据类型
140 | 
141 | ```
142 | s_val: name             # 字符串：{'s_val': 'name'}
143 | spec_s_val: "name\n"    # 特殊字符串：{'spec_s_val': 'name\n'}
144 | num_val: 31.14          # 数字：{'num_val': 31.14}
145 | bol_val: true           # 布尔值：{'bol_val': True}
146 | nul_val: null           # null值：{'nul_val': None}
147 | nul_val1: ~             # null值：{'nul_val1': None}
148 | time_val: 2018-03-01t11:33:22.55-06:00     # 时间值(iso8601格式)：{'time_val': datetime.datetime(2018, 3, 1, 17, 33, 22, 550000)}
149 | date_val: 2019-01-10    # 日期值：{'date_val': datetime.date(2019, 1, 10)}
150 | ```
151 | 
152 | 
153 | 
154 | 


--------------------------------------------------------------------------------
/Pythonic-Standard/Config文件编写/config.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import yaml
 3 | import addict
 4 | import argparse
 5 | from omegaconf import OmegaConf
 6 | 
 7 | 
 8 | class ConfigDict(addict.Dict):  # addict.Dict variant that raises on missing keys/attributes
 9 |     # Borrowed from https://github.com/open-mmlab/mmcv/blob/master/mmcv/utils/config.py
10 |     def __missing__(self, name):  # hook for looking up a key that is absent
11 |         raise KeyError(name)  # fail with KeyError rather than producing a value for the absent key
12 |     def __getattr__(self, name):  # attribute-style access, e.g. cfg.some_key
13 |         try:
14 |             value = super().__getattr__(name)  # delegate the lookup to addict.Dict
15 |         except KeyError:
16 |             ex = AttributeError(f"'{self.__class__.__name__}' object has no attribute '{name}'")  # report an absent key as an ordinary AttributeError
17 |         except Exception as e:
18 |             ex = e  # any other exception is kept and re-raised unchanged below
19 |         else:
20 |             return value  # lookup succeeded
21 |         raise ex  # raised after the try statement has finished, not from inside a handler
22 | 
23 | def load_config(path: str):  # load the config file at `path` and return it as a ConfigDict
24 |     cfg = OmegaConf.load(path)  # parse the file into an OmegaConf config object
25 |     cfg = ConfigDict(OmegaConf.to_container(cfg, resolve=True)) # Resolve in advance, then wrap the plain container in ConfigDict.
26 |     return cfg


--------------------------------------------------------------------------------
/Pythonic-Standard/Config文件编写/config.yaml:
--------------------------------------------------------------------------------
1 | s_val: name 
2 | spec_s_val: "name"
3 | num_val: 31.14          
4 | bol_val: true           
5 | nul_val: null          
6 | nul_val1: ~            
7 | time_val: 2018-03-01t11:33:22.55-06:00   
8 | date_val: 2019-01-10


--------------------------------------------------------------------------------
/Pythonic-Standard/Config文件编写/use_argparse.py:
--------------------------------------------------------------------------------
 1 | '''argparse --- 命令行选项、参数和子命令解析器
 2 | argparse 模块可以让人轻松编写用户友好的命令行接口。
 3 | 程序定义它需要的参数，然后 argparse 将弄清如何从 sys.argv 解析出那些参数。 
 4 | argparse 模块还会自动生成帮助和使用手册，并在用户给程序传入无效参数时报出错误信息。
 5 | '''
 6 | 
 7 | import argparse
 8 | parser = argparse.ArgumentParser()
 9 | parser.add_argument('--echo')  # optional flag; its value ends up in args.echo
10 | args, unknown = parser.parse_known_args()  # returns (namespace, list of unrecognized arguments) instead of erroring on unknown ones
11 | print(args, unknown)
12 | 
13 | 


--------------------------------------------------------------------------------
/Pythonic-Standard/Config文件编写/use_omegaconf.py:
--------------------------------------------------------------------------------
 1 | from omegaconf import OmegaConf
 2 | import addict 
 3 | 
 4 | # class ConfigDict(addict.Dict):
 5 | #     # Borrowed from https://github.com/open-mmlab/mmcv/blob/master/mmcv/utils/config.py
 6 | #     def __missing__(self, name):
 7 | #         raise KeyError(name)
 8 | #     def __getattr__(self, name):
 9 | #         try:
10 | #             value = super().__getattr__(name)
11 | #         except KeyError:
12 | #             ex = AttributeError(f"'{self.__class__.__name__}' object has no attribute '{name}'")
13 | #         except Exception as e:
14 | #             ex = e
15 | #         else:
16 | #             return value
17 | #         raise ex
18 | 
19 | # def load_config(path: str):
20 | #     cfg = OmegaConf.load(path)
21 | #     cfg = ConfigDict(OmegaConf.to_container(cfg, resolve=True)) # Resolve in advance.
22 | #     return cfg
23 | 
24 | conf = OmegaConf.load('config.yaml')  # relative path: resolved against the current working directory
25 | 
26 | print(conf.s_val)  # attribute-style access to the top-level key `s_val`
27 | 
28 | # print(load_config('config.yaml').s_val)


--------------------------------------------------------------------------------
/Pythonic-Standard/Config文件编写/use_yaml.py:
--------------------------------------------------------------------------------
1 | import yaml
2 | 
3 | with open('config.yaml') as f:  # relative path: resolved against the current working directory
4 |     config = yaml.safe_load(f)  # safe_load builds only plain Python data, never arbitrary objects
5 |     
6 | print(config)  # the parsed top-level mapping
7 | 


--------------------------------------------------------------------------------
/Pythonic-Standard/Partial使用指南.md:
--------------------------------------------------------------------------------
 1 | # functools.partial (偏函数) 使用指南
 2 | 
 3 | ```python
 4 | def add(*args):
 5 |     return sum(args)
 6 | ```
 7 | 
 8 | 
 9 | 
10 | 
11 | 
12 | partial() 被用作 “冻结” 函数的输入参数，同时返回一个新的具有原功能的函数。也就是提前输入了一部分参数，等效于最后一起输入。


--------------------------------------------------------------------------------
/Pythonic-Standard/Pathlib 使用指南.md:
--------------------------------------------------------------------------------
  1 | # Pathlib 使用指南
  2 | 
  3 | > 背景介绍：不同的操作系统，会使用不同的斜杠来划分路径，
  4 | 
  5 | Different operating systems use either the slash `/` or the backslash `\` (also called a hack or reverse slash) as the path separator.
  6 | 
  7 | 
  8 | 
  9 | `\` 会表示很多转义的含义
 10 | 
 11 | 
 12 | 
 13 | `.` 表示当前路径，通常也可以省略
 14 | 
 15 | `..` 表示父目录
 16 | 
 17 | 
 18 | 
 19 | ## New
 20 | 
 21 | ```python
 22 | from pathlib import Path
 23 | 
 24 | data_folder = Path("source_data/text_files/")
 25 | 
 26 | file_to_open = data_folder / "raw_data.txt"
 27 | ```
 28 | 
 29 | 
 30 | 
 31 | additional
 32 | 
 33 | ```python
 34 | from pathlib import Path
 35 | 
 36 | filename = Path("source_data/text_files/raw_data.txt")
 37 | 
 38 | print(filename.name)
 39 | # prints "raw_data.txt"
 40 | 
 41 | print(filename.suffix)
 42 | # prints ".txt"
 43 | 
 44 | print(filename.stem)
 45 | # prints "raw_data"
 46 | 
 47 | if not filename.exists():
 48 |     print("Oops, file doesn't exist!")
 49 | else:
 50 |     print("Yay, the file exists!")
 51 | ```
 52 | 
 53 | 
 54 | 
 55 | 
 56 | 
 57 | ```python
 58 | Path.cwd()  # 返回当前路径
 59 | Path("source_data").resolve()  # 将路径绝对化（resolve 是实例方法，需在具体路径对象上调用）
 60 | ```
 61 | 
 62 | 
 63 | 
 64 | ## Old
 65 | 
 66 | ```python
 67 | import os
 68 | 
 69 | data_folder = os.path.join("source_data", "text_files")
 70 | 
 71 | file_to_open = os.path.join(data_folder, "raw_data.txt")
 72 | ```
 73 | 
 74 | 
 75 | 
 76 | 使用 `pathlib` 进行路径的拆分
 77 | 
 78 | 
 79 | 
 80 | ```python
 81 | from pathlib import Path
 82 | 
 83 | path = "/foo/bar/baz/file"
 84 | path_split = Path(path).parts
 85 | path_split
 86 | ```
 87 | 
 88 | ```python
 89 | ('/', 'foo', 'bar', 'baz', 'file')
 90 | ```
 91 | 
 92 | 
 93 | 
 94 | os.path.split 将文件路径和文件名分开
 95 | 
 96 | ('/home/ubuntu/python', 'example.py')
 97 | 
 98 | .name .parent
 99 | 
100 | 
101 | 
102 | 
103 | 
104 | 与 os.path.split 的对照表
105 | 
106 | https://docs.python.org/zh-cn/3/library/pathlib.html#correspondence-to-tools-in-the-os-module
107 | 
108 | 
109 | 
110 | 
111 | 
112 | 
113 | 
114 | 
115 | 
116 | ## 前缀 `stem` 和后缀 `suffix`
117 | 
118 | ```shell
119 | >>> PurePosixPath('my/library.tar').stem
120 | 'library'
121 | ```
122 | 
123 | ```shell
124 | >>> PurePosixPath('my/library.tar').suffix
125 | '.tar'
126 | ```
127 | 
128 | 
129 | 
130 | ## 修改前缀 `with_stem` & **修改后缀 `with_suffix`**
131 | 
132 | ```shell
133 | >>> p = PureWindowsPath('c:/Downloads/draft.txt')
134 | >>> p.with_stem('final')
135 | PureWindowsPath('c:/Downloads/final.txt')
136 | ```
137 | 
138 | ```shell
139 | >>> p = PureWindowsPath('README.md')
140 | >>> p.with_suffix('.txt')
141 | PureWindowsPath('README.txt')
142 | ```
143 | 
144 | 


--------------------------------------------------------------------------------
/Pythonic-Standard/Python下划线含义.md:
--------------------------------------------------------------------------------
 1 | 通常会见到5种下划线的情况
 2 | 
 3 | 
 4 | 
 5 | |        类型        |   表示    |                        含义                        |
 6 | | :----------------: | :-------: | :------------------------------------------------: |
 7 | |    单前导下划线    |  `_var`   | 一种约定俗成的，解释器不会有额外操作，仅供内部使用 |
 8 | |    单末尾下划线    |  `var_`   |              为避免和python关键字冲突              |
 9 | |    双前导下划线    |  `__var`  |                     名称修饰器                     |
10 | | 双前导和末尾下划线 | `__var__` |        python一些自定义的方法，自己不要使用        |
11 | |      单下划线      |    `_`    |                 临时无意义的空变量                 |
12 | 
13 | 


--------------------------------------------------------------------------------
/Pythonic-Standard/decorator装饰器.md:
--------------------------------------------------------------------------------
 1 | # Decorator 装饰器用法
 2 | 
 3 | 参考 https://www.zhihu.com/question/26930016
 4 | 
 5 | 
 6 | 
 7 | 
 8 | 
 9 | ```python
10 | def decorator(func):
11 |     def wrapper(*args, **kwargs):
12 |         print('123')
13 |         return func(*args, **kwargs)
14 | 
15 |     return wrapper
16 | 
17 | @decorator
18 | def say_hello():
19 |     print('同学你好')
20 | 
21 | # 相当于执行了 say_hello = decorator(say_hello)
22 |     
23 | say_hello()
24 | 
25 | >>> 123
26 | 	同学你好
27 | ```
28 | 
29 | 
30 | 
31 | 
32 | 
33 | ```python
34 | def info(value):
35 |     def decorator(func):
36 |         def wrapper(*args, **kwargs):
37 |             print(value)
38 |             return func(*args, **kwargs)
39 | 
40 |         return wrapper
41 | 
42 |     return decorator
43 | 
44 | @info('456')
45 | def say_hello():
46 |     print('同学你好')
47 | 
48 | say_hello()
49 | 
50 | ```
51 | 
52 | 


--------------------------------------------------------------------------------
/Pythonic-Standard/import.md:
--------------------------------------------------------------------------------
1 | https://stackoverflow.com/questions/16981921/relative-imports-in-python-3?page=1&tab=scoredesc#tab-top
2 | 
3 | https://stackoverflow.com/questions/14132789/relative-imports-for-the-billionth-time/14132912#14132912
4 | 


--------------------------------------------------------------------------------
/Pythonic-Standard/multi-level/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yzy1996/Python-Code/2abcaa6fbfa4a84aaffdf10d7bcc6b12649dd221/Pythonic-Standard/multi-level/__init__.py


--------------------------------------------------------------------------------
/Pythonic-Standard/multi-level/file1.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | 
4 | # from folder1.folder22 import file221
5 | from folder1.folder11 import file111
6 | 


--------------------------------------------------------------------------------
/Pythonic-Standard/multi-level/file2.py:
--------------------------------------------------------------------------------
1 | print('hello file2')


--------------------------------------------------------------------------------
/Pythonic-Standard/multi-level/folder1/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yzy1996/Python-Code/2abcaa6fbfa4a84aaffdf10d7bcc6b12649dd221/Pythonic-Standard/multi-level/folder1/__init__.py


--------------------------------------------------------------------------------
/Pythonic-Standard/multi-level/folder1/file11.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | sys.path.append(os.getcwd())
4 | 
5 | print('hello file11')
6 | 


--------------------------------------------------------------------------------
/Pythonic-Standard/multi-level/folder1/file12.py:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | 
4 | 
5 | print('hello file12')


--------------------------------------------------------------------------------
/Pythonic-Standard/multi-level/folder1/folder11/file111.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | sys.path.append(os.getcwd())
 4 | 
 5 | 
 6 | print(sys.path)
 7 | 
 8 | # print('hello file111')
 9 | 
10 | # from ..folder22 import file221
11 | from folder1.folder11.file112 import foo
12 | 
13 | # print(sys.path)


--------------------------------------------------------------------------------
/Pythonic-Standard/multi-level/folder1/folder11/file112.py:
--------------------------------------------------------------------------------
1 | print('hello file112')
2 | 
3 | def foo():
4 |     print('1')


--------------------------------------------------------------------------------
/Pythonic-Standard/multi-level/folder1/folder22/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yzy1996/Python-Code/2abcaa6fbfa4a84aaffdf10d7bcc6b12649dd221/Pythonic-Standard/multi-level/folder1/folder22/__init__.py


--------------------------------------------------------------------------------
/Pythonic-Standard/multi-level/folder1/folder22/file221.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | sys.path.append(os.getcwd())
4 | 
5 | print('hello file221')
6 | 
7 | 


--------------------------------------------------------------------------------
/Pythonic-Standard/multi-level/folder2/file21.py:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | print('hello file21')


--------------------------------------------------------------------------------
/Pythonic-Standard/multi-level/set_env.sh:
--------------------------------------------------------------------------------
1 | # Prepend the current working directory to PYTHONPATH.
2 | # NOTE(review): presumably meant to be sourced from the project root (`source set_env.sh`) so the export reaches the calling shell — confirm.
3 | DIR="$(pwd)"  # directory the script is invoked from, not the script's own location
4 | export PYTHONPATH="${DIR}":$PYTHONPATH  # new entry goes first; the existing value is kept after the colon
5 | echo "added $DIR to PYTHONPATH"


--------------------------------------------------------------------------------
/Pythonic-Standard/print 输出.md:
--------------------------------------------------------------------------------
 1 | ```python
 2 | import sys
 3 | import os
 4 | class Logger(object):
 5 |     def __init__(self, fileN="default.log"):
 6 |         self.terminal = sys.stdout
 7 |         self.log = open(fileN, "a")
 8 |  
 9 |     def write(self, message):
10 |         self.terminal.write(message)
11 |         self.log.write(message)
12 |  
13 |     def flush(self):
14 |         pass
15 |     
16 | # print 前调用
17 | sys.stdout = Logger('datalog.txt')
18 | ```
19 | 
20 | 


--------------------------------------------------------------------------------
/Pythonic-Standard/tqdm使用指南.md:
--------------------------------------------------------------------------------
  1 | # tqdm 使用指南
  2 | 
  3 | 官方网站：https://tqdm.github.io/
  4 | 
  5 | 
  6 | 
  7 | ## 基本功能
  8 | 
  9 | `tqdm` 主要是能够显示迭代训练的进度条，先来看例子1
 10 | 
 11 | ```python
 12 | from tqdm import tqdm
 13 | 
 14 | for char in tqdm(["a", "b", "c", "d"]):
 15 |     pass
 16 | ```
 17 | 
 18 | 因为 tqdm() 里的对象是一个 list，是一个可迭代对象，因此上述操作等价于
 19 | 
 20 | ```python
 21 | from tqdm import tqdm
 22 | 
 23 | for char in ["a", "b", "c", "d"]:
 24 |     pass
 25 | ```
 26 | 
 27 | 所以这里先建立一种感觉，就是你第一遍还是写正常的迭代循环代码，然后改成`tqdm`进度条形式。
 28 | 
 29 | 
 30 | 
 31 | ## 更复杂的例子
 32 | 
 33 | 接着我们看一些更复杂的例子2
 34 | 
 35 | ```python
 36 | # 有一个等价 range 的 trange
 37 | 
 38 | from tqdm import trange
 39 | 
 40 | # 等同于 for i in tqdm(range(10)):
 41 | for i in trange(10):
 42 |     pass
 43 | ```
 44 | 
 45 | 如果要给进度条加一点说明
 46 | 
 47 | ```python
 48 | from tqdm import tqdm
 49 | from time import sleep
 50 | 
 51 | pbar = tqdm(["a", "b", "c", "d"])
 52 | for char in pbar:
 53 |     sleep(0.25)
 54 |     pbar.set_description("Processing %s" % char) # 出现在最前面
 55 | ```
 56 | 
 57 | 让上面代码更加优雅一点呢，似乎也并没有
 58 | 
 59 | ```python
 60 | from tqdm import tqdm
 61 | from time import sleep
 62 | 
 63 | with tqdm(["a", "b", "c", "d"]) as pbar:
 64 |     for char in pbar:
 65 |         sleep(0.25)
 66 |         pbar.set_description("Processing %s" % char) # 出现在最前面
 67 | ```
 68 | 
 69 | 所以这里引入了一个 with ... as ... 结构，在python里实际是为了简化自动清理流程的，例如读取文件时候的open close
 70 | 
 71 | 而这里如果 tqdm 内部是一个可迭代对象，则是会自动用完释放掉的，既然如此，那也就对应着没有自动释放掉的方法
 72 | 
 73 | ```python
 74 | from tqdm import tqdm
 75 | from time import sleep
 76 | 
 77 | pbar = tqdm(total=100)
 78 | for i in range(10):
 79 |     sleep(0.1)
 80 |     pbar.update(10)
 81 | pbar.close()
 82 | ```
 83 | 
 84 | 上面代码的含义是：进度条总长为 100，循环共执行 10 次，每次手动更新 10，所以进度条就是 10+10+10...；用完后需要手动 `close()`。这里替换为 with 结构的话就成了
 85 | 
 86 | ```python
 87 | from tqdm import tqdm
 88 | from time import sleep
 89 | 
 90 | with tqdm(total=100) as pbar:
 91 |     for i in range(10):
 92 |         sleep(0.1)
 93 |         pbar.update(10)
 94 | ```
 95 | 
 96 | 
 97 | 
 98 | 再回去看例子2，到底有没有更加优雅的方式呢
 99 | 
100 | ```python
101 | from tqdm import tqdm
102 | from time import sleep
103 | 
104 | for char in (pbar := tqdm(["a", "b", "c", "d"])):
105 |     sleep(0.25)
106 |     pbar.set_description(f"Processing {char}") # 出现在最前面
107 | ```
108 | 
109 | 利用了海象运算符 `:=`（Python 3.8+），可以在表达式内部（这里是 for 语句的迭代对象）同时完成赋值和使用
110 | 
111 | **这也是最推荐的一种使用样式！！！**
112 | 
113 | 
114 | 
115 | ## 进阶细节
116 | 
117 | 自定义初始值和总值
118 | 
119 | ```python
120 | from tqdm import trange
121 | from time import sleep
122 | 
123 | epoch = 0
124 | with trange(epoch, 10, initial=epoch, total=10) as pbar:
125 |     for epoch in pbar:
126 |         sleep(1)
127 | ```
128 | 
129 | `trange(start, end, initial, total)`
130 | 
131 | 如果不加total，则显示的总数将是 end-start，但会从initial开始
132 | 
133 | 如果不加initial，则会从0开始，然后总数是 end-start
134 | 
135 | 
136 | 
137 | 更加优雅的写法
138 | 
139 | ```python
140 | from tqdm import trange
141 | from time import sleep
142 | 
143 | epoch = 0
144 | 
145 | for epoch in (pbar := trange(epoch, 10, initial=epoch, total=10)):
146 |     sleep(1)
147 | ```
148 | 
149 | 好处是不用对epoch再进行任何判断操作，从中断开始，自动结束
150 | 
151 | 
152 | 
153 | tqdm 后依旧是一个可迭代对象，因此可以继续放心搭配 enumerate 使用
154 | 
155 | 
156 | 
157 | 
158 | 
159 |  ## 双循环
160 | 
161 | ```python
162 | from tqdm.auto import trange
163 | from time import sleep
164 | 
165 | for i in trange(4, desc='1st loop'):
166 |     for j in trange(5, desc='2nd loop'):
167 |         for k in trange(50, desc='3rd loop', leave=False):
168 |             sleep(0.01)
169 | ```
170 | 
171 |  
172 | 
173 | 最后我们再回过头看一些基本概念;
174 | 
175 | tqdm 内部是一个可迭代对象。与迭代器不同的是，我们可以遍历一个 range 对象而不「消耗」它。
176 | 
177 | 
178 | 
179 | ## 自用模板
180 | 
181 | 我在训练网络的时候，首先会有一个 epoch，然后对于每一个 epoch，都会把数据都迭代一遍，因此内循环用iteration表示，总共的迭代次数就是 epoch*iteration
182 | 
183 | ```python
184 | from tqdm import tqdm
185 | 
186 | epoch = 0
187 | 
188 | while epoch < 10:
189 |     epoch += 1
190 |     with tqdm(train_loader, desc=f'Epoch {epoch}/10') as pbar:
191 |         for i, (images, _) in enumerate(pbar):
192 | 
193 |             pbar.set_postfix(loss=i)
194 | ```
195 | 
196 | 再来说一下我们的目标：
197 | 
198 | - 在哪一个epoch
199 | - 每一个epoch里的迭代iteration
200 | 
201 | 
202 | 
203 | 


--------------------------------------------------------------------------------
/Pythonic-Standard/切片.md:
--------------------------------------------------------------------------------
  1 | #! https://zhuanlan.zhihu.com/p/426435347
  2 | # 切片(slice) 省略(Ellipsis) None对象
  3 | 
  4 | > 一定要看到最后
  5 | 
  6 | 一般我们要对一个列表切片
  7 | 
  8 | ```python
  9 | a = [1, 2, 3, 4, 5, 6]
 10 | a[1:3]
 11 | >>> [2, 3]
 12 | 
 13 | # 其实完整写法是
 14 | a[1:3:1]
 15 | >>> [2, 3]
 16 | ```
 17 | 
 18 | 所以方括号中的含义是 `[start: stop: step]`，如果省略 start，默认就是从开头（索引0）；如果省略 stop，默认就是一直取到末尾（包含最后一个元素）；如果省略 step，默认就是步长为1（step 为负数时，start/stop 的默认值相应变为末尾和开头）。
 19 | 
 20 | 因此几个常见但让人摸不着头脑的用法是：
 21 | 
 22 | ```python
 23 | a = [1, 2, 3, 4, 5, 6]
 24 | 
 25 | a[:] # 全省略，就是从头到尾间隔1，即原始
 26 | >>> [1, 2, 3, 4, 5, 6]
 27 | 
 28 | a[::-1] # 从头到尾步长-1，即倒序
 29 | >>> [6, 5, 4, 3, 2, 1]
 30 | 
 31 | a[-1::] # 从倒数第一个开始到最后一个，即最后一个
 32 | >>> [6]
 33 | ```
 34 | 
 35 | ## 引入`slice()`
 36 | 
 37 | **slice()** 是 python 中的一个内置函数，实现切片对应，一般用在较为复杂的代码开头提前定义好切片的样式；等价于常用的简易版`:`用法。我们来重复上述操作
 38 | 
 39 | ```python
 40 | myslice = slice(1,3)
 41 | a[myslice]
 42 | >>> [2, 3]
 43 | 
 44 | myslice = slice(None,3)
 45 | a[myslice]
 46 | >>> [1, 2, 3]
 47 | ```
 48 | 
 49 | 可以发现，使用slice时，空的部分需要写为None。
 50 | 
 51 | 我们来看对**多维数组**的一些操作
 52 | 
 53 | ```python
 54 | import numpy as np
 55 | a = np.arange(9).reshape(3,3) 
 56 | >>> array([[0, 1, 2],
 57 |            [3, 4, 5],
 58 |            [6, 7, 8]])
 59 | 
 60 | # 取出第一行所有元素
 61 | a[0]
 62 | >>> array([0, 1, 2])
 63 | 
 64 | # 取出前两行所有元素
 65 | a[:2]
 66 | >>> array([[0, 1, 2],
 67 |            [3, 4, 5]])
 68 | 
 69 | # 取出第二列所有元素
 70 | a[:,1]
 71 | >>> array([1, 4, 7])
 72 | 
 73 | # 取出后两列所有元素
 74 | a[:,1:]
 75 | >>> array([[1, 2],
 76 |            [4, 5],
 77 |            [7, 8]])
 78 | ```
 79 | 
 80 | 在上述后面两个用法中，索引逗号前面的 `:` 代表了所有行。
 81 | 
 82 | ## 引入`...(ellipsis)`
 83 | 
 84 | 还要补充一个关于 `...`（Ellipsis）的用法，表示“其余所有维度都取全部”，相当于自动补上若干个 `:`
 85 | 
 86 | ```python
 87 | import numpy as np
 88 | a = np.arange(9).reshape(3,3) 
 89 | 
 90 | # 取出第二列所有元素
 91 | a[...,1]
 92 | >>> array([1, 4, 7])
 93 | ```
 94 | 
 95 | 似乎并没有简化，那我们再看一个例子
 96 | 
 97 | ```python
 98 | import numpy as np
 99 | a = np.arange(27).reshape(3,3,3)
100 | >>> array([[[ 0,  1,  2],
101 |             [ 3,  4,  5],
102 |             [ 6,  7,  8]],
103 |            
104 |            [[ 9, 10, 11],
105 |             [12, 13, 14],
106 |             [15, 16, 17]],
107 |            
108 |            [[18, 19, 20],
109 |             [21, 22, 23],
110 |             [24, 25, 26]]])
111 | 
112 | # 取出第二个块所有元素
113 | a[1,:,:]
114 | >>> array([[ 9, 10, 11],
115 |            [12, 13, 14],
116 |            [15, 16, 17]])
117 | 
118 | # 等价于
119 | a[1,...]
120 | >>> array([[ 9, 10, 11],
121 |            [12, 13, 14],
122 |            [15, 16, 17]])
123 | ```
124 | 
125 | ellipsis会自动帮我们去填充，有点像tensor里的 -1 用法。
126 | 
127 | 最后我们来看一点高级用法
128 | 
129 | ```python
130 | import numpy as np
131 | a = np.arange(27).reshape(3,3,3)
132 | 
133 | a[None].shape
134 | >>> (1, 3, 3, 3)
135 | # 这里的`None`表示在指定位置(此处是第一维)添加一维。
136 | 
137 | a[:,None].shape
138 | >>> (3, 1, 3, 3)
139 | # 这里的`None`表示在指定位置(此处是第二维)添加一维。
140 | 
141 | a[:,:,:,None].shape
142 | >>> (3, 3, 3, 1)
143 | # 等价于
144 | a[...,None].shape
145 | >>> (3, 3, 3, 1)
146 | 
147 | a[:,None,None].shape
148 | >>> (3, 1, 1, 3, 3)
149 | ```
150 | 
151 | ## 引入`None对象`
152 | 
153 | 更复杂的情况
154 | 
155 | ```python
156 | import numpy as np
157 | a = np.arange(27).reshape(3,3,3)
158 | 
159 | a[:,None,...,None,None,None].shape
160 | >>> (3, 1, 3, 3, 1, 1, 1)
161 | 
162 | # 等价于
163 | myslice = (slice(None), None,) + (...,) + (None,)*3
164 | >>> (slice(None, None, None), None, Ellipsis, None, None, None)
165 | a[myslice].shape
166 | >>> (3, 1, 3, 3, 1, 1, 1)
167 | ```
168 | 
169 | 这样可以自定义更多的复杂组合，但必须使用到 `slice()` 而不是 `:`
170 | 
171 | **这是因为 python 中的 None 是一个对象，":" 是一个 slice() 对象，"..." 是一个 Ellipsis 对象，所以是能拼一起或者各种操作的。**
172 | 
173 | 


--------------------------------------------------------------------------------
/Pythonic-Standard/参数 传参 可变参数.md:
--------------------------------------------------------------------------------
 1 | '''
 2 | 
 3 | 位置参数（positional argument） 必须传入
 4 | 
 5 | 默认参数 可传或者不传就用默认参数
 6 | 
 7 | 可变参数 
 8 | 
 9 | \---
10 | 
11 | *args 是一个 tuple， 变量名 args 指向这个 tuple 对象
12 | 
13 | **kwargs 是一个 dict， 变量名 kwargs 指向这个 dict 对象
14 | 
15 | \---
16 | 
17 | 关键词参数（keyword argument）
18 | 
19 | 
20 | 
21 | '''
22 | 
23 | 
24 | 
25 | def foo(first, *args, **kwargs):
26 | 
27 |   print(first)
28 | 
29 |   print(args)
30 | 
31 |   print(kwargs)
32 | 
33 | 
34 | 
35 | \# foo(1, 3, a=1)
36 | 
37 | 
38 | 
39 | '''
40 | 
41 | 
42 | 
43 | '''
44 | 
45 | def foo1(a, second = 'hello'):
46 | 
47 |   print(a, second)
48 | 
49 | 
50 | 
51 | foo1(3, second='123')
52 | 
53 | 
54 | 
55 | def foo2(first, second):
56 | 
57 |   print(first, second)
58 | 
59 | 
60 | 
61 | str = ['hello', 'world']
62 | 
63 | 
64 | 
65 | \# foo2(*str)
66 | 
67 | 
68 | 
69 | \# print(*str) # 拆开了 str
70 | 
71 | 
72 | 
73 | 
74 | 
75 | def foo(k, *args, **kwargs):
76 | 
77 |   print(k)
78 | 
79 |   print(args)
80 | 
81 |   print(kwargs)
82 | 
83 | 
84 | 
85 | embed_kwargs = {'a': 1, 'b': 2}
86 | 
87 | 
88 | 
89 | def foo2(**kwargs):
90 | 
91 |   print(kwargs)
92 | 
93 | 
94 | 
95 | foo2(global_step=2)


--------------------------------------------------------------------------------
/Pythonic-Standard/基础使用.md:
--------------------------------------------------------------------------------
 1 | # 基础使用
 2 | 
 3 | > 这里不记录常识，只记录一些值得留意的用法
 4 | 
 5 | 
 6 | 
 7 | ## 字典
 8 | 
 9 | **defaultdict**
10 | 
11 | > 当字典中的 key 不存在时，不再抛出 KeyError，而是返回一个默认值；这个默认值由传入的工厂函数（int、list、set、str 等）决定，也就是对应类型的空值。
12 | 
13 | | 类型 | 返回值 |
14 | | :--: | :----: |
15 | | int  |   0    |
16 | | str  |   ''   |
17 | | list |   []   |
18 | 
19 | **计数**
20 | 
21 | ```python
22 | from collections import Counter
23 | 
24 | count_dict = Counter(count_total)
25 | 
26 | print('成功:', count_dict.get(0, 0))
27 | ```
28 | 
29 | 
30 | 
31 | ## pickle
32 | 
33 | > 持久化，也就是存到本地
34 | 
35 | 就是将 python 数据存为 bytes 文件，主要用法是 `pickle.dump`, `pickle.load`，使用的时候需要搭配 `rb,wb`。
36 | 
37 | ```python
38 | import pickle
39 | 
40 | with open('data.pickle', 'wb') as f:
41 |     pickle.dump(data, f)
42 | 
43 | with open('data.pickle', 'rb') as f:
44 |     data = pickle.load(f)
45 | ```
46 | 
47 | 不仅可以存单个数据，也可以存自定义的类
48 | 
49 | 
50 | 
51 | 
52 | 
53 | 


--------------------------------------------------------------------------------
/Pythonic-Standard/字典的实用.md:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | ## 字典
  4 | 
  5 | **defaultdict**
  6 | 
  7 | > 当字典中的 key 不存在时，不再抛出 KeyError，而是返回一个默认值；这个默认值由传入的工厂函数（int、list、set、str 等）决定，也就是对应类型的空值。
  8 | 
  9 | | 类型 | 返回值 |
 10 | | :--: | :----: |
 11 | | int  |   0    |
 12 | | str  |   ''   |
 13 | | list |   []   |
 14 | 
 15 | 
 16 | 
 17 | 字典用函数作为值
 18 | 
 19 | ```python
 20 | items = {
 21 | 		'laptop': lambda x: x * 600, 
 22 |   	'raspberry pi': lambda x: x * 5,
 23 | 		'arduino': lambda x: x * 50
 24 | 	}
 25 | 
 26 | quantity = 4
 27 | value_of_laptop = items['laptop'](quantity)
 28 | print(f'value of {quantity} laptops = ${value_of_laptop}')
 29 | ```
 30 | 
 31 | 
 32 | 
 33 | 
 34 | 
 35 | 有哪些好用的库
 36 | 
 37 | 字典库easydict和addict
 38 | 
 39 | ```bash
 40 | pip install easydict
 41 | pip install addict
 42 | ```
 43 | 
 44 | 
 45 | 
 46 | 基本使用
 47 | 
 48 | ```python
 49 | >>> from addict import Dict
 50 | >>> mapping = Dict()
 51 | >>> mapping.a.b.c.d.e = 2
 52 | >>> mapping
 53 | {'a': {'b': {'c': {'d': {'e': 2}}}}}
 54 | ```
 55 | 
 56 | 
 57 | 
 58 | 
 59 | 
 60 | 
 61 | 
 62 | 针对json
 63 | 
 64 | 
 65 | 
 66 | ```python
 67 | >>> from easydict import EasyDict as edict
 68 | >>> from simplejson import loads
 69 | >>> j = """{
 70 | "Buffer": 12,
 71 | "List1": [
 72 |     {"type" : "point", "coordinates" : [100.1,54.9] },
 73 |     {"type" : "point", "coordinates" : [109.4,65.1] },
 74 |     {"type" : "point", "coordinates" : [115.2,80.2] },
 75 |     {"type" : "point", "coordinates" : [150.9,97.8] }
 76 | ]
 77 | }"""
 78 | >>> d = edict(loads(j))
 79 | >>> d.Buffer
 80 | 12
 81 | >>> d.List1[0].coordinates[1]
 82 | 54.9
 83 | 
 84 | ```
 85 | 
 86 | 
 87 | 
 88 | 
 89 | 
 90 | 
 91 | 
 92 | 优先推荐使用 addict
 93 | 
 94 | 
 95 | 
 96 | 
 97 | 
 98 | 
 99 | 
100 | treelib
101 | 


--------------------------------------------------------------------------------
/Pythonic-Standard/类.md:
--------------------------------------------------------------------------------
 1 | ```python
 2 | class MyClass(object):
 3 |     def __init__(self, value1, value2):
 4 |         self.value1 = value1
 5 |         self.value2 = value2
 6 | 
 7 | class MySubClass(MyClass):
 8 |     def __init__(self, *args, **kwargs):
 9 |         super().__init__(*args, **kwargs)
10 | 
11 | 
12 | my_class = MyClass(1, 2)
13 | print(my_class.value1, my_class.value2)
14 | 
15 | my_subclass1 = MySubClass(3, 4)
16 | print(my_subclass1.value1, my_subclass1.value2)
17 | 
18 | my_subclass2 = MySubClass(3, value2=4)
19 | print(my_subclass2.value1, my_subclass2.value2)
20 | ```
21 | 
22 | 
23 | 
24 | 写 `*args, **kwargs` 只是为了在继承的时候，既可以直接给值，也可以是key-value的形式
25 | 
26 | 
27 | 
28 | 
29 | 
30 | 类的几种特殊方法
31 | 
32 | ```python
33 | class DictDemo:
34 |       def __init__(self,key,value):
35 |             self.dict = {}
36 |             self.dict[key] = value
37 |       def __getitem__(self,key):
38 |             return self.dict[key]
39 |       def __setitem__(self,key,value):
40 |             self.dict[key] = value
41 | dictDemo = DictDemo('key0','value0')
42 | print(dictDemo['key0']) #value0
43 | dictDemo['key1'] = 'value1'
44 | print(dictDemo['key1']) #value1
45 | ```
46 | 
47 | 
48 | 
49 | 
50 | 
51 | `__getitem__`
52 | 
53 | 当采用索引的方式取值，就会自动执行 这个方法
54 | 
55 | 用法在定义的时候会传入一个 key 的参数
56 | 
57 | ```
58 | def __getitem__(self, key):
59 | ```
60 | 
61 | 然后每当这样访问类的实例时就会执行：`Class实例[key]`
62 | 
63 | 
64 | 
65 | 
66 | 
67 | `__setitem__`
68 | 
69 | 当采用字典的方式赋值时，就会自动调用这个方法
70 | 
71 | 
72 | 
73 | `__len__`
74 | 
75 | 使用内建函数len()，就自动调用这个
76 | 
77 | 
78 | 
79 | 
80 | 
81 | `__call__` 是为了像函数一样调用它
82 | 
83 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | <h1 align="center">Python-Code</h1>
 2 | <div align="center">
 3 | </div>
 4 | A collection of the python code that I have used.
 5 | 
 6 | 
 7 | 
 8 | ## [Standard](./Pythonic-Standard)
 9 | 
10 | The standard of using python! Make python code more pythonic!
11 | 
12 | ## [Algorithm](./Python+Algorithm)
13 | 
14 | including：
15 | 
16 | - Geometric
17 | - Least Squares
18 | - Optimization algorithm
19 | - sorting algorithm
20 | 
21 | ## [Data Processing](./Data-Processing)
22 | 
23 | including：
24 | 
25 | - csv
26 | - docx
27 | - txt
28 | - excel
29 | - pdf
30 | 
31 | ## [Python+HTML](./Python+HTML)
32 | 
33 | A tutorial for using html to show python console, very nice!
34 | 
35 | ## [Python+Opencv](./Python+Opencv)
36 | 
37 | Some tutorials using OpenCV
38 | 
39 | 
40 | 
41 | > The others, I think, are not worth mentioning; I hope you can find what you want.
42 | 
43 | 
44 | 
45 | ## Related
46 | 
47 | http://jobbole.github.io/awesome-python-cn/
48 | 
49 | https://github.com/metafy-social/daily-python-scripts
50 | 
51 | https://github.com/TheAlgorithms/Python
52 | 
53 | 


--------------------------------------------------------------------------------