├── .gitignore
├── README.md
├── code
├── README.md
├── ch04_单元测试与代码覆盖率
│ ├── scale.py
│ └── test.py
├── ch05_列表生成式
│ └── ch05.ipynb
├── ch06_Collections 库
│ └── ch06.ipynb
├── ch07_迭代器
│ └── ch07.ipynb
├── ch08_Python 多线程与多进程浅析
│ └── ch08.ipynb
├── ch09_Python 程序性能分析初步
│ ├── ch09.ipynb
│ ├── test_CPU_01.py
│ ├── test_CPU_02.py
│ └── test_CPU_03.py
├── ch12_K近邻算法
│ └── ch12.ipynb
├── ch13_主成分分析
│ └── ch13.ipynb
├── ch14_逻辑回归
│ └── ch14.ipynb
├── ch15_朴素贝叶斯
│ └── ch15.ipynb
├── ch16_决策树
│ └── ch16.ipynb
├── ch17_支持向量机
│ ├── ch17.ipynb
│ └── imgs
│ │ └── digits 数据集.png
├── ch18_K-Means聚类
│ └── ch18.ipynb
├── ch19_人工神经网络
│ ├── ch19.ipynb
│ ├── model_structure.json
│ └── model_weight.h5
├── ch21_Python 机器学习工具
│ └── ch21.ipynb
├── ch22_基于RFM的P2P用户聚类模型
│ ├── ch22.ipynb
│ ├── clean_data.csv
│ └── data.csv
├── ch23_文本的主题分类
│ ├── sample.txt
│ └── text_calssify.py
├── ch24_利用机器翻译实现自然语言查询
│ ├── seq2seq_model.py
│ └── train.py
└── ch25_身份证汉字和数字识别
│ ├── Dependency.txt
│ ├── back_all
│ ├── __init__.py
│ ├── back_rotate
│ │ ├── .DS_Store
│ │ ├── Back_rotate.py
│ │ ├── Preprocess.py
│ │ ├── TextLine_Index.py
│ │ └── __init__.py
│ ├── east_part
│ │ ├── __init__.py
│ │ ├── cut_back_v3.py
│ │ ├── data_util.py
│ │ ├── east_segment_line.py
│ │ ├── icdar.py
│ │ ├── lanms
│ │ │ ├── .gitignore
│ │ │ ├── .ycm_extra_conf.py
│ │ │ ├── Makefile
│ │ │ ├── __init__.py
│ │ │ ├── __main__.py
│ │ │ ├── adaptor.cpp
│ │ │ ├── include
│ │ │ │ ├── clipper
│ │ │ │ │ ├── clipper.cpp
│ │ │ │ │ └── clipper.hpp
│ │ │ │ └── pybind11
│ │ │ │ │ ├── attr.h
│ │ │ │ │ ├── buffer_info.h
│ │ │ │ │ ├── cast.h
│ │ │ │ │ ├── chrono.h
│ │ │ │ │ ├── class_support.h
│ │ │ │ │ ├── common.h
│ │ │ │ │ ├── complex.h
│ │ │ │ │ ├── descr.h
│ │ │ │ │ ├── eigen.h
│ │ │ │ │ ├── embed.h
│ │ │ │ │ ├── eval.h
│ │ │ │ │ ├── functional.h
│ │ │ │ │ ├── numpy.h
│ │ │ │ │ ├── operators.h
│ │ │ │ │ ├── options.h
│ │ │ │ │ ├── pybind11.h
│ │ │ │ │ ├── pytypes.h
│ │ │ │ │ ├── stl.h
│ │ │ │ │ ├── stl_bind.h
│ │ │ │ │ └── typeid.h
│ │ │ └── lanms.h
│ │ ├── locality_aware_nms.py
│ │ ├── model.py
│ │ └── nets
│ │ │ ├── __init__.py
│ │ │ ├── resnet_utils.py
│ │ │ └── resnet_v1.py
│ ├── main.py
│ ├── model_loader.py
│ ├── other_recognize
│ │ ├── __init__.py
│ │ ├── characterMap.txt
│ │ ├── combined_model_structure.json
│ │ ├── combined_model_weight.h5
│ │ ├── cut_character_v1.py
│ │ ├── do_recognition_for_sorted_id_card.py
│ │ ├── filter_using_cnn_prob_v1.py
│ │ ├── main_cut_v2_without_cut_line_sorted_part.py
│ │ ├── other_main.py
│ │ └── prepare_cut_character_v2.py
│ ├── test.png
│ └── valid_recognition
│ │ ├── CNN_test_date_modify.py
│ │ ├── Cut_Year_Date.py
│ │ ├── Preprocess_Image.py
│ │ ├── Split_Image.py
│ │ ├── WholeProcess_valid.py
│ │ ├── __init__.py
│ │ ├── model_structure_date.json
│ │ ├── model_weight_date.h5
│ │ ├── prepare_cut_character_v2.py
│ │ └── valid_main.py
│ └── front_all
│ ├── __init__.py
│ ├── east_part
│ ├── __init__.py
│ ├── data_util.py
│ ├── east_model
│ ├── east_segment_line.py
│ ├── icdar.py
│ ├── lanms
│ │ ├── .gitignore
│ │ ├── .ycm_extra_conf.py
│ │ ├── Makefile
│ │ ├── __init__.py
│ │ ├── __main__.py
│ │ ├── adaptor.cpp
│ │ ├── include
│ │ │ ├── clipper
│ │ │ │ ├── clipper.cpp
│ │ │ │ └── clipper.hpp
│ │ │ └── pybind11
│ │ │ │ ├── attr.h
│ │ │ │ ├── buffer_info.h
│ │ │ │ ├── cast.h
│ │ │ │ ├── chrono.h
│ │ │ │ ├── class_support.h
│ │ │ │ ├── common.h
│ │ │ │ ├── complex.h
│ │ │ │ ├── descr.h
│ │ │ │ ├── eigen.h
│ │ │ │ ├── embed.h
│ │ │ │ ├── eval.h
│ │ │ │ ├── functional.h
│ │ │ │ ├── numpy.h
│ │ │ │ ├── operators.h
│ │ │ │ ├── options.h
│ │ │ │ ├── pybind11.h
│ │ │ │ ├── pytypes.h
│ │ │ │ ├── stl.h
│ │ │ │ ├── stl_bind.h
│ │ │ │ └── typeid.h
│ │ └── lanms.h
│ ├── locality_aware_nms.py
│ ├── model.py
│ ├── nets
│ │ ├── __init__.py
│ │ ├── resnet_utils.py
│ │ └── resnet_v1.py
│ └── sort_cut_line_v2.py
│ ├── main.py
│ ├── model_loader.py
│ ├── number_recognition
│ ├── CNN_test_number_v3_modify.py
│ ├── Overall_Process_input_line.py
│ ├── Preprocess.py
│ ├── TextLine_Index.py
│ ├── __init__.py
│ ├── model_structure_number.json
│ ├── model_weight_number.h5
│ └── number_main.py
│ ├── other_recognize
│ ├── __init__.py
│ ├── characterMap.txt
│ ├── combined_model_structure.json
│ ├── combined_model_weight.h5
│ ├── cut_character_v1.py
│ ├── do_recognition_for_sorted_id_card.py
│ ├── filter_using_cnn_prob_v1.py
│ ├── main_cut_v2_without_cut_line_sorted_part.py
│ ├── other_main.py
│ └── prepare_cut_character_v2.py
│ ├── rotate_cert
│ ├── FaceDetection_usedto_adjust_rotate_via_MTCNN.py
│ ├── __init__.py
│ ├── det1.npy
│ ├── det2.npy
│ ├── det3.npy
│ ├── detect_face.py
│ └── haarcascade_frontalface_default.xml
│ └── test.png
├── datasets
└── README.md
├── docs
└── README.md
├── imgs
├── README.md
└── book-cover.jpg
└── 勘误.pdf
/.gitignore:
--------------------------------------------------------------------------------
1 | code/ch14_逻辑回归/.ipynb_checkpoints/ch14-checkpoint.ipynb
2 | code/ch18_K-Means聚类/.ipynb_checkpoints/ch18-checkpoint.ipynb
3 | code/ch05_列表生成式/.ipynb_checkpoints/ch05-checkpoint.ipynb
4 | code/ch09_Python 程序性能分析初步/.ipynb_checkpoints/ch09-checkpoint.ipynb
5 | code/ch04_单元测试与代码覆盖率/__pycache__/scale.cpython-35.pyc
6 | code/ch07_迭代器/.ipynb_checkpoints/ch07-checkpoint.ipynb
7 | code/ch08_Python 多线程与多进程浅析/.ipynb_checkpoints/ch08-checkpoint.ipynb
8 | code/ch06_Collections 库/.ipynb_checkpoints/ch06-checkpoint.ipynb
9 | code/ch12_K近邻算法/.ipynb_checkpoints/ch12-checkpoint.ipynb
10 | code/ch13_主成分分析/.ipynb_checkpoints/ch13-checkpoint.ipynb
11 | code/ch15_朴素贝叶斯/.ipynb_checkpoints/ch15-checkpoint.ipynb
12 | code/ch16_决策树/.ipynb_checkpoints/ch16-checkpoint.ipynb
13 | code/ch17_支持向量机/.ipynb_checkpoints/ch17-checkpoint.ipynb
14 | code/ch19_人工神经网络/.ipynb_checkpoints/ch19-checkpoint.ipynb
15 | code/ch21_Python 机器学习工具/.ipynb_checkpoints/ch21-checkpoint.ipynb
16 | code/ch22_基于RFM的P2P用户聚类模型/.ipynb_checkpoints/ch22-checkpoint.ipynb
17 | code/ch25_身份证汉字和数字识别/back_all/.DS_Store
18 | code/ch25_身份证汉字和数字识别/front_all/.DS_Store
19 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## **Code repository for 《Python 机器学习实战》 (Python Machine Learning in Practice)**
2 |
3 | ------
4 |
5 | **Python 机器学习实战**
6 |
7 | - Publisher: 科学技术文献出版社
8 | - ISBN: 9787518938087
9 | - Edition: 1
10 | - Product code: 12289823
11 | - Binding: paperback
12 | - Format: 16 开 (16mo)
13 | - Publication date: 2018-02-01
14 |
15 | ![book-cover](imgs/book-cover.jpg)
16 |
17 |
18 |
19 |
20 |
21 | ### **Where to buy**
22 |
23 | [京东商城 (JD.com)](https://item.jd.com/12289823.html)
24 |
25 |
--------------------------------------------------------------------------------
/code/README.md:
--------------------------------------------------------------------------------
1 | This is the help file for the code folder.
2 |
3 | It contains the code that accompanies the book, organized by chapter.
4 |
5 |
--------------------------------------------------------------------------------
/code/ch04_单元测试与代码覆盖率/scale.py:
--------------------------------------------------------------------------------
1 | # This file implements a small base-conversion tool that can convert values between arbitrary bases.
2 |
3 | class Scale(object):
4 |
5 |     # Dictionary of the letters conventionally used for the digits 10-15.
6 |
7 |     dic = {'10': 'A', '11': 'B', '12': 'C',
8 |            '13': 'D', '14': 'E', '15': 'F'}
9 |
10 |     # Compute the decimal value contributed by one digit of a base-`scale` number.
11 |
12 |     @staticmethod
13 |     def place_value(n_value, scale, digits):
14 |         # positional weight of the digit, initially 1
15 |         weight = 1
16 |         for i in range(1, digits + 1):
17 |             weight = scale * weight
18 |         return n_value * weight
19 |
20 |     # Convert the base-`scale` value `value_` to its decimal equivalent.
21 |
22 |     @staticmethod
23 |     def n_2_decimal(value_, scale):
24 |         sum_ = 0
25 |         n = len(str(value_))
26 |         for i in range(1, n + 1):
27 |             sum_ = sum_ + Scale.place_value(int(str(value_)
28 |                                                 [i-1]), scale, n-i)
29 |         return sum_
30 |
31 |     # Convert the decimal value `value_` to the corresponding base-`scale` value.
32 |
33 |     @staticmethod
34 |     def decimal_2_n(value_, scale):
35 |         arr = []
36 |         i = 0
37 |         while value_ != 0:
38 |             rem = value_ % scale
39 |             if rem >= 16:
40 |                 rem = "*" + str(rem) + "*"
41 |             elif 10 <= rem <= 15:
42 |                 rem = Scale.dic[str(rem)]
43 |             value_ = value_ // scale
44 |             arr.append(rem)
45 |             i += 1
46 |         return arr
47 |
48 |     # Finally, this function converts between any two bases.
49 |
50 |     @staticmethod
51 |     def any_scale(scale1_, value_, scale2_):
52 |         mid_value = Scale.n_2_decimal(value_, scale1_)
53 |         fin_value = Scale.decimal_2_n(mid_value, scale2_)
54 |         fin_value.reverse()
55 |         fin_value = ''.join([str(x) for x in fin_value])
56 |         return fin_value
57 |
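58 | # Minimal usage sketch (editor's note: these example calls are assumptions, not from the book):
59 | if __name__ == '__main__':
60 |     print(Scale.any_scale(10, 255, 16))  # decimal 255 -> hexadecimal 'FF'
61 |     print(Scale.any_scale(2, 1010, 10))  # binary 1010 -> decimal '10'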
--------------------------------------------------------------------------------
/code/ch04_单元测试与代码覆盖率/test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | from scale import Scale
3 |
4 | # Create the test class; setUp and tearDown initialize and reset the scale attribute.
5 |
6 | class AnyScaleTest(unittest.TestCase):
7 |
8 |     def setUp(self):
9 |         self.scale = Scale()
10 |
11 |     def tearDown(self):
12 |         self.scale = None
13 |
14 |     # Test case 1: convert decimal 16 to hexadecimal.
15 |
16 |     def test1_10_16_16(self):
17 |         self.assertEqual('10', self.scale.any_scale(10, 16, 16))
18 |
19 |     # Test case 2: convert decimal 17 to octal.
20 |
21 |     def test2_10_17_8(self):
22 |         self.assertEqual('21', self.scale.any_scale(10, 17, 8))
23 |
24 |     # Test case 3: convert octal 21 back to decimal.
25 |
26 |     def test3_8_21_10(self):
27 |         self.assertEqual('17', self.scale.any_scale(8, 21, 10))
28 |
29 |
30 | if __name__ == '__main__':
31 |     unittest.main()
32 |
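33 | # Editor's note (assumed invocations): run the tests with `python test.py`; to
34 | # measure coverage with coverage.py, use `coverage run test.py` followed by
35 | # `coverage report -m`.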
--------------------------------------------------------------------------------
/code/ch09_Python 程序性能分析初步/test_CPU_01.py:
--------------------------------------------------------------------------------
1 | @profile
2 | def my_func():
3 | a = [1] * (10 ** 6)
4 | b = [2] * (2 * 10 ** 7)
5 | del b
6 | return a
7 |
8 | if __name__ == '__main__':
9 | my_func()
10 |
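11 | # Editor's note: `profile` is deliberately not imported here; it is injected as a
12 | # builtin when the script is run under a profiler, e.g. `kernprof -l test_CPU_01.py`
13 | # (line_profiler) or `python -m memory_profiler test_CPU_01.py` (assumed invocations).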
--------------------------------------------------------------------------------
/code/ch09_Python 程序性能分析初步/test_CPU_02.py:
--------------------------------------------------------------------------------
1 | from memory_profiler import profile
2 |
3 | @profile
4 | def my_func():
5 | a = [1] * (10 ** 6)
6 | b = [2] * (2 * 10 ** 7)
7 | del b
8 | return a
9 |
10 | if __name__ == '__main__':
11 | my_func()
12 |
--------------------------------------------------------------------------------
/code/ch09_Python 程序性能分析初步/test_CPU_03.py:
--------------------------------------------------------------------------------
1 | from time import time
2 | from concurrent.futures import *
3 | from memory_profiler import profile
4 |
5 |
6 | def my_cal(a):
7 | j = 0
8 | for i in range(a):
9 | j = j + i
10 | print(j)
11 | return j
12 |
13 |
14 | @profile
15 | def run():
16 | list_01 = [1000000, 2000000, 1500000, 2500000, 3000000]
17 | start = time()
18 | pool = ProcessPoolExecutor(max_workers=10)
19 | list_02 = list(pool.map(my_cal, list_01))
20 | print(list_02)
21 | end = time()
22 | print('cost time {:f} s'.format(end - start))
23 |
24 | if __name__ == '__main__':
25 | run()
26 |
--------------------------------------------------------------------------------
/code/ch12_K近邻算法/ch12.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Python 机器学习实战 ——代码样例\n",
8 | "\n",
9 | "# 第十二章 K 近邻算法"
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {
15 | "collapsed": true
16 | },
17 | "source": [
18 | "## 欧氏距离计算\n",
19 | "\n",
20 | "首先,构造两个样本点:"
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": 1,
26 | "metadata": {
27 | "collapsed": true
28 | },
29 | "outputs": [],
30 | "source": [
31 | "import numpy as np\n",
32 | "x=np.array([1,1])\n",
33 | "y=np.array([4,5]) \n"
34 | ]
35 | },
36 | {
37 | "cell_type": "markdown",
38 | "metadata": {
39 | "collapsed": true
40 | },
41 | "source": [
42 | "计算上述两个样本点之间的欧式距离:"
43 | ]
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": 2,
48 | "metadata": {
49 | "collapsed": false
50 | },
51 | "outputs": [
52 | {
53 | "name": "stdout",
54 | "output_type": "stream",
55 | "text": [
56 | "5.0\n"
57 | ]
58 | }
59 | ],
60 | "source": [
61 | "from math import *\n",
62 | "def e_distance(x,y):\n",
63 | " return sqrt(sum(pow(a-b,2) for a, b in zip(x, y)))\n",
64 | " \n",
65 | "print(e_distance(x, y))\n"
66 | ]
67 | },
68 | {
69 | "cell_type": "markdown",
70 | "metadata": {},
71 | "source": [
72 | "## 曼哈顿距离计算"
73 | ]
74 | },
75 | {
76 | "cell_type": "markdown",
77 | "metadata": {},
78 | "source": [
79 | "仍然使用上述的两个样本点,计算它们之间的曼哈顿距离:"
80 | ]
81 | },
82 | {
83 | "cell_type": "code",
84 | "execution_count": 3,
85 | "metadata": {
86 | "collapsed": false
87 | },
88 | "outputs": [
89 | {
90 | "name": "stdout",
91 | "output_type": "stream",
92 | "text": [
93 | "7\n"
94 | ]
95 | }
96 | ],
97 | "source": [
98 | "from math import *\n",
99 | "def m_distance(x,y):\n",
100 | " return sum(abs(x-y))\n",
101 | " \n",
102 | "print(m_distance(x, y))\n"
103 | ]
104 | },
105 | {
106 | "cell_type": "markdown",
107 | "metadata": {},
108 | "source": [
109 | "## 切比雪夫距离计算"
110 | ]
111 | },
112 | {
113 | "cell_type": "markdown",
114 | "metadata": {},
115 | "source": [
116 | "计算两点之间的切比雪夫距离:"
117 | ]
118 | },
119 | {
120 | "cell_type": "code",
121 | "execution_count": 4,
122 | "metadata": {
123 | "collapsed": false
124 | },
125 | "outputs": [
126 | {
127 | "name": "stdout",
128 | "output_type": "stream",
129 | "text": [
130 | "4\n"
131 | ]
132 | }
133 | ],
134 | "source": [
135 | "from math import *\n",
136 | "def q_distance(x,y):\n",
137 | " return abs(x-y).max()\n",
138 | " \n",
139 | "print(q_distance(x, y))\n"
140 | ]
141 | },
142 | {
143 | "cell_type": "markdown",
144 | "metadata": {},
145 | "source": [
146 | "## 夹角余弦距离计算"
147 | ]
148 | },
149 | {
150 | "cell_type": "markdown",
151 | "metadata": {},
152 | "source": [
153 | "计算两点之间的夹角余弦距离:"
154 | ]
155 | },
156 | {
157 | "cell_type": "code",
158 | "execution_count": 5,
159 | "metadata": {
160 | "collapsed": false
161 | },
162 | "outputs": [
163 | {
164 | "name": "stdout",
165 | "output_type": "stream",
166 | "text": [
167 | "0.993883734674\n"
168 | ]
169 | }
170 | ],
171 | "source": [
172 | "from math import *\n",
173 | "def cos_distance(x,y):\n",
174 | " return np.dot(x,y)/(np.linalg.norm(x)*np.linalg.norm(y))\n",
175 | " \n",
176 | "print(cos_distance(x, y))\n"
177 | ]
178 | },
179 | {
180 | "cell_type": "markdown",
181 | "metadata": {},
182 | "source": [
183 | "## 使用K近邻算法对 Iris 数据集进行分类"
184 | ]
185 | },
186 | {
187 | "cell_type": "markdown",
188 | "metadata": {},
189 | "source": [
190 | "本节我们将通过一个例子讲解 K 近邻对 Iris 数据集进行分类。Iris 数据集是一个常用的分类用数据集,以鸢尾花的特征作为数据来源,数据集包含 150 个样本,分为 3 类花种,每类 50 个样本,每个样本包含 4 个独立属性 ( 萼片长度、萼片宽度、花瓣长度、花瓣宽度 )。"
191 | ]
192 | },
193 | {
194 | "cell_type": "code",
195 | "execution_count": 13,
196 | "metadata": {
197 | "collapsed": false
198 | },
199 | "outputs": [],
200 | "source": [
201 | "# 首先,我们导入将用到的库。\n",
202 | "\n",
203 | "from sklearn import datasets\n",
204 | "from sklearn.neighbors import KNeighborsClassifier\n",
205 | "from sklearn import cross_validation\n",
206 | "from sklearn.metrics import classification_report\n",
207 | "from sklearn.metrics import confusion_matrix\n",
208 | "from sklearn.metrics import accuracy_score\n",
209 | "\n",
210 | "\n",
211 | "# 准备数据集,并分离训练集和验证集。\n",
212 | "\n",
213 | "iris = datasets.load_iris() \n",
214 | "X = iris.data \n",
215 | "Y = iris.target \n",
216 | "validation_size = 0.20\n",
217 | "seed = 1 \n",
218 | "X_train, X_validation, Y_train, Y_validation = cross_validation.train_test_split(X, Y, test_size=validation_size, random_state=seed) \n"
219 | ]
220 | },
221 | {
222 | "cell_type": "code",
223 | "execution_count": 16,
224 | "metadata": {
225 | "collapsed": false
226 | },
227 | "outputs": [
228 | {
229 | "name": "stdout",
230 | "output_type": "stream",
231 | "text": [
232 | "accuracy_score: 1.0\n",
233 | "混淆矩阵: \n",
234 | " [[11 0 0]\n",
235 | " [ 0 13 0]\n",
236 | " [ 0 0 6]]\n",
237 | "分类报告: \n",
238 | " precision recall f1-score support\n",
239 | "\n",
240 | " 0 1.00 1.00 1.00 11\n",
241 | " 1 1.00 1.00 1.00 13\n",
242 | " 2 1.00 1.00 1.00 6\n",
243 | "\n",
244 | "avg / total 1.00 1.00 1.00 30\n",
245 | "\n"
246 | ]
247 | }
248 | ],
249 | "source": [
250 | "# 创建 KNN 分类器,并拟合数据集。\n",
251 | "\n",
252 | "knn = KNeighborsClassifier()\n",
253 | "knn.fit(X_train, Y_train)\n",
254 | "\n",
255 | "# 在验证集上进行预测,并输出 accuracy score,混淆矩阵和分类报告。\n",
256 | "\n",
257 | "predictions = knn.predict(X_validation)\n",
258 | "print('accuracy_score:',accuracy_score(Y_validation, predictions))\n",
259 | "print('混淆矩阵: \\n',confusion_matrix(Y_validation, predictions))\n",
260 | "print('分类报告: \\n',classification_report(Y_validation, predictions))\n"
261 | ]
262 | }
263 | ],
264 | "metadata": {
265 | "kernelspec": {
266 | "display_name": "Python 3",
267 | "language": "python",
268 | "name": "python3"
269 | },
270 | "language_info": {
271 | "codemirror_mode": {
272 | "name": "ipython",
273 | "version": 3
274 | },
275 | "file_extension": ".py",
276 | "mimetype": "text/x-python",
277 | "name": "python",
278 | "nbconvert_exporter": "python",
279 | "pygments_lexer": "ipython3",
280 | "version": "3.5.1"
281 | }
282 | },
283 | "nbformat": 4,
284 | "nbformat_minor": 0
285 | }
286 |
--------------------------------------------------------------------------------
/code/ch14_逻辑回归/ch14.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Python 机器学习实战 ——代码样例\n",
8 | "\n",
9 | "# 第十四章 逻辑回归"
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {},
15 | "source": [
16 | "## 使用逻辑回归进行二分类\n",
17 | "\n",
18 | "数据集介绍:乳腺癌数据集是一个经典并且简单的二分类数据集。一共有 569 个样本,其中 212 个样本为恶性 ( malignant, 0 ),357 个样本为良性 ( benign, 1 )。每个样本有 30 个特征,均为非负实数。30 个特征分为三类,前 10 个是相关指标的平均值,中间 10 个是指标的偏差,最后 10 个是指标的最差极值。"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 4,
24 | "metadata": {
25 | "collapsed": false
26 | },
27 | "outputs": [
28 | {
29 | "name": "stdout",
30 | "output_type": "stream",
31 | "text": [
32 | "混淆矩阵: \n",
33 | " [[50 4]\n",
34 | " [ 2 87]]\n"
35 | ]
36 | }
37 | ],
38 | "source": [
39 | "# 导入需要的库。\n",
40 | "\n",
41 | "import numpy as np\n",
42 | "from sklearn import datasets\n",
43 | "from sklearn.linear_model import LogisticRegression\n",
44 | "from sklearn.metrics import confusion_matrix\n",
45 | "from sklearn.model_selection import train_test_split\n",
46 | "\n",
47 | "# 导入数据并分为训练集和测试集。\n",
48 | "\n",
49 | "breast_cancer = datasets.load_breast_cancer()\n",
50 | "x = breast_cancer['data']\n",
51 | "y = breast_cancer['target']\n",
52 | "X_train, X_test, y_train, y_test = train_test_split(x, y, random_state=42) \n",
53 | "\n",
54 | "# 逻辑回归拟合。\n",
55 | "\n",
56 | "log_reg = LogisticRegression()\n",
57 | "log_reg.fit(X_train, y_train)\n",
58 | "\n",
59 | "# 测试集效果检验,输出混淆矩阵。\n",
60 | "\n",
61 | "y_predict = log_reg.predict(X_test)\n",
62 | "print('混淆矩阵: \\n',confusion_matrix(y_test, y_predict))\n"
63 | ]
64 | }
65 | ],
66 | "metadata": {
67 | "kernelspec": {
68 | "display_name": "Python 3",
69 | "language": "python",
70 | "name": "python3"
71 | },
72 | "language_info": {
73 | "codemirror_mode": {
74 | "name": "ipython",
75 | "version": 3
76 | },
77 | "file_extension": ".py",
78 | "mimetype": "text/x-python",
79 | "name": "python",
80 | "nbconvert_exporter": "python",
81 | "pygments_lexer": "ipython3",
82 | "version": "3.5.1"
83 | }
84 | },
85 | "nbformat": 4,
86 | "nbformat_minor": 0
87 | }
88 |
--------------------------------------------------------------------------------
/code/ch15_朴素贝叶斯/ch15.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Python 机器学习实战 ——代码样例\n",
8 | "\n",
9 | "# 第十五章 朴素贝叶斯分类器"
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {},
15 | "source": [
16 | "## 使用朴素贝叶斯进行二分类\n",
17 | "\n",
18 | "本节我们将通过一个例子演示使用朴素贝叶斯分类器进行简单的分类。数据集依然使用上一节的逻辑回归中的乳腺癌数据集,一个经典的二分类数据集。\n"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 5,
24 | "metadata": {
25 | "collapsed": false
26 | },
27 | "outputs": [
28 | {
29 | "name": "stdout",
30 | "output_type": "stream",
31 | "text": [
32 | "accuracy_score: 0.941489361702\n",
33 | "混淆矩阵:\n",
34 | " [[ 61 6]\n",
35 | " [ 5 116]]\n",
36 | "分类报告:\n",
37 | " precision recall f1-score support\n",
38 | "\n",
39 | " 0 0.92 0.91 0.92 67\n",
40 | " 1 0.95 0.96 0.95 121\n",
41 | "\n",
42 | "avg / total 0.94 0.94 0.94 188\n",
43 | "\n"
44 | ]
45 | }
46 | ],
47 | "source": [
48 | "# 首先,导入需要的库。\n",
49 | "\n",
50 | "from sklearn.datasets import load_breast_cancer\n",
51 | "from sklearn.cross_validation import train_test_split \n",
52 | "from sklearn.naive_bayes import GaussianNB\n",
53 | "from sklearn.metrics import classification_report\n",
54 | "from sklearn.metrics import confusion_matrix\n",
55 | "from sklearn.metrics import accuracy_score\n",
56 | "\n",
57 | "# 准备数据集。\n",
58 | "\n",
59 | "data = load_breast_cancer()\n",
60 | "label_names = data['target_names']\n",
61 | "labels = data['target']\n",
62 | "feature_names = data['feature_names']\n",
63 | "features = data['data']\n",
64 | "\n",
65 | "# 分离训练集和验证集。\n",
66 | "\n",
67 | "train, test, train_labels, test_labels = train_test_split(features,labels,test_size=0.33,random_state=42) \n",
68 | "\n",
69 | "# 创建朴素贝叶斯分类器,并拟合数据集。\n",
70 | "\n",
71 | "gnb = GaussianNB()\n",
72 | "model = gnb.fit(train, train_labels)\n",
73 | "\n",
74 | "# 在验证集上进行预测,并输出 accuracy score,混淆矩阵和分类报告。\n",
75 | "\n",
76 | "preds = gnb.predict(test) \n",
77 | "print('accuracy_score:',accuracy_score(test_labels, preds))\n",
78 | "print('混淆矩阵:\\n',confusion_matrix(test_labels, preds))\n",
79 | "print('分类报告:\\n',classification_report(test_labels, preds))\n"
80 | ]
81 | }
82 | ],
83 | "metadata": {
84 | "kernelspec": {
85 | "display_name": "Python 3",
86 | "language": "python",
87 | "name": "python3"
88 | },
89 | "language_info": {
90 | "codemirror_mode": {
91 | "name": "ipython",
92 | "version": 3
93 | },
94 | "file_extension": ".py",
95 | "mimetype": "text/x-python",
96 | "name": "python",
97 | "nbconvert_exporter": "python",
98 | "pygments_lexer": "ipython3",
99 | "version": "3.5.1"
100 | }
101 | },
102 | "nbformat": 4,
103 | "nbformat_minor": 0
104 | }
105 |
--------------------------------------------------------------------------------
/code/ch17_支持向量机/imgs/digits 数据集.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chinapnr/How-to-Python-and-Machine-Learning-book-code/fbc8a0cb507eec3c0758e238e5a2eb9b99f987b6/code/ch17_支持向量机/imgs/digits 数据集.png
--------------------------------------------------------------------------------
/code/ch19_人工神经网络/model_structure.json:
--------------------------------------------------------------------------------
1 | {"class_name": "Sequential", "config": [{"class_name": "Convolution2D", "config": {"trainable": true, "init": "glorot_uniform", "border_mode": "valid", "name": "convolution2d_1", "b_constraint": null, "subsample": [1, 1], "W_constraint": null, "dim_ordering": "tf", "input_dtype": "float32", "b_regularizer": null, "activation": "linear", "W_regularizer": null, "nb_col": 3, "nb_row": 3, "batch_input_shape": [null, 28, 28, 1], "nb_filter": 32, "bias": true, "activity_regularizer": null}}, {"class_name": "Activation", "config": {"trainable": true, "name": "activation_1", "activation": "relu"}}, {"class_name": "Convolution2D", "config": {"trainable": true, "init": "glorot_uniform", "border_mode": "valid", "name": "convolution2d_2", "b_constraint": null, "subsample": [1, 1], "W_constraint": null, "dim_ordering": "tf", "nb_filter": 32, "b_regularizer": null, "activation": "linear", "W_regularizer": null, "nb_col": 3, "nb_row": 3, "bias": true, "activity_regularizer": null}}, {"class_name": "Activation", "config": {"trainable": true, "name": "activation_2", "activation": "relu"}}, {"class_name": "MaxPooling2D", "config": {"trainable": true, "border_mode": "valid", "strides": [2, 2], "pool_size": [2, 2], "name": "maxpooling2d_1", "dim_ordering": "tf"}}, {"class_name": "Flatten", "config": {"trainable": true, "name": "flatten_1"}}, {"class_name": "Dense", "config": {"trainable": true, "init": "glorot_uniform", "b_constraint": null, "bias": true, "W_constraint": null, "output_dim": 128, "input_dim": 4608, "b_regularizer": null, "activation": "linear", "W_regularizer": null, "name": "dense_1", "activity_regularizer": null}}, {"class_name": "Activation", "config": {"trainable": true, "name": "activation_3", "activation": "relu"}}, {"class_name": "Dense", "config": {"trainable": true, "init": "glorot_uniform", "b_constraint": null, "bias": true, "W_constraint": null, "output_dim": 10, "input_dim": 128, "b_regularizer": null, "activation": "linear", "W_regularizer": null, "name": "dense_2", "activity_regularizer": null}}, {"class_name": "Activation", "config": {"trainable": true, "name": "activation_4", "activation": "softmax"}}], "keras_version": "1.2.0"}
--------------------------------------------------------------------------------
/code/ch19_人工神经网络/model_weight.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chinapnr/How-to-Python-and-Machine-Learning-book-code/fbc8a0cb507eec3c0758e238e5a2eb9b99f987b6/code/ch19_人工神经网络/model_weight.h5
--------------------------------------------------------------------------------
/code/ch23_文本的主题分类/text_calssify.py:
--------------------------------------------------------------------------------
1 | # 本例实现一个对文本内容二分类的例子
2 |
3 | import numpy as np
4 | import pandas as pd
5 | from sklearn.feature_extraction.text import TfidfVectorizer
6 | from sklearn.metrics import classification_report, confusion_matrix
7 | from sklearn.feature_selection import SelectKBest
8 | from sklearn.feature_selection import chi2
9 | from sklearn.naive_bayes import MultinomialNB
10 | from sklearn.svm import SVC
11 | import re
12 | import jieba
13 | from sklearn.metrics import f1_score
14 |
15 | # 设置变量
16 |
17 | # 读取文件路径
18 | file_path = r'sample.txt'
19 |
20 | # 是否分词/不分词 (True=分词)
21 | cut_word = True
22 |
23 | # 去除特殊字符类型 all=所有
24 | stop_character_type = 'all'
25 |
26 | # 停用词词典
27 | stopword = False # 是否调用停用词词典
28 | stopwords_path = 'stopword.txt' # 停用词词典路径
29 |
30 | # 分词方法选择(目前仅支持jieba)
31 | cut_word_method = 'jieba'
32 |
33 | # 是否调用行业自定义词典
34 | load_userdict = False
35 |
36 | # 行业自定义词典路径
37 | userdict_path = r'userdict.txt'
38 |
39 | # 训练集切割比例
40 | train_ratio = 0.7 # 训练集的比例,取值[0-1]
41 |
42 | # 模型参数
43 | model = 'NB' # 选择分类算法 'NB'=朴素贝叶斯算法, 'SVM'=SVM算法
44 |
45 | analyzer = 'char' # 文本特征组成方式: string, {‘word’, ‘char’}
46 |
47 | ngram_range = (1, 2) # n_gram的上限和下限: tuple (min_n, max_n)
48 |
49 | min_df = 1 # 忽略出现次数小于该值的词项: float in range [0.0, 1.0] or int, default=1
50 |
51 | k = 2000 # 保留最有效的k个特征
52 |
53 | kernel = 'linear' # SVM算法的kernel: string, optional,{‘linear’, ‘poly’, ‘rbf’, ‘sigmoid’}
54 |
55 | svm_c = 1.0 # SVM算法的C值(误差项的惩罚系数)
56 |
57 | alpha = 0.1 # 朴素贝叶斯算法的alpha值
58 |
59 | # 1. 读取文本数据
60 | # 读取文件存入Pandas dataframe
61 | data = pd.read_table(file_path)
62 |
63 | # 2. 删除含缺失值的样本
64 | data = data.dropna()
65 |
66 | # 3. 分割训练集和测试集
67 | # 随机选取一定比例的数据作为训练样本
68 | print('总数据条数',len(data))
69 | train = data.sample(n=int(len(data)*train_ratio)) # 生成训练集
70 |
71 | # 剩余部分作为测试样本
72 | index = data.index.tolist()
73 | train_index = train.index.tolist() # 生成训练集的index
74 | test_index = [i for i in index if i not in train_index] # 生成测试集的index
75 | test = data.loc[test_index, ] # 生成测试集
76 |
77 | # 将训练集和测试集进一步拆分成文本和标签,并转成list
78 | text = data.columns[0] # 获取第一列列名,即存放文本内容的列
79 | label = data.columns[1] # 获取第二列列名,即存放标签的列(0,1两个类别)
80 | x_train = train[text].tolist() # 训练集的文本内容
81 | y_train = train[label].tolist() # 训练集的标签
82 | x_test = test[text].tolist() # 测试集的文本内容
83 | y_test = test[label].tolist() # 测试集的标签
84 |
85 |
86 | # 3. 判断使用分词/不分词方法
87 | if cut_word is True:
88 | print('使用分词方法')
89 |
90 | # 4. 选择要去除的特殊字符
91 | if stop_character_type == 'all': # 去除所有
92 | stop_character = u'[’()〈〉:.!"#$%&\'()*+,-./:;~<=>?@,。?↓★、�…【】《》?©“”▪►‘’!•[\\]^_`{|}~]+|[a-zA-Z0-9]'
93 | elif stop_character_type == 'chararter': # 去除符号
94 | stop_character = u'[’()〈〉:.!"#$%&\'()*+,-./:;~<=>?@,。?↓★、�…【】《》?©“”▪►‘’!•[\\]^_`{|}~]+'
95 | elif stop_character_type == 'english_word': # 去除英文
96 | stop_character = u'[a-zA-Z]'
97 | elif stop_character_type == 'number': # 去除数字
98 | stop_character = u'[0-9]'
99 | elif stop_character_type == 'url': # 去除url
100 |         stop_character = u'http://[a-zA-Z0-9.?/&=:]*'
101 |
102 | # 去除停用词
103 | if stopword is True:
104 | with open(stopwords_path, 'r') as f:
105 | stopwords_set = {line.strip() for line in f} # 从停用词字典中获取所有停用词
106 | print('成功从以下路径获取停用词词典', stopwords_path)
107 | else:
108 |     stopwords_set = set()
109 | print('停用词词典为空')
110 |
111 | # 构建jieba分词方法
112 | if cut_word_method == 'jieba':
113 | print('使用jieba分词')
114 | # 是否调用行业自定义词典, True=调用
115 | if load_userdict is True:
116 | jieba.load_userdict(f=userdict_path) # 调用自定义词典,帮助实现更精确的分词(如“汇付天下”等)
117 | print('使用jieba自定义词典')
118 | else:
119 | print('不使用jieba自定义词典')
120 |
121 | def jieba_cutword(paper, stopwords_set):
122 | r = re.compile(stop_character)
123 | paper = re.sub(r, '', paper)
124 | seg_words = jieba.cut(paper, cut_all=False)
125 | words = [word for word in seg_words if word not in stopwords_set and word != ' ']
126 | return " ".join(words)
127 |
128 | def my_sentence(paper_list):
129 | words = []
130 | for paper in paper_list:
131 | words.append(jieba_cutword(paper, stopwords_set))
132 | return words
133 |
134 | x_train = my_sentence(x_train) # 对数据执行jieba分词
135 | x_test = my_sentence(x_test)
136 |
137 |
138 | if cut_word is False:
139 | print('使用不分词方法')
140 |
141 | stop_character = ''
142 |
143 | # 4. 选择要去除的特殊字符
144 | if stop_character_type == 'all':
145 | stop_character = u'[’()〈〉:.!"#$%&\'()*+,-./:;~<=>?@,。?↓★、�…【】《》?©“”▪►‘’!•[\\]^_`{|}~]+|[a-zA-Z0-9]'
146 | elif stop_character_type == 'chararter':
147 | stop_character = u'[’()〈〉:.!"#$%&\'()*+,-./:;~<=>?@,。?↓★、�…【】《》?©“”▪►‘’!•[\\]^_`{|}~]+'
148 | elif stop_character_type == 'english_word':
149 | stop_character = u'[a-zA-Z]'
150 | elif stop_character_type == 'number':
151 | stop_character = u'[0-9]'
152 | elif stop_character_type == 'url':
153 | stop_character = u'http://[a-zA-Z0-9.?/&=:]*'
154 |
155 | r = re.compile(stop_character)
156 | x_train = [re.sub(r, '', text) for text in x_train]
157 | x_test = [re.sub(r, '', text) for text in x_test]
158 |
159 | # 5. 文本特征提取
160 | # 对训练集的文本提取特征
161 | vectorizer = TfidfVectorizer(analyzer=analyzer, ngram_range=ngram_range, min_df=min_df) # 构建TF-IDF特征提取器
162 | fea_train = vectorizer.fit_transform(x_train) # 对训练集文本提取特征
163 | chi_vectorizer = SelectKBest(chi2, k=k) # 构建特征筛选器,只保留最有效的k个特征
164 | trainvec = chi_vectorizer.fit_transform(fea_train, y_train) # 对训练集文本提取最有效的k个特征
165 |
166 |
167 | # 6. 模型建立
168 |
169 | clf = None
170 |
171 | if model == 'SVM': # 模型一:SVM算法
172 | clf = SVC(kernel=kernel, C=svm_c) # 可调参数
173 | clf.fit(trainvec, y_train)
174 |
175 | if model == 'NB': # 模型二:朴素贝叶斯算法
176 | clf = MultinomialNB(alpha=alpha) # 可调参数
177 | clf.fit(trainvec, y_train)
178 |
179 |
180 | # 7. 利用模型进行预测,并输出准确率表现
181 | # 对预测集进行特征转化
182 | testvec = chi_vectorizer.transform(vectorizer.transform(x_test))
183 |
184 | # 利用训练好的模型对测试样本预测
185 | y_pred = clf.predict(testvec)
186 |
187 | # 输出混淆矩阵和准确率报告
188 | print('模型预测结果:')
189 | print('混淆矩阵')
190 | print(confusion_matrix(y_test, y_pred))
191 | print(classification_report(y_test, y_pred))
192 | print('f1分数', f1_score(y_test, y_pred))
193 | print('Test Accuracy:%.2f' % clf.score(testvec, y_test))
194 |
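195 | # Editor's note, inferred from the code above rather than from the book: sample.txt
196 | # is expected to be a tab-separated file with a header row, whose first column holds
197 | # the document text and whose second column holds the 0/1 label; pd.read_table()
198 | # loads it, and the two columns are then picked up by position.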
--------------------------------------------------------------------------------
/code/ch24_利用机器翻译实现自然语言查询/train.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 |
4 | import numpy as np
5 | import tensorflow as tf
6 | import math
7 | import os
8 | import sys
9 | import time
10 | import seq2seq_model
11 |
12 | def train():
13 | with tf.Session() as sess:
14 | # 1.创建一个模型
15 | model = seq2seq_model.create_model(sess, False) # forward_only= False 训练时false
16 | # 2.读入测试集
17 | dev_set = seq2seq_model.read_data("test_prepared.cn","test_prepared.sq")
18 | # 3.读入训练集
19 | train_set = seq2seq_model.read_data("train_prepared.cn","train_prepared.sq")
20 | print("data prepared")
21 | train_bucket_sizes = [len(train_set[b]) for b in range(len(seq2seq_model._buckets))]
22 | train_total_size = float(sum(train_bucket_sizes))
23 | train_buckets_scale = [sum(train_bucket_sizes[:i + 1]) / train_total_size for i in range(len(train_bucket_sizes))]
24 | print("buckets prepared")
25 | step_time = 0.0
26 | loss = 0.0
27 | current_step = 0
28 | previous_losses = []
29 | while True:
30 | # Choose a bucket according to data distribution. We pick a random number
31 | # in [0, 1] and use the corresponding interval in train_buckets_scale.
32 | # 随机生成一个0-1数,在生成bucket_id中使用
33 | random_number_01 = np.random.random_sample()
34 | bucket_id = min([i for i in range(len(train_buckets_scale)) if train_buckets_scale[i] > random_number_01])
35 | start_time = time.time()
36 | # Get a batch and make a step.
37 | encoder_inputs, decoder_inputs, target_weights = model.get_batch(train_set, bucket_id) # get a batch
38 | _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,target_weights, bucket_id, False) # make a step
39 | step_time += (time.time() - start_time) / seq2seq_model.FLAGS.steps_per_checkpoint # 平均一次的时间
40 | loss += step_loss / seq2seq_model.FLAGS.steps_per_checkpoint # 平均loss 200个batch的Loss的平均值
41 | current_step += 1
42 | if current_step % seq2seq_model.FLAGS.steps_per_checkpoint == 0:
43 | perplexity = math.exp(float(loss)) if loss < 300 else float("inf") # 总混淆度(加权平均?)
44 | print ("global step %d learning rate %.4f step-time %.2f perplexity "
45 | "%.2f" % (model.global_step.eval(), model.learning_rate.eval(),
46 | step_time, perplexity))
47 | # 如果损失值在最近3次内没有再降低,减小学习率
48 | if len(previous_losses) > 2 and loss > max(previous_losses[-3:]):
49 | sess.run(model.learning_rate_decay_op)
50 | previous_losses.append(loss)
51 | checkpoint_path = os.path.join('dir', "translate.ckpt")
52 | model.saver.save(sess, checkpoint_path, global_step=model.global_step)
53 | step_time, loss = 0.0, 0.0
54 | for bucket_id in range(len(seq2seq_model._buckets)):
55 | if len(dev_set[bucket_id]) == 0:
56 | print(" eval: empty bucket %d" % (bucket_id))
57 | continue
58 | encoder_inputs, decoder_inputs, target_weights = model.get_batch(dev_set, bucket_id)
59 | _, eval_loss, _ = model.step(sess, encoder_inputs, decoder_inputs,target_weights, bucket_id, True)
60 | eval_ppx = math.exp(float(eval_loss)) if eval_loss < 300 else float("inf")
61 | print(" eval: bucket %d perplexity %.2f" % (bucket_id, eval_ppx)) # 每个Bucket对应一个混淆度
62 | sys.stdout.flush()
63 |
64 | def main(_):
65 | train()
66 |
67 | if __name__ == "__main__":
68 | tf.app.run()
69 |
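70 | # Editor's note, inferred from the loop above (seq2seq_model.py is not shown here):
71 | # every FLAGS.steps_per_checkpoint steps the script reports perplexity, decays the
72 | # learning rate when the loss has not improved over the last 3 checkpoints, saves a
73 | # checkpoint under ./dir, and evaluates perplexity on each bucket of the dev set.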
--------------------------------------------------------------------------------
/code/ch25_身份证汉字和数字识别/Dependency.txt:
--------------------------------------------------------------------------------
1 | tensorflow==1.2.0
2 | requests==2.10.0
3 | pandas==0.18.1
4 | Flask==0.11.1
5 | python_dateutil==2.6.1
6 | flask_sqlalchemy==2.2
7 | imutils==0.4.3
8 | keras==2.0.8
9 | gunicorn==19.7.1
10 | scikit_image==0.12.3
11 | h5py==2.6.0
12 | PyMySQL==0.7.11
13 | Pillow==3.2.0
14 | fishbase==1.0.12
15 | celery==4.1.1
16 | redis==2.10.6
17 | requests_toolbelt==0.8.0
18 | configparser==3.5.0
19 | opencv-python==3.4.1.15
20 | shapely==1.6.4.post1
21 |
--------------------------------------------------------------------------------
/code/ch25_身份证汉字和数字识别/back_all/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/code/ch25_身份证汉字和数字识别/back_all/back_rotate/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chinapnr/How-to-Python-and-Machine-Learning-book-code/fbc8a0cb507eec3c0758e238e5a2eb9b99f987b6/code/ch25_身份证汉字和数字识别/back_all/back_rotate/.DS_Store
--------------------------------------------------------------------------------
/code/ch25_身份证汉字和数字识别/back_all/back_rotate/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/code/ch25_身份证汉字和数字识别/back_all/east_part/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/code/ch25_身份证汉字和数字识别/back_all/east_part/cut_back_v3.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 |
3 |
4 | import numpy as np
5 | import os
6 | import cv2
7 | import shutil
8 | #from matplotlib import pyplot as plt
9 |
10 |
11 | def check_dir(path):
12 | if not (os.path.exists(path) and os.path.isdir(path)):
13 | os.mkdir(path)
14 |
15 |
16 | def rotate(image, angle, center=None, scale=1.0):
17 | # 获取图像尺寸
18 | (h, w) = image.shape[:2]
19 |
20 | # 若未指定旋转中心,则将图像中心设为旋转中心
21 | if center is None:
22 | center = (w / 2, h / 2)
23 |
24 | # 执行旋转
25 | M = cv2.getRotationMatrix2D(center, angle, scale)
26 | rotated = cv2.warpAffine(image, M, (w, h))
27 |
28 | # 返回旋转后的图像
29 | return rotated
30 |
31 |
32 | def calc_rate(line):
33 | x1, y1, x2, y2, x3, y3, x4, y4 = line.split(',')
34 | width = abs(float(x2) - float(x1))
35 | height = abs(float(y1) - float(y4))
36 | return height, [int(d) for d in (x1, y1, x3, y3)]
37 | # return width*height, height, height/width
38 |
39 | # def do_cut_back_v2(src_path, f):
40 | def do_cut_back(img, line_box):
41 | line_height_list = []
42 | line_pts_list = []
43 |
44 | return_result = []
45 |
46 | # with open(os.path.join(src_path, f), 'r') as reader:
47 | # lines = reader.readlines()
48 |
49 | lines = [','.join([str(e) for e in d]) for d in line_box]
50 | if len(lines) != 3:
51 | # print(f)
52 | return None
53 |
54 | for line in lines:
55 | h, pts = calc_rate(line)
56 | line_height_list.append(h)
57 | line_pts_list.append(pts)
58 | height_max_index = np.argmax(line_height_list)
59 | x1, y1, x3, y3 = line_pts_list[height_max_index]
60 | all_y1_list = [d[1] for d in line_pts_list]
61 |
62 | little_y1_index_list = [i for i, d in enumerate(all_y1_list) if d > y1]
63 | # img = cv2.imread(os.path.join(src_path, f.replace('.txt', '.png')), 1)
64 |
65 | if len(little_y1_index_list) == 2:
66 |
67 | current_y1_list = [all_y1_list[d] for d in little_y1_index_list]
68 | current_y1_list_index = np.argsort(current_y1_list)
69 |
70 | for save_index, i in enumerate([little_y1_index_list[d] for d in current_y1_list_index]):
71 | current_x1, current_y1, current_x3, current_y3 = line_pts_list[i]
72 | return_result.append(img[current_y1 - 2: current_y3, (current_x1 - 2): (current_x3 + 8)])
73 | elif len(little_y1_index_list) == 0:
74 | useful_line_index = [d for d in range(3) if d not in [height_max_index]]
75 |
76 | current_y1_list = [all_y1_list[d] for d in useful_line_index]
77 | current_y1_list_index = np.argsort(current_y1_list).tolist()[::-1]
78 |
79 | for save_index, i in enumerate([useful_line_index[d] for d in current_y1_list_index]):
80 | current_x1, current_y1, current_x3, current_y3 = line_pts_list[i]
81 | line_img = img[current_y1: current_y3, (current_x1 - 2): (current_x3 + 8)]
82 | line_img = rotate(line_img, 180)
83 | return_result.append(line_img)
84 | return return_result
85 |
86 |
--------------------------------------------------------------------------------
/code/ch25_身份证汉字和数字识别/back_all/east_part/data_util.py:
--------------------------------------------------------------------------------
1 | '''
2 | this file is modified from the keras implementation of multi-threaded data processing,
3 | see https://github.com/fchollet/keras/blob/master/keras/utils/data_utils.py
4 | '''
5 | import time
6 | import numpy as np
7 | import threading
8 | import multiprocessing
9 | try:
10 | import queue
11 | except ImportError:
12 | import Queue as queue
13 |
14 |
15 | class GeneratorEnqueuer():
16 | """Builds a queue out of a data generator.
17 |
18 | Used in `fit_generator`, `evaluate_generator`, `predict_generator`.
19 |
20 | # Arguments
21 | generator: a generator function which endlessly yields data
22 | use_multiprocessing: use multiprocessing if True, otherwise threading
23 | wait_time: time to sleep in-between calls to `put()`
24 | random_seed: Initial seed for workers,
25 | will be incremented by one for each workers.
26 | """
27 |
28 | def __init__(self, generator,
29 | use_multiprocessing=False,
30 | wait_time=0.05,
31 | random_seed=None):
32 | self.wait_time = wait_time
33 | self._generator = generator
34 | self._use_multiprocessing = use_multiprocessing
35 | self._threads = []
36 | self._stop_event = None
37 | self.queue = None
38 | self.random_seed = random_seed
39 |
40 | def start(self, workers=1, max_queue_size=10):
41 | """Kicks off threads which add data from the generator into the queue.
42 |
43 | # Arguments
44 | workers: number of worker threads
45 | max_queue_size: queue size
46 | (when full, threads could block on `put()`)
47 | """
48 |
49 | def data_generator_task():
50 | while not self._stop_event.is_set():
51 | try:
52 | if self._use_multiprocessing or self.queue.qsize() < max_queue_size:
53 | generator_output = next(self._generator)
54 | self.queue.put(generator_output)
55 | else:
56 | time.sleep(self.wait_time)
57 | except Exception:
58 | self._stop_event.set()
59 | raise
60 |
61 | try:
62 | if self._use_multiprocessing:
63 | self.queue = multiprocessing.Queue(maxsize=max_queue_size)
64 | self._stop_event = multiprocessing.Event()
65 | else:
66 | self.queue = queue.Queue()
67 | self._stop_event = threading.Event()
68 |
69 | for _ in range(workers):
70 | if self._use_multiprocessing:
71 | # Reset random seed else all children processes
72 | # share the same seed
73 | np.random.seed(self.random_seed)
74 | thread = multiprocessing.Process(target=data_generator_task)
75 | thread.daemon = True
76 | if self.random_seed is not None:
77 | self.random_seed += 1
78 | else:
79 | thread = threading.Thread(target=data_generator_task)
80 | self._threads.append(thread)
81 | thread.start()
82 | except:
83 | self.stop()
84 | raise
85 |
86 | def is_running(self):
87 | return self._stop_event is not None and not self._stop_event.is_set()
88 |
89 | def stop(self, timeout=None):
90 | """Stops running threads and wait for them to exit, if necessary.
91 |
92 | Should be called by the same thread which called `start()`.
93 |
94 | # Arguments
95 | timeout: maximum time to wait on `thread.join()`.
96 | """
97 | if self.is_running():
98 | self._stop_event.set()
99 |
100 | for thread in self._threads:
101 | if thread.is_alive():
102 | if self._use_multiprocessing:
103 | thread.terminate()
104 | else:
105 | thread.join(timeout)
106 |
107 | if self._use_multiprocessing:
108 | if self.queue is not None:
109 | self.queue.close()
110 |
111 | self._threads = []
112 | self._stop_event = None
113 | self.queue = None
114 |
115 | def get(self):
116 | """Creates a generator to extract data from the queue.
117 |
118 | Skip the data if it is `None`.
119 |
120 | # Returns
121 | A generator
122 | """
123 | while self.is_running():
124 | if not self.queue.empty():
125 | inputs = self.queue.get()
126 | if inputs is not None:
127 | yield inputs
128 | else:
129 | time.sleep(self.wait_time)
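130 |
131 | # Minimal usage sketch (editor's note; `my_generator` is a hypothetical generator
132 | # function, not part of this module):
133 | # enqueuer = GeneratorEnqueuer(my_generator(), use_multiprocessing=False)
134 | # enqueuer.start(workers=2, max_queue_size=10)
135 | # batch = next(enqueuer.get())
136 | # enqueuer.stop()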
--------------------------------------------------------------------------------
/code/ch25_身份证汉字和数字识别/back_all/east_part/lanms/.gitignore:
--------------------------------------------------------------------------------
1 | adaptor.so
2 |
--------------------------------------------------------------------------------
/code/ch25_身份证汉字和数字识别/back_all/east_part/lanms/.ycm_extra_conf.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #
3 | # Copyright (C) 2014 Google Inc.
4 | #
5 | # This file is part of YouCompleteMe.
6 | #
7 | # YouCompleteMe is free software: you can redistribute it and/or modify
8 | # it under the terms of the GNU General Public License as published by
9 | # the Free Software Foundation, either version 3 of the License, or
10 | # (at your option) any later version.
11 | #
12 | # YouCompleteMe is distributed in the hope that it will be useful,
13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | # GNU General Public License for more details.
16 | #
17 | # You should have received a copy of the GNU General Public License
18 | # along with YouCompleteMe. If not, see <http://www.gnu.org/licenses/>.
19 |
20 | import os
21 | import sys
22 | import glob
23 | import ycm_core
24 |
25 | # These are the compilation flags that will be used in case there's no
26 | # compilation database set (by default, one is not set).
27 | # CHANGE THIS LIST OF FLAGS. YES, THIS IS THE DROID YOU HAVE BEEN LOOKING FOR.
28 | sys.path.append(os.path.dirname(__file__))
29 |
30 |
31 | BASE_DIR = os.path.dirname(os.path.realpath(__file__))
32 |
33 | from plumbum.cmd import python_config
34 |
35 |
36 | flags = [
37 | '-Wall',
38 | '-Wextra',
39 | '-Wnon-virtual-dtor',
40 | '-Winvalid-pch',
41 | '-Wno-unused-local-typedefs',
42 | '-std=c++11',
43 | '-x', 'c++',
44 | '-Iinclude',
45 | ] + python_config('--cflags').split()
46 |
47 |
48 | # Set this to the absolute path to the folder (NOT the file!) containing the
49 | # compile_commands.json file to use that instead of 'flags'. See here for
50 | # more details: http://clang.llvm.org/docs/JSONCompilationDatabase.html
51 | #
52 | # Most projects will NOT need to set this to anything; you can just change the
53 | # 'flags' list of compilation flags.
54 | compilation_database_folder = ''
55 |
56 | if os.path.exists( compilation_database_folder ):
57 | database = ycm_core.CompilationDatabase( compilation_database_folder )
58 | else:
59 | database = None
60 |
61 | SOURCE_EXTENSIONS = [ '.cpp', '.cxx', '.cc', '.c', '.m', '.mm' ]
62 |
63 | def DirectoryOfThisScript():
64 | return os.path.dirname( os.path.abspath( __file__ ) )
65 |
66 |
67 | def MakeRelativePathsInFlagsAbsolute( flags, working_directory ):
68 | if not working_directory:
69 | return list( flags )
70 | new_flags = []
71 | make_next_absolute = False
72 | path_flags = [ '-isystem', '-I', '-iquote', '--sysroot=' ]
73 | for flag in flags:
74 | new_flag = flag
75 |
76 | if make_next_absolute:
77 | make_next_absolute = False
78 | if not flag.startswith( '/' ):
79 | new_flag = os.path.join( working_directory, flag )
80 |
81 | for path_flag in path_flags:
82 | if flag == path_flag:
83 | make_next_absolute = True
84 | break
85 |
86 | if flag.startswith( path_flag ):
87 | path = flag[ len( path_flag ): ]
88 | new_flag = path_flag + os.path.join( working_directory, path )
89 | break
90 |
91 | if new_flag:
92 | new_flags.append( new_flag )
93 | return new_flags
94 |
95 |
96 | def IsHeaderFile( filename ):
97 | extension = os.path.splitext( filename )[ 1 ]
98 | return extension in [ '.h', '.hxx', '.hpp', '.hh' ]
99 |
100 |
101 | def GetCompilationInfoForFile( filename ):
102 | # The compilation_commands.json file generated by CMake does not have entries
103 | # for header files. So we do our best by asking the db for flags for a
104 | # corresponding source file, if any. If one exists, the flags for that file
105 | # should be good enough.
106 | if IsHeaderFile( filename ):
107 | basename = os.path.splitext( filename )[ 0 ]
108 | for extension in SOURCE_EXTENSIONS:
109 | replacement_file = basename + extension
110 | if os.path.exists( replacement_file ):
111 | compilation_info = database.GetCompilationInfoForFile(
112 | replacement_file )
113 | if compilation_info.compiler_flags_:
114 | return compilation_info
115 | return None
116 | return database.GetCompilationInfoForFile( filename )
117 |
118 |
119 | # This is the entry point; this function is called by ycmd to produce flags for
120 | # a file.
121 | def FlagsForFile( filename, **kwargs ):
122 | if database:
123 | # Bear in mind that compilation_info.compiler_flags_ does NOT return a
124 | # python list, but a "list-like" StringVec object
125 | compilation_info = GetCompilationInfoForFile( filename )
126 | if not compilation_info:
127 | return None
128 |
129 | final_flags = MakeRelativePathsInFlagsAbsolute(
130 | compilation_info.compiler_flags_,
131 | compilation_info.compiler_working_dir_ )
132 | else:
133 | relative_to = DirectoryOfThisScript()
134 | final_flags = MakeRelativePathsInFlagsAbsolute( flags, relative_to )
135 |
136 | return {
137 | 'flags': final_flags,
138 | 'do_cache': True
139 | }
140 |
141 |
--------------------------------------------------------------------------------
/code/ch25_身份证汉字和数字识别/back_all/east_part/lanms/Makefile:
--------------------------------------------------------------------------------
1 | CXXFLAGS = -I include -std=c++11 -O3 $(shell python3-config --cflags)
2 | LDFLAGS = $(shell python3-config --ldflags)
3 |
4 | DEPS = lanms.h $(shell find include -xtype f)
5 | CXX_SOURCES = adaptor.cpp include/clipper/clipper.cpp
6 |
7 | LIB_SO = adaptor.so
8 |
9 | $(LIB_SO): $(CXX_SOURCES) $(DEPS)
10 | $(CXX) -o $@ $(CXXFLAGS) $(LDFLAGS) $(CXX_SOURCES) --shared -fPIC
11 |
12 | clean:
13 | rm -rf $(LIB_SO)
14 |
--------------------------------------------------------------------------------
/code/ch25_身份证汉字和数字识别/back_all/east_part/lanms/__init__.py:
--------------------------------------------------------------------------------
1 | import subprocess
2 | import os
3 | import numpy as np
4 |
5 | BASE_DIR = os.path.dirname(os.path.realpath(__file__))
6 |
7 | if subprocess.call(['make', '-C', BASE_DIR]) != 0:  # a non-zero exit status means the build failed
8 | raise RuntimeError('Cannot compile lanms: {}'.format(BASE_DIR))
9 |
10 |
11 | def merge_quadrangle_n9(polys, thres=0.3, precision=10000):
12 | from .adaptor import merge_quadrangle_n9 as nms_impl
13 | if len(polys) == 0:
14 | return np.array([], dtype='float32')
15 | p = polys.copy()
16 | p[:,:8] *= precision
17 | ret = np.array(nms_impl(p, thres), dtype='float32')
18 | ret[:,:8] /= precision
19 | return ret
20 |
21 |
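22 | # Editor's note: coordinates are multiplied by `precision` before the C++ NMS call
23 | # and divided back afterwards, so the integer-based clipper geometry inside lanms
24 | # keeps sub-pixel accuracy.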
--------------------------------------------------------------------------------
/code/ch25_身份证汉字和数字识别/back_all/east_part/lanms/__main__.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | from . import merge_quadrangle_n9
5 |
6 | if __name__ == '__main__':
7 | # unit square with confidence 1
8 | q = np.array([0, 0, 0, 1, 1, 1, 1, 0, 1], dtype='float32')
9 |
10 | print(merge_quadrangle_n9(np.array([q, q + 0.1, q + 2])))
11 |
--------------------------------------------------------------------------------
/code/ch25_身份证汉字和数字识别/back_all/east_part/lanms/adaptor.cpp:
--------------------------------------------------------------------------------
1 | #include "pybind11/pybind11.h"
2 | #include "pybind11/numpy.h"
3 | #include "pybind11/stl.h"
4 | #include "pybind11/stl_bind.h"
5 |
6 | #include "lanms.h"
7 |
8 | namespace py = pybind11;
9 |
10 |
11 | namespace lanms_adaptor {
12 |
13 | std::vector<std::vector<float>> polys2floats(const std::vector<lanms::Polygon> &polys) {
14 | std::vector<std::vector<float>> ret;
15 | for (size_t i = 0; i < polys.size(); i ++) {
16 | auto &p = polys[i];
17 | auto &poly = p.poly;
18 | ret.emplace_back(std::vector<float>{
19 | float(poly[0].X), float(poly[0].Y),
20 | float(poly[1].X), float(poly[1].Y),
21 | float(poly[2].X), float(poly[2].Y),
22 | float(poly[3].X), float(poly[3].Y),
23 | float(p.score),
24 | });
25 | }
26 |
27 | return ret;
28 | }
29 |
30 |
31 | /**
32 | *
33 | * \param quad_n9 an n-by-9 numpy array, where first 8 numbers denote the
34 | * quadrangle, and the last one is the score
35 | * \param iou_threshold two quadrangles with iou score above this threshold
36 | * will be merged
37 | *
38 | * \return an n-by-9 numpy array, the merged quadrangles
39 | */
40 | std::vector<std::vector<float>> merge_quadrangle_n9(
41 | py::array_t<float, py::array::c_style | py::array::forcecast> quad_n9,
42 | float iou_threshold) {
43 | auto pbuf = quad_n9.request();
44 | if (pbuf.ndim != 2 || pbuf.shape[1] != 9)
45 | throw std::runtime_error("quadrangles must have a shape of (n, 9)");
46 | auto n = pbuf.shape[0];
47 | auto ptr = static_cast<float *>(pbuf.ptr);
48 | return polys2floats(lanms::merge_quadrangle_n9(ptr, n, iou_threshold));
49 | }
50 |
51 | }
52 |
53 | PYBIND11_PLUGIN(adaptor) {
54 | py::module m("adaptor", "NMS");
55 |
56 | m.def("merge_quadrangle_n9", &lanms_adaptor::merge_quadrangle_n9,
57 | "merge quadrangels");
58 |
59 | return m.ptr();
60 | }
61 |
62 |
--------------------------------------------------------------------------------
/code/ch25_身份证汉字和数字识别/back_all/east_part/lanms/include/clipper/clipper.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chinapnr/How-to-Python-and-Machine-Learning-book-code/fbc8a0cb507eec3c0758e238e5a2eb9b99f987b6/code/ch25_身份证汉字和数字识别/back_all/east_part/lanms/include/clipper/clipper.cpp
--------------------------------------------------------------------------------
/code/ch25_身份证汉字和数字识别/back_all/east_part/lanms/include/pybind11/buffer_info.h:
--------------------------------------------------------------------------------
1 | /*
2 | pybind11/buffer_info.h: Python buffer object interface
3 |
4 | Copyright (c) 2016 Wenzel Jakob
5 |
6 | All rights reserved. Use of this source code is governed by a
7 | BSD-style license that can be found in the LICENSE file.
8 | */
9 |
10 | #pragma once
11 |
12 | #include "common.h"
13 |
14 | NAMESPACE_BEGIN(pybind11)
15 |
16 | /// Information record describing a Python buffer object
17 | struct buffer_info {
18 | void *ptr = nullptr; // Pointer to the underlying storage
19 | ssize_t itemsize = 0; // Size of individual items in bytes
20 | ssize_t size = 0; // Total number of entries
21 | std::string format; // For homogeneous buffers, this should be set to format_descriptor<T>::format()
22 | ssize_t ndim = 0; // Number of dimensions
23 | std::vector<ssize_t> shape; // Shape of the tensor (1 entry per dimension)
24 | std::vector<ssize_t> strides; // Number of entries between adjacent entries (for each per dimension)
25 |
26 | buffer_info() { }
27 |
28 | buffer_info(void *ptr, ssize_t itemsize, const std::string &format, ssize_t ndim,
29 | detail::any_container<ssize_t> shape_in, detail::any_container<ssize_t> strides_in)
30 | : ptr(ptr), itemsize(itemsize), size(1), format(format), ndim(ndim),
31 | shape(std::move(shape_in)), strides(std::move(strides_in)) {
32 | if (ndim != (ssize_t) shape.size() || ndim != (ssize_t) strides.size())
33 | pybind11_fail("buffer_info: ndim doesn't match shape and/or strides length");
34 | for (size_t i = 0; i < (size_t) ndim; ++i)
35 | size *= shape[i];
36 | }
37 |
38 | template <typename T>
39 | buffer_info(T *ptr, detail::any_container<ssize_t> shape_in, detail::any_container<ssize_t> strides_in)
40 | : buffer_info(private_ctr_tag(), ptr, sizeof(T), format_descriptor<T>::format(), static_cast<ssize_t>(shape_in->size()), std::move(shape_in), std::move(strides_in)) { }
41 |
42 | buffer_info(void *ptr, ssize_t itemsize, const std::string &format, ssize_t size)
43 | : buffer_info(ptr, itemsize, format, 1, {size}, {itemsize}) { }
44 |
45 | template <typename T>
46 | buffer_info(T *ptr, ssize_t size)
47 | : buffer_info(ptr, sizeof(T), format_descriptor<T>::format(), size) { }
48 |
49 | explicit buffer_info(Py_buffer *view, bool ownview = true)
50 | : buffer_info(view->buf, view->itemsize, view->format, view->ndim,
51 | {view->shape, view->shape + view->ndim}, {view->strides, view->strides + view->ndim}) {
52 | this->view = view;
53 | this->ownview = ownview;
54 | }
55 |
56 | buffer_info(const buffer_info &) = delete;
57 | buffer_info& operator=(const buffer_info &) = delete;
58 |
59 | buffer_info(buffer_info &&other) {
60 | (*this) = std::move(other);
61 | }
62 |
63 | buffer_info& operator=(buffer_info &&rhs) {
64 | ptr = rhs.ptr;
65 | itemsize = rhs.itemsize;
66 | size = rhs.size;
67 | format = std::move(rhs.format);
68 | ndim = rhs.ndim;
69 | shape = std::move(rhs.shape);
70 | strides = std::move(rhs.strides);
71 | std::swap(view, rhs.view);
72 | std::swap(ownview, rhs.ownview);
73 | return *this;
74 | }
75 |
76 | ~buffer_info() {
77 | if (view && ownview) { PyBuffer_Release(view); delete view; }
78 | }
79 |
80 | private:
81 | struct private_ctr_tag { };
82 |
83 | buffer_info(private_ctr_tag, void *ptr, ssize_t itemsize, const std::string &format, ssize_t ndim,
84 | detail::any_container<ssize_t> &&shape_in, detail::any_container<ssize_t> &&strides_in)
85 | : buffer_info(ptr, itemsize, format, ndim, std::move(shape_in), std::move(strides_in)) { }
86 |
87 | Py_buffer *view = nullptr;
88 | bool ownview = false;
89 | };
90 |
91 | NAMESPACE_BEGIN(detail)
92 |
93 | template <typename T, typename SFINAE = void> struct compare_buffer_info {
94 | static bool compare(const buffer_info& b) {
95 | return b.format == format_descriptor<T>::format() && b.itemsize == (ssize_t) sizeof(T);
96 | }
97 | };
98 |
99 | template <typename T> struct compare_buffer_info<T, detail::enable_if_t<std::is_integral<T>::value>> {
100 | static bool compare(const buffer_info& b) {
101 | return (size_t) b.itemsize == sizeof(T) && (b.format == format_descriptor<T>::value ||
102 | ((sizeof(T) == sizeof(long)) && b.format == (std::is_unsigned<T>::value ? "L" : "l")) ||
103 | ((sizeof(T) == sizeof(size_t)) && b.format == (std::is_unsigned<T>::value ? "N" : "n")));
104 | }
105 | };
106 |
107 | NAMESPACE_END(detail)
108 | NAMESPACE_END(pybind11)
109 |
--------------------------------------------------------------------------------
/code/ch25_身份证汉字和数字识别/back_all/east_part/lanms/include/pybind11/complex.h:
--------------------------------------------------------------------------------
1 | /*
2 | pybind11/complex.h: Complex number support
3 |
4 | Copyright (c) 2016 Wenzel Jakob
5 |
6 | All rights reserved. Use of this source code is governed by a
7 | BSD-style license that can be found in the LICENSE file.
8 | */
9 |
10 | #pragma once
11 |
12 | #include "pybind11.h"
13 | #include <complex>
14 |
15 | /// glibc defines I as a macro which breaks things, e.g., boost template names
16 | #ifdef I
17 | # undef I
18 | #endif
19 |
20 | NAMESPACE_BEGIN(pybind11)
21 |
22 | template <typename T> struct format_descriptor<std::complex<T>, detail::enable_if_t<std::is_floating_point<T>::value>> {
23 | static constexpr const char c = format_descriptor<T>::c;
24 | static constexpr const char value[3] = { 'Z', c, '\0' };
25 | static std::string format() { return std::string(value); }
26 | };
27 |
28 | template <typename T> constexpr const char format_descriptor<
29 | std::complex<T>, detail::enable_if_t<std::is_floating_point<T>::value>>::value[3];
30 |
31 | NAMESPACE_BEGIN(detail)
32 |
33 | template <typename T> struct is_fmt_numeric<std::complex<T>, detail::enable_if_t<std::is_floating_point<T>::value>> {
34 | static constexpr bool value = true;
35 | static constexpr int index = is_fmt_numeric<T>::index + 3;
36 | };
37 |
38 | template <typename T> class type_caster<std::complex<T>> {
39 | public:
40 | bool load(handle src, bool convert) {
41 | if (!src)
42 | return false;
43 | if (!convert && !PyComplex_Check(src.ptr()))
44 | return false;
45 | Py_complex result = PyComplex_AsCComplex(src.ptr());
46 | if (result.real == -1.0 && PyErr_Occurred()) {
47 | PyErr_Clear();
48 | return false;
49 | }
50 | value = std::complex<T>((T) result.real, (T) result.imag);
51 | return true;
52 | }
53 |
54 | static handle cast(const std::complex<T> &src, return_value_policy /* policy */, handle /* parent */) {
55 | return PyComplex_FromDoubles((double) src.real(), (double) src.imag());
56 | }
57 |
58 | PYBIND11_TYPE_CASTER(std::complex<T>, _("complex"));
59 | };
60 | NAMESPACE_END(detail)
61 | NAMESPACE_END(pybind11)
62 |
--------------------------------------------------------------------------------
/code/ch25_身份证汉字和数字识别/back_all/east_part/lanms/include/pybind11/eval.h:
--------------------------------------------------------------------------------
1 | /*
2 | pybind11/exec.h: Support for evaluating Python expressions and statements
3 | from strings and files
4 |
5 | Copyright (c) 2016 Klemens Morgenstern and
6 | Wenzel Jakob
7 |
8 | All rights reserved. Use of this source code is governed by a
9 | BSD-style license that can be found in the LICENSE file.
10 | */
11 |
12 | #pragma once
13 |
14 | #include "pybind11.h"
15 |
16 | NAMESPACE_BEGIN(pybind11)
17 |
18 | enum eval_mode {
19 | /// Evaluate a string containing an isolated expression
20 | eval_expr,
21 |
22 | /// Evaluate a string containing a single statement. Returns \c none
23 | eval_single_statement,
24 |
25 | /// Evaluate a string containing a sequence of statement. Returns \c none
26 | eval_statements
27 | };
28 |
29 | template <eval_mode mode = eval_expr>
30 | object eval(str expr, object global = globals(), object local = object()) {
31 | if (!local)
32 | local = global;
33 |
34 | /* PyRun_String does not accept a PyObject / encoding specifier,
35 | this seems to be the only alternative */
36 | std::string buffer = "# -*- coding: utf-8 -*-\n" + (std::string) expr;
37 |
38 | int start;
39 | switch (mode) {
40 | case eval_expr: start = Py_eval_input; break;
41 | case eval_single_statement: start = Py_single_input; break;
42 | case eval_statements: start = Py_file_input; break;
43 | default: pybind11_fail("invalid evaluation mode");
44 | }
45 |
46 | PyObject *result = PyRun_String(buffer.c_str(), start, global.ptr(), local.ptr());
47 | if (!result)
48 | throw error_already_set();
49 | return reinterpret_steal<object>(result);