├── .gitignore
├── Preprocessing dataset.html
├── Preprocessing dataset.ipynb
├── README.md
├── Training Model.html
├── Training Model.ipynb
├── _config.yml
├── model.h5
├── model.json
├── test.png
├── wechat_digit_recognition.py
└── wx.py
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | *.png
3 | *.jpg
4 | itchat.pkl
5 | *.tgz
6 | English
7 | train
8 | valid
9 |
--------------------------------------------------------------------------------
/Preprocessing dataset.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# 下载数据集\n",
8 | "\n",
9 | "数据集来自 http://www.ee.surrey.ac.uk/CVSSP/demos/chars74k/\n",
10 | "\n",
11 | "我们下载的是**EnglishFnt.tgz**,是印刷体数字加大小写字母。\n",
12 | "\n",
13 | "* [tqdm](https://github.com/tqdm/tqdm) 是一个进度条的库。\n",
14 | "* [requests](http://docs.python-requests.org/en/master/) 是一个对人类友好的 HTTP 库。"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 1,
20 | "metadata": {
21 | "collapsed": false
22 | },
23 | "outputs": [
24 | {
25 | "name": "stderr",
26 | "output_type": "stream",
27 | "text": [
28 | "48651KB [01:38, 492.01KB/s] \n"
29 | ]
30 | }
31 | ],
32 | "source": [
33 | "import requests\n",
34 | "from tqdm import tqdm\n",
35 | "import os\n",
36 | "\n",
37 | "fileurl = 'http://www.ee.surrey.ac.uk/CVSSP/demos/chars74k/EnglishFnt.tgz'\n",
38 | "filename = 'EnglishFnt.tgz'\n",
39 | "if not os.path.exists(filename):  # an archive that is already on disk is reused\n",
40 | "    r = requests.get(fileurl, stream=True)\n",
41 | "    r.raise_for_status()  # fail loudly instead of saving an HTTP error page as the archive\n",
42 | "    with open(filename, 'wb') as f:  # Content-Length may be absent, so the progress total is optional\n",
43 | "        f.writelines(tqdm(r.iter_content(1024), unit='KB', total=int(r.headers.get('Content-Length', 0)) // 1024 or None))"
44 | ]
45 | },
46 | {
47 | "cell_type": "markdown",
48 | "metadata": {},
49 | "source": [
50 | "# 解压数据集\n",
51 | "\n",
52 | "* [tarfile](https://docs.python.org/2/library/tarfile.html) 是 Python 自带的操作 tar 文件的库。\n",
53 | "* [shutil](https://docs.python.org/2/library/shutil.html) 是 Python 自带的高级文件操作库。\n"
54 | ]
55 | },
56 | {
57 | "cell_type": "code",
58 | "execution_count": 2,
59 | "metadata": {
60 | "collapsed": false
61 | },
62 | "outputs": [
63 | {
64 | "name": "stdout",
65 | "output_type": "stream",
66 | "text": [
67 | "loading\n"
68 | ]
69 | },
70 | {
71 | "name": "stderr",
72 | "output_type": "stream",
73 | "text": [
74 | "100%|██████████| 63055/63055 [00:11<00:00, 5350.48it/s]\n"
75 | ]
76 | }
77 | ],
78 | "source": [
79 | "import tarfile\n",
80 | "import shutil\n",
81 | "\n",
82 | "def mkdir(path):  # create path (with parents) unless something already exists there\n",
83 | "    if not os.path.exists(path):\n",
84 | "        os.makedirs(path)\n",
85 | "\n",
86 | "def rmdir(path):  # delete the whole tree at path if it exists\n",
87 | "    if os.path.exists(path):\n",
88 | "        shutil.rmtree(path)\n",
89 | "\n",
90 | "with tarfile.open(filename, 'r') as tfile:\n",
91 | "    print('loading')\n",
92 | "    members = tfile.getmembers()  # a list, so tqdm can show a total\n",
93 | "    for member in tqdm(members):  # NOTE(review): member.name is used unvetted; check it before reusing this on other archives\n",
94 | "        if member.isdir():\n",
95 | "            mkdir(member.name)\n",
96 | "        elif member.isfile():  # regular files only; extractfile() can return None for other member types\n",
97 | "            with open(member.name, 'wb') as f:\n",
98 | "                f.write(tfile.extractfile(member).read())"
99 | ]
100 | },
101 | {
102 | "cell_type": "markdown",
103 | "metadata": {},
104 | "source": [
105 | "# 分类数据集\n",
106 | "\n",
107 | "数据集有数字和大小写字母,但是我们只需要0123456789和非数字。\n",
108 | "\n",
109 | "因此将 A~Z,a~z 的图片移到 A 的文件夹,再将其他空文件夹删除。"
110 | ]
111 | },
112 | {
113 | "cell_type": "code",
114 | "execution_count": 3,
115 | "metadata": {
116 | "collapsed": false
117 | },
118 | "outputs": [
119 | {
120 | "name": "stderr",
121 | "output_type": "stream",
122 | "text": [
123 | "100%|██████████| 51/51 [00:03<00:00, 12.79it/s]\n"
124 | ]
125 | }
126 | ],
127 | "source": [
128 | "notnumdir = 'English/Fnt/Sample011/'  # every letter image is pooled into this one folder\n",
129 | "for i in tqdm(range(12, 63)):\n",
130 | "    path = 'English/Fnt/Sample%03d/' % i\n",
131 | "    for imgname in os.listdir(path):  # not named `filename`: that variable still holds the archive name\n",
132 | "        os.rename(path+imgname, notnumdir+imgname)\n",
133 | "    os.rmdir(path)  # the folder is empty once its images have been moved"
134 | ]
135 | },
136 | {
137 | "cell_type": "markdown",
138 | "metadata": {},
139 | "source": [
140 | "# 预处理数据集\n",
141 | "\n",
142 | "我们需要将这里的图片裁切为28*28,以便于输入到神经网络中。\n",
143 | "\n",
144 | "## 首先测试一张图"
145 | ]
146 | },
147 | {
148 | "cell_type": "code",
149 | "execution_count": 4,
150 | "metadata": {
151 | "collapsed": false
152 | },
153 | "outputs": [],
154 | "source": [
155 | "import cv2\n",
156 | "import numpy as np\n",
157 | "\n",
158 | "def resize(rawimg):\n",
159 | "    \"\"\"Scale rawimg to fit 28x28 (aspect ratio kept) and center it on a white canvas.\"\"\"\n",
160 | "    # One factor for both axes, chosen so that the longer side is scaled to 28 pixels.\n",
161 | "    scale = 28.0 / max(rawimg.shape[0], rawimg.shape[1])\n",
162 | "    img = cv2.resize(rawimg, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)\n",
163 | "    outimg = np.ones((28, 28), dtype=np.uint8) * 255  # 255 = white background\n",
164 | "    h, w = img.shape[0], img.shape[1]\n",
165 | "    # Floor division keeps the offsets integral, so they are valid slice bounds on Python 3 too.\n",
166 | "    x = (28 - w) // 2\n",
167 | "    y = (28 - h) // 2\n",
168 | "    outimg[y:y+h, x:x+w] = img\n",
169 | "    return outimg\n",
170 | "\n",
171 | "def convert(imgpath):\n",
172 | "    \"\"\"Read the image at imgpath as grayscale, crop one detected contour and return resize() of it.\"\"\"\n",
173 | "    gray = cv2.imread(imgpath, cv2.IMREAD_GRAYSCALE)\n",
174 | "    bw = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 25, 25)\n",
175 | "    # [-2] is the contour list whether findContours returns 3 values (OpenCV 3) or 2 (OpenCV 2/4).\n",
176 | "    ctrs = cv2.findContours(bw.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]\n",
177 | "    x, y, w, h = cv2.boundingRect(ctrs[-1])  # last contour in the list; IndexError if none was found\n",
178 | "    roi = gray[y:y+h, x:x+w]\n",
179 | "    return resize(roi)"
180 | ]
181 | },
182 | {
183 | "cell_type": "code",
184 | "execution_count": 5,
185 | "metadata": {
186 | "collapsed": false
187 | },
188 | "outputs": [
189 | {
190 | "data": {
191 | "text/plain": [
192 | ""
193 | ]
194 | },
195 | "execution_count": 5,
196 | "metadata": {},
197 | "output_type": "execute_result"
198 | },
199 | {
200 | "data": {
201 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAgQAAAEBCAYAAAAHJ724AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAIABJREFUeJzt3X90XHWd//HnOz+m+dWkaUtboMD2B8WqK5AgWBV0rbsF\n/Fr1688ox6Me9+svPHzz3f2i7LpfWTnruuyRsijssrrrqkA8SL9+gbPYakVZQEqPtIBAASutBUqS\ntvmdTJLJzPv7x50Jk2l+NnPnTpLX45x7krn3M5lXJjNz3/ncz/1cc3dERERkYSuJOoCIiIhETwWB\niIiIqCAQERERFQQiIiKCCgIRERFBBYGIiIiggkBERERQQSAiIiKoIBARERFUEIiIiAgRFwRm9gUz\nO2hmcTPbbWZvjDKPiIjIQhVZQWBmHwa+CXwVOB94AthpZsujyiQiIrJQWVQXNzKz3cCj7n5V+rYB\nLwI3ufv1kYQSERFZoCLpITCzcqAR+EVmnQeVyS5gUxSZRCR8OkwoUrzKInrc5UAp0Jazvg04J7ex\nmS0DtgCHgMGww4nMYxXAHwE73f14IR846zDh/wD2AM0Ehwk3uPuxcdrrfS+SH9N630dVEEzEgPGO\nYWwBbi9wFpH57GPAHQV+zGbgVnf/AYCZfRZ4F/ApYLzDhHrfi+TXpO/7qAqCY0ASWJmzfgUn9hpA\n8B8CtbW1nHfeeWM2bNmyhUsvvTSEiONrbm5m27ZtBXs8ZVCGk82wY8cOdu7cOWZdb28v+/btg/R7\nqlCyDhN+PbPO3d3MJjtMeAjgtttuY+PGjUXxXI9ntrlGRkZIJBKjX7OXoaEh2tvbaWtro62tjfb2\n9tHb7e3tDA6q4yRbTU0Np59++gnL6tWrOf300yktLR33fvP1tZWxf/9+rrjiCpjifR9JQeDuCTN7\nDNgM3AOjgwo3AzeNc5dBgPPOO48HHnigYDnHU1dXR0NDgzIoQ9FnaGho4K/+6q/GrNu7dy+NjY1Q\n+C74GR0mTBsE2LhxIw0NDUXxXI9ntrkSiQTDw8OjX7OXeDzO4sWLWbRoEWZGMpkkHo/T3d1NSYmm\nkclVWlpKVVUV9fX1rFy5kjPPPJO1a9eOLhMVBPP1tTWOSd/3UR4yuAH4frowyBxPrAL+I8JMIlJY\nEx0mXDBSqRQjIyMMDQ0Rj8cZHBxkcHCQeDxOf38/XV1d9PT0MDAwwODgIMPDwySTSaI6Q0zmr8gK\nAne/Mz3nwNcIDh08Dmxx96NRZRKR0Mz0MOGo5uZm6urq2LNnD1u3bgWgqamJpqamUIIWWjKZZHh4\nmP7+fvr6+sYsvb29tLe3c+zYMTo7O+nt7SUej5NIJFQQyLhaWlpoaWkZs667u3ta9410UKG73wLc\nEmUGEQnfSRwmHLVt2zYaGhrYunUr99xzT/hhCyyZTDI0NMTAwAA9PT10d3fT1dVFd3c3nZ2ddHR0\n0NHRQVdXF319faO9BKlUKuroUoTGK5azDhVOqtjOMpjUli1boo5QFP+VKIMyFFuGaZrVYcJi/T1n\nmyuVSjE8PMzAwAC9vb10dHRw/Pjx0SVTJPT09KiHICTz9bU1U5HNVDgTZtYAPPbYY48V5cAPkbki\n6z+FRnffW+jHN7PPA1fz6mHCL7r7byZouyDe9x0dHaNnEeQuR48eZWBggP7+fvr7+0/4fmRkJOr4\nRaWurm50AOGaNWvGDCicbFDhfDfd9/2c6iEQkblNhwlPlBlDkDlk0NHRQXt7O6+88gptbW0MDQ2d\nsCQSCR0ykLxTQSAiEqHcMQSdnZ0cPXqUV155hVdeeYWRkRGSyeTokrk9F3p3ZW5RQSAiEqFUKkUi\nkSAej9PX10dXVxfHjx+nvb2d1tbWGf2sYJzm+LdztxW7TMGT+1XCo4JARGSOMjNKS0spKSmhpKRk\n9Pvxvua2K9YCIZlMnjBjY2bRmIlwqS
AQEZmjMgVBeXk5ZWVllJeXT/v7Yp3pMDNDY2YZGBggHo/j\n7ioIQqaCQERkjiopKRndyS9atGjKJRaLjX5fVlacH/+Dg4P09PSMLqWlpbg7iURC124IWXG+IkRE\nZEqZHoJYLEZFRQVVVVVUVlaOu+RuKy8vjzr+uPr7+zl+/DgVFRWjRUtmjIWESwWBiMgclekhyC4I\nqqurqampobq6esz3uetisVjU8cfV09NDRUXFaMGSSCQYGBgo2h6N+UTPsIjIHGVmJxQEixcvZvHi\nxdTW1o5+zSzZ6ysqKqKOP67Ozs7RCYSy52dQQRA+PcMiInPUeIcMampqqKurY8mSJScs2esrKyuj\njj+uY8eOjQ4gzBQD2T0GEh4VBCIic5SZjZ5KWFZWNjrAMBaLjQ4gzD2UkOklqKqqijr+uIaHh6mp\nqaGqqoqKigpisRhlZWVFe5rkfFKc552IiMi0uPsJSyqVGne9yGRUEIiIzAMqCmS2VBCIiMxR2dP6\njlcQ5BYF2fcRyaWCQERkDsvewU9UCKgYkOnQoMIF4MUXX+TFF1+kv7+fRCIRdZxZKS8vp6amhjPO\nOIPVq1dHHUekKOTu9CfrIRCZiAqCBeDhhx/m9ttv5+DBg3R1dUUdZ1aWLl3K2rVrueKKK/jABz4Q\ndRyRojHeYYOJxhKIjEcFwTyUSCQ4fPgwzz//PE8++SSPPPII+/bt4/jx43N+LvCOjg66urowMw4c\nOMBrXvMaNmzYwNlnn63zlGVBmu74ARUCMhUVBPNMIpGgu7ubffv2cd9997F9+3Z6enqijpU38Xh8\n9BDIrl27uPTSS7nssstYtmwZ9fX1RTsdq0gxUFEgk1FBMM8cPnyYxx9/nO3bt7N7924GBgaijhSa\nwcFBdu/ezcjICGbGm970JjZu3Bh1LBGROSnvBYGZXQO8D3gNEAd+DXzJ3Z/ParMIuAH4MLAI2Al8\n3t3b851noRgaGqK7u5snnniCnTt3smfPHg4dOhR1rFCNjIzw0ksvYWbEYjEqKytZsWIFixcvVk+B\niMgMhXHa4cXAt4CLgHcC5cDPzCx74uwbgXcB7wcuAU4DtoeQZcHo7e1l//793H///dx7770cOXIk\n6kgF09bWxq5du3jkkUc4cOAA/f39UUcSEZlz8t5D4O6XZ982s08A7UAj8JCZ1QKfAj7i7g+k23wS\n2G9mF7r7nnxnWgh6enr47W9/y4EDB+jo6GBkZCTqSAWTSCTo6enhhRde4KmnnmLVqlXU19dHHUtE\nZE4pxMRESwAHOtK3GwkKkV9kGrj7c8BhYFMB8sxLvb29PPvssxw5coTh4WFSqVTUkQomc2W0l19+\nmaeffpqOjg4SiYQGUImIzECoBYEFl6e6EXjI3Z9Jr14FDLt77tD3tvQ2OQmJRIKurq55PYhwKseO\nHeN3v/sdv/vd72htbV1QRZGIyGyF3UNwC/BaoGkabY2gJ0FmIJVK0dvby/Hjxzl27NiCLgh6eno4\ncuQIR48epaenRz0EIiIzENpph2b2beBy4GJ3zx7h1grEzKw2p5dgBUEvwYSam5upq6sbs66pqYmm\npunUG/NTKpWitbWVQ4cO0dbWtqAH1A0MDNDZ2Uk8HieZTC74gqClpYWWlpYx67q7uyNKIyLFLpSC\nIF0MvAd4m7sfztn8GDACbAZ+km6/ATgTeGSyn7tt2zYaGhryH3gOc3cGBgbo7e0lHo/P+WsVzEYq\nlWJ4eJi+vj4GBgYWfEEwXrG8d+9eGhsbI0okIsUsjHkIbiE4RLAV6DezlelN3e4+6O49ZvZvwA1m\n1gn0AjcBD+sMg5lzd4aHhxkcHJzxYEIzo7y8nOrqapYsWUIw5CN67k5PTw/9/f0MDQ1Ne8fu7iST\nSeLx+Jyfonm+MbOvAl/NWf2su782ijwicqIwegg+SzAW4Fc56z8J/CD9fTOQBO4imJhoB/CFELLI\nJM
rLyznjjDN461vfygc/+EEqKyunvlMBxONxfvrTn/Lwww/z7LPPauc+fzxF0DOYqTwXzrmxInNA\nGPMQTDlQ0d2HgC+mF4lISUkJNTU1rFmzhre97W3U1NREHQmAvr4+Dhw4wDPPPENpaWnUcSR/Rtz9\naNQhRGR8hZiHQEQE4Gwze9nMfm9mt5nZGVEHEpFXqSAQkULYDXwC2EJwWHEN8F9mVh1lKBF5la52\nKCKhc/edWTefMrM9wB+ADwHfiyaViGRTQSAiBefu3Wb2PLB+qraaf0Rk+mYz/4gKAhEpODOrAdbx\n6plHE9L8IyLTN5v5RzSGQERCZ2b/aGaXmNlZZvZmgknJRoCWKe4qIgWiHgIRKYTVwB3AMuAo8BDw\nJnc/HmkqERmlgkBEQufuOuAvUuR0yEBERERUEIiIiIgKAhEREUEFgYiIiKCCQERERFBBICIiIqgg\nEBEREVQQiIiICCoIREREBBUEIiIiggoCERERQQWBiIiIoIJAREREUEEgIiIiFKAgMLNrzCxlZjdk\nrVtkZjeb2TEz6zWzu8xsRdhZREREZHyhFgRm9kbgz4EncjbdCLwLeD9wCXAasD3MLCIiIjKx0AoC\nM6sBbgM+DXRlra8FPgU0u/sD7r4P+CTwFjO7MKw8IiIiMrEwewhuBu519/tz1l8AlAG/yKxw9+eA\nw8CmEPOIiIjIBMrC+KFm9hHgPIKdf66VwLC79+SsbwNWhZFHREREJpf3gsDMVhOMEfhTd0/M5K6A\n5zuPiIiITC2MHoJG4BTgMTOz9LpS4BIzuxK4FFhkZrU5vQQrCHoJJtTc3ExdXd2YdU1NTTQ1NeUt\nvMh80dLSQktLy5h13d3dEaURkWIXRkGwC/jjnHX/AewHvgG8DCSAzcBPAMxsA3Am8MhkP3jbtm00\nNDTkOa7I/DResbx3714aGxsjSiQixSzvBYG79wPPZK8zs37guLvvT9/+N+AGM+sEeoGbgIfdfU++\n84iIiMjUQhlUOI7csQHNQBK4C1gE7AC+UKAsIiIikqMgBYG7vyPn9hDwxfQiIiIiEdO1DEREREQF\ngYjMnpldbGb3mNnL6WuXbB2nzdfM7IiZDZjZz81sfRRZRWR8KghEJB+qgccJxgKdMJ+ImX0JuBL4\nDHAh0A/sNLNYIUOKyMQKNahQROYxd99BMDiYrPlHsl0FXOfu96bbfJxg3pH3AncWKqeITEw9BCIS\nKjNbQzAtefb1S3qAR9H1S0SKhgoCEQnbKoLDCLkzker6JSJFRAWBiERF1y8RKSIaQyAiYWsl2Pmv\nZGwvwQpg31R31jVMRKZvNtcwUUEgIqFy94Nm1kpw/ZInAcysFrgIuHmq++saJiLTN5trmKggEJFZ\nM7NqYD1BTwDAWjM7F+hw9xcJLon+FTM7ABwCrgNeAu6OIK6IjEMFgYjkwwXALwnGBDjwzfT67wOf\ncvfrzawKuBVYAjwIXObuw1GEFZETqSAQkVlz9weYYpCyu18LXFuIPCIyczrLQERERFQQiIiIiAoC\nERERQQWBiIiIoIJAREREUEEgIiIiqCAQERERVBCIiIgIKghEREQEFQQiIiJCSAWBmZ1mZj80s2Nm\nNmBmT5hZQ06br5nZkfT2n5vZ+jCyiIiIyNTyXhCY2RLgYWAI2AJsBP4C6Mxq8yXgSuAzwIVAP7DT\nzGL5ziMiIiJTC+PiRl8GDrv7p7PW/SGnzVXAde5+L4CZfRxoA94L3BlCJhEREZlEGIcM3g38xszu\nNLM2M9trZqPFgZmtAVYBv8isc/ce4FFgUwh5REREZAphFARrgc8BzwF/BvwLcJOZXZHevorgeult\nOfdrS28TERGRAgvjkEEJsMfd/yZ9+wkzex1BkXDbJPczgkJBRERECiyMguAVYH/Ouv3Af09/30qw\n81/J2F6CFcC+yX5wc3MzdXV1Y9Y1NTXR1NQ0m7wi81JLSwstLS1j
1nV3d0eURkSKXRgFwcPAOTnr\nziE9sNDdD5pZK7AZeBLAzGqBi4CbJ/vB27Zto6GhYbImIpI2XrG8d+9eGhsbI0okIsUsjIJgG/Cw\nmV1DcMbARcCngT/PanMj8BUzOwAcAq4DXgLuDiGPiIiITCHvBYG7/8bM3gd8A/gb4CBwlbv/KKvN\n9WZWBdwKLAEeBC5z9+F85xEREZGphdFDgLvfB9w3RZtrgWvDeHwRERGZGV3LQERERFQQiIiIiAoC\nERERQQWBiIiIoIJAREREUEEgIiIiqCAQkTwws4vN7B4ze9nMUma2NWf799Lrs5dJT00WkcJSQSAi\n+VANPA58gYkvUvZTgmuYrEovugiJSBEJZWIiEVlY3H0HsAPAzGyCZkPufrRwqURkJtRDICKF8nYz\nazOzZ83sFjNbGnUgEXmVeghEpBB+CmwnuLbJOuDvgfvMbJO7T3SIQUQKSAWBiITO3e/Muvm0mf0W\n+D3wduCXkYQSkTFUEIhIwbn7QTM7BqxnioKgubmZurq6MeuamppoatKYRJFcLS0ttLS0jFnX3d09\nrfuqIBCRgjOz1cAy4JWp2m7bto2GhobwQ4nMA+MVy3v37qWxsXHK+6ogEJFZM7Nqgv/2M2cYrDWz\nc4GO9PJVgjEErel2/wA8D+wsfFoRGY8KAhHJhwsIuv49vXwzvf77wOeBNwAfB5YARwgKgf/j7onC\nRxWR8aggEJFZc/cHmPw05ksLlUVETo7mIRAREREVBCIiIqKCQERERFBBICIiIqggEBEREUIoCMys\nxMyuM7MXzGzAzA6Y2VfGafc1MzuSbvNzM1uf7ywiIiIyPWH0EHwZ+AzBucevAa4GrjazKzMNzOxL\nwJXpdhcC/cBOM4uFkEdERESmEMY8BJuAu9PXRwc4bGYfJdjxZ1wFXOfu9wKY2ceBNuC9QPZFUERE\nRKQAwugh+DWw2czOBkhPX/oW4L707TXAKuAXmTu4ew/wKEExISIiIgUWRg/BN4Ba4FkzSxIUHX/t\n7j9Kb19FMLVpW8792tLbREREpMDCKAg+DHwU+AjwDHAe8E9mdsTdfzjJ/YygUBAREZECC6MguB74\nurv/OH37aTP7I+Aa4IcEVzszYCVjewlWAPsm+8G6LrrI9M3muugisvCEURBUceJ/+inS4xXc/aCZ\ntQKbgScBzKwWuAi4ebIfrOuii0zfbK6LLiILTxgFwb3AX5vZi8DTQAPQDHw3q82NwFfM7ABwCLgO\neAm4O4Q8IiIiMoUwCoIrCXbwNxMcBjgC/HN6HQDufr2ZVQG3Elwf/UHgMncfDiGPiIiITCHvBYG7\n9wP/K71M1u5a4Np8P76IiIjMnK5lICIiIioIRERERAWBiIiIoIJAREREUEEgIiIiqCAQERERVBCI\niIgIKghEREQEFQQiIiKCCgIRERFBBYGIiIiggkBE8sDMrjGzPWbWY2ZtZvYTM9uQ02aRmd1sZsfM\nrNfM7jKzFVFlFpGxVBCISD5cDHwLuAh4J1AO/MzMKrPa3Ai8C3g/cAlwGrC9wDlFZAJhXP5YRBYY\nd788+7aZfQJoBxqBh8ysFvgU8BF3fyDd5pPAfjO70N33FDiyiORQD4GIhGEJ4EBH+nYjwT8gv8g0\ncPfngMPApoKnE5ETqCAQkbwyMyM4PPCQuz+TXr0KGHb3npzmbeltIhIxHTIQkXy7BXgt8NZptDWC\nngQRiZgKAhHJGzP7NnA5cLG7H8na1ArEzKw2p5dgBUEvwYSam5upq6sbs66pqYmmpqY8pRaZP1pa\nWmhpaRmzrru7e1r3VUEgInmRLgbeA7zN3Q/nbH4MGAE2Az9Jt98AnAk8MtnP3bZtGw0NDfkPLDIP\njVcs7927l8bGxinvq4JARGbNzG4BmoCtQL+ZrUxv6nb3QXfvMbN/A24ws06gF7gJeFhnGIgUBxUE\nIpIPnyUYC/CrnPWfBH6Q/r4Z
SAJ3AYuAHcAXCpRPRKaggkBEZs3dpzxjyd2HgC+mFxEpMjM+7dDM\nLjaze8zsZTNLmdnWcdp8zcyOmNmAmf3czNbnbK83s9vNrNvMOs3su2ZWPZtfRERERE7eycxDUA08\nTtDVd8LpQmb2JeBK4DPAhUA/sNPMYlnN7gA2EgwwehfBNKa3nkQWERERyYMZHzJw9x0Ex/4yE5Dk\nugq4zt3vTbf5OMFpRe8F7jSzjcAWoNHd96XbfBH4TzP7S3dvPanfRERERE5aXmcqNLM1BLOOZU9P\n2gM8yqvTk74J6MwUA2m7CHobLspnHhEREZmefE9dvIpgx5470Uj29KSrCC56MsrdkwRznmsKUxER\nkQgU6loG05meVFOYioiIRCTfpx22EuzYVzK2l2AFsC+rzYrsO5lZKVCPpjAVyZvZTGEqIgtPXgsC\ndz9oZq0EZw88CZC+DvpFwM3pZo8AS8zs/KxxBJsJColHJ/v5msJUZPpmM4WpiCw8My4I0vMFrCfY\ngQOsNbNzgQ53f5HgsqdfMbMDwCHgOuAl4G4Ad3/WzHYC3zGzzwEx4FtAi84wEBERicbJ9BBcAPyS\n4Hi/A99Mr/8+8Cl3v97MqgjmFVgCPAhc5u7DWT/jo8C3Cc4uSBFMZXrVSf0GIiIiMmsnMw/BA0wx\nGNHdrwWunWR7F3DFTB9bREREwlGoswxERESkiOniRgtYKpWir6+PQ4cO8eCDD1JZWRl1JADi8TgH\nDhygq6uLZDIZdRwRkQVBBcEClkgkePHFF7nnnnt48MEHGX8m6sJzd7q7u+nv72doaCjqOCIiC4IK\ngjnOzIjFYlRUVBCLxSgpmf5RIHdneHiYjo4OOjo6QkxZGGZGaWkplZWVVFRURB1HRGRO0RiCOc7M\nqKqqYvHixVRWVlJeXh51pMiUlJRQXl5OTU0NVVVVRdPjISIyF6ggmONKSkpYuXIlZ511FitXrqS6\nujrqSJGpqqqivr6eyspKSktLVRCIiMyACoI5rqSkhNraWpYvX87y5cupqqqKOlJkamtrOe200zjl\nlFNYvHixCgIRkRlQQTBPlJeXs2TJkqI5UyAKy5Yt4+yzz2b9+vWsWrVqRuMpREQWOn1izhM1NTWc\nc845nH766TMeXDjXZQYTrl69mte97nUsW7aMWCymHgIRkRlYOHuNea6uro43vOENrFu3jvr6+gU1\nuLCsrIza2lrWrFnD61//ehYvXhx1JBGROUenHc4TixcvZuPGjbzjHe9geHiY+++/nxdeeCHqWAWx\natUqNm3axKZNm1i/fj01NTVRRxIRmXNUEMwTixYtYsWKFZx33nm4O729vSSTSV5++WVGRkaijheK\nsrIyVq5cSUNDA1u2bOHcc89l+fLlUccSEZmTVBDMM2eccQa1tbW4O1VVVWzfvp2enp6oY4WioqKC\nTZs2cemll3L55ZdTX18fdSQRkTlLBcE8E4vFWLJkCQ0NDVRXV3P22Weze/duHnvsMY4fP87g4GDU\nEWelsrKSZcuW0djYyIUXXsjGjRvZsGEDS5cuJRaLRR1PRGTOUkEwD5WXl7Nu3TrWrVvH5Zdfzo9+\n9CNGRkY4ePAgXV1dUceblaVLl7Ju3To+9rGP8cEPfjDqOCKRM7Nxl5KSknHXi0xEBcEC8OY3v5nV\nq1fT399PIpGIOs6sxGIxqqurOeuss6KOIlJUMjv73GJgvMJAZDwqCBaAM888kzPPPDPqGDKPmdk1\nwPuA1wBx4NfAl9z9+aw2vwIuybqbA7e6++cLGHVemqiHILcYEJmM5iEQkXy4GPgWcBHwTqAc+JmZ\nZU+d6cC/AiuBVcCpwNUFzjnvZO/oJyoEsnsPRCaiHgIRmTV3vzz7tpl9AmgHGoGHsjYNuPvRAkab\n17J39NMZQ5B9H5Fc6iEQkTAsIegR6MhZ/zEzO2pmvzWzr+f0IMgsTOeQgYoBmYx6CEQkryzY69
wI\nPOTuz2Rtuh34A3AEeANwPbAB+EDBQ84j0+kdyLQTmYwKAhHJt1uA1wJvyV7p7t/Nuvm0mbUCu8xs\njbsfLGTA+cLdSaVSpFIpkskkyWSSkZEREokEiUSC4eFhhoaGGBwcZGBggFgsRiwWo7y8nGQyGXX8\ncfX19TEwMEA8HmdoaIhEIkEymcTdo4427824IDCzi4H/TXBs8FTgve5+T3pbGfB3wGXAWqAb2AV8\n2d1fyfoZ9cC3gf8GpIDtwFXu3j+r30ZEImVm3wYuBy7Ofs9P4FHAgPXAhAVBc3MzdXV1Y9Y1NTXR\n1NQ0y7Rzn7uTTCYZHh4e3emXlpZiZrg7IyMjY7b19fXR3d1NZ2cnFRUVUccfV2dnJy+//DLt7e10\ndHTQ29tLPB6ft1Ow51tLSwstLS1j1nV3d0/rvifTQ1ANPA78O8GOPFsVcB7wt8CTQD1wE3A3cGFW\nuzsIRhpvBmLAfwC3AlecRB4RKQLpYuA9wNvc/fA07nI+wTiDSQuHbdu20dDQkIeE80/uTj+7GMgU\nCkNDQ8Tj8dFioKamhpqamqKd2bOnp4fW1lba29vp7OwcLQjm+hwqhTJesbx3714aGxunvO+MCwJ3\n3wHsgNFjhdnbeoAt2evM7ErgUTNb7e4vmdnGdJtGd9+XbvNF4D/N7C/dvXWmmUQkWmZ2C9AEbAX6\nzWxlelO3uw+a2Vrgo8B9wHHgXOAG4AF3fyqKzPNB5lBBIpFgcHBwTDGQWZfpGaisrKSyspKqqioq\nKyuL9hLpfX19dHR0jC49PT0MDg6qh6AACjGGIDPaODNn7puAzkwxkLYr3eYigt4EEZlbPkvwHv5V\nzvpPAj8AhgnmJ7iKoJfxReDHBIcY5SRl9xBMVAwsWrRo3KWsrDiHkMXjcXp6ekYXHTIonFBfEWa2\nCPgGcIe796VXryI4P3mUuyfNrCO9TUTmGHef9BRmd38JeHth0iwcqVSKkZGREw4TDA4OUl5ePrqU\nlZWNuV1eXk5JSXGedT48PEw8Hh8dWJj5qoIgfKEVBOkBhj8m+K9hOlOTWrqtiIhMQ6YIyO4ZKCkp\nobS0dPTrRN8X62mImd8jc5ZE5nuNIQhfKAVBVjFwBvCOrN4BgFZgRU77UoIBiG2T/VyNNhaZvtmM\nNpa5IXPIwMxGd5i5UxlPtK6YufvoaYaZ73XaYfjyXhBkFQNrgT9x986cJo8AS8zs/KxxBJsJegge\nnexna7SxyPTNZrSxFE5JSQnl5eVUVFRQXV1NXV0dS5cupa+vb/Qc/ImW7J1mhnaccrJOZh6CaoLz\nhjNl5lqykImeAAAKXklEQVQzO5dgitIjBKcinkcwx0B51mjjDndPuPuzZrYT+I6ZfY7gtMNvAS06\nw0BEFprS0lIWLVpEVVUVtbW11NfXMzAwQCKRwMxGTx3MLJnbqVRKO3/Jq5PpIbgA+CXB8X4Hvple\n/32C+QfenV7/eHp9ZmzAnwD/lV73UYKJiXYRTEx0F8HoYxGRBaW0tJRYLEZ1dTW1tbWj59y7O2Vl\nZfT39zMwMDD6dWBggFQqRSKRIJVKRR1f5pGTmYfgASa/KNKUQ1fdvQtNQiQiQklJCbFYjMrKSmpr\naxkeHiaZTGJmlJeXj55+lzkzIJVKjZ5mKJJPxXkiqojIApE5ZFBdXT36X392kdDR0UEsFqOkpAR3\nJ5FIEI/HVRBI3hXniagT2LFjR9QRThi1rQzKELViyFAIxfp7zjZX5pBB9hiCU045hVNPPZXTTz+d\nVatWsXz5cpYsWUJNTc3oLIPFOo/AXDRfX1szNadeUTt37ow6QlG8cJRBGYotQyEU6++Zj4IgM6hw\n8eLFLF26dExBsHLlSpYvX059fT2LFy8eLQjUQ5A/8/W1NVM6ZCAiEqGSkhLKysqoqKgYPQVx0aJF\nDA8PU1lZydDQEAMDA/T09FBZWUksFqOsrEwFgeTdnOohEB
ERkXCoIBAREZE5c8igAqC3t5e9e/dG\nGqS7u1sZlGHOZti/f3/m24rQAuVPBbyauRie6/HMNtfIyAiJRGL0a/YyNDREe3s7bW1ttLW1cfz4\ncXp7exkcHNQcBONIJpMMDAzQ2dlJVVUVZWVlJJNJ4vE4XV1dlJaWjnu/+fraypju+97mwkxXZvZR\n4Paoc4jMIx9z9zuiDjEZve9F8m7S9/1cKQiWAVuAQ8BgtGlE5rQK4I+Ane5+POIsk9L7XiRvpvW+\nnxMFgYiIiIRLgwpFREREBYGIiIioIBARERFUEIiIiAhzpCAwsy+Y2UEzi5vZbjN7Y4iPdY2Z7TGz\nHjNrM7OfmNmGnDaLzOxmMztmZr1mdpeZrQg5U8rMbihkBjM7zcx+mH6MATN7wswactp8zcyOpLf/\n3MzW5/HxS8zsOjN7If3zD5jZV8Zpl7cMZnaxmd1jZi+nn/OtM308M6s3s9vNrNvMOs3su2ZWnY8M\nZlZmZv9gZk+aWV+6zffN7NR8Zig2hfwMmGaer6b/NtnLMxHkmPXrtdCZzOx74zx394WZKf24xfrZ\nPlWmX+U8V0kzuyWMPEVfEJjZh4FvAl8FzgeeAHaa2fKQHvJi4FvARcA7gXLgZ2ZWmdXmRuBdwPuB\nS4DTgO1hhEl/8P05we+dLdQMZrYEeBgYIjj1ayPwF0BnVpsvAVcCnwEuBPoJ/jaxPMX4cvpnfx54\nDXA1cLWZXRlihmrgceALwAmn4Ezz8e4geL42E/yNLgFuzVOGKuA84G8J3g/vA84B7s5pN9sMRSOC\nz4DpegpYCaxKL2+NIEM+Xq8FzZT2U8Y+d00h5skoqs/2GWRy4F959fk6leCzMP/cvagXYDfwT1m3\nDXgJuLpAj78cSAFvTd+uJdhJvi+rzTnpNhfm+bFrgOeAdwC/BG4oVAbgG8ADU7Q5AjRn3a4F4sCH\n8pThXuA7OevuAn5QiAzp53PrTH5ngp1wCjg/q80WYARYlY8M47S5AEgCq8PIEPUS9WfABJm+CuyN\n+rmZ6rUS9nv0JDN9D/i/RfB8RfbZPt1M6XWjn/1hL0XdQ2Bm5UAj8IvMOg+eoV3ApgLFWEJQoXWk\nbzcSTPmcnek54HAImW4G7nX3+3PWX1CADO8GfmNmd6a7svaa2aczG81sDUG1mp2hB3g0jxl+DWw2\ns7PTj3ku8BbgvgJmGDXNx3sT0Onu+7LuuovgNXRRvjOlZV6jXRFmCEWRfAZM5Ox0t/jvzew2Mzsj\n4jxjFPr9MUNvT3+uPGtmt5jZ0ggyRPnZPt1MGR8zs6Nm9lsz+3pOD0LeFPu1DJYDpUBbzvo2gsot\nVGZmBF1ID7l75vjgKmA4/cbKzbQqj4/9EYKu4QvG2byyABnWAp8j6Kr9O4IdyU1mNujut6Ufxxn/\nb5OvDN8gqNqfNbMkwSGuv3b3H6W3FyJDtuk83iqgPXujuyfNrCOMTGa2iOB5usPd+6LIELJIPwMm\nsRv4BEEP3qnAtcB/mdnr3b0/wlzZCv3+mK6fEnTDHwTWAX8P3Gdmm9LFXuii/GyfYSYIpu/+A0Fv\nzxuA64ENwAfynaHYC4KJGBMfm8qnW4DXMr1jg3nLZGarCV4Yf+ruiZncNV8ZCHa+e9z9b9K3nzCz\n1xEUCbcVKMOHgY8CHwGeISiQ/snMjrj7DwuUYTqm83h5z2RmZcCP0z/389O5S74zRCjS38Xdd2bd\nfMrM9hB8aH+IoEu8mEX93N2ZdfNpM/st8Hvg7QTd44UQyWf7FDKZ3pK90t2/m3XzaTNrBXaZ2Rp3\nP5jPAEV9yAA4RnBsdGXO+hWcWPXmlZl9G7gceLu7H8na1ArEzKw2xEyNwCnAY2aWMLME8DbgKjMb\nTj/OopAzvALsz1m3Hzgz/X0rwRslzL/N9cDfu/uP3f1pd78d2AZcU8AM2abzeK3p26PMrBSoz2em\nrGLgDODPsnoHCpahQC
L7DJgJd+8GngdCHcE/Q4V+f5yU9E7tGAV67iL+bJ9OplemaP4owd81789X\nURcE6f+OHyMYKQ2MdqtsJji+HIr0H+c9wJ+4++GczY8RDM7KzrSBYEf5SJ4i7AL+mOA/4nPTy28I\n/jPPfJ8IOcPDnNglew7Bf0GZN3FrToZagkML+frbVHFiZZ4i/botUIZR03y8R4AlZnZ+1l03E7yB\nH81HjqxiYC2w2d07c5qEnqFQovoMmCkzqyHo/p7qw7xgCv3+OFnpHtFlFOC5K4LP9plmGs/5BJ+L\n+X++CjFycZajLj9EMCr24wSnnt0KHAdOCenxbiE4te5igso6s1TktDlI0MXVSLDzfDDk52HMSNOw\nMxCMXRgi+G98HUHXfS/wkaw2V6f/Fu8mKGD+H/A7IJanDN8jGNBzOXAWwSl27cDXw8pAcMrUuQTF\nWAr4n+nbZ0z38QgGPf4GeCNB999zwA/zkYHgePrdBIXZH+e8RsvzlaGYlkJ/Bkwz0z8SnJZ2FvBm\n4OcE/0UuK3COWb9eC5kpve16gqLkLIKd728Ieh/Lw8qUzlV0n+1TZSIo+r8CNKSfr63AAeD+UPIU\n8sU7iyft8wSXQI0TVGoXhPhYKYIuytzl41ltFhGcO3qMYCf5Y2BFyM/B/YwtCELPQLAjfhIYAJ4G\nPjVOm2sJBrsMADuB9Xl8/GrghvQbtD/9Qfa3QFlYGQgOzYz3Gvj36T4ewUjh24Du9Jv9O0BVPjKk\nPxRyt2VuX5KvDMW2FPIzYJp5WghOfYwTFK13AGsiyDHr12shMxFchncHQc/FIPAC8M8UoLibIFOk\nn+1TZQJWA78Cjqb/fs8RDMKsCSOPLn8sIiIixT2GQERERApDBYGIiIioIBAREREVBCIiIoIKAhER\nEUEFgYiIiKCCQERERFBBICIiIqggEBEREVQQiIiICCoIREREBBUEIiIiAvx/SawLn9+R+wUAAAAA\nSUVORK5CYII=\n",
202 | "text/plain": [
203 | ""
204 | ]
205 | },
206 | "metadata": {},
207 | "output_type": "display_data"
208 | }
209 | ],
210 | "source": [
211 | "import matplotlib.pyplot as plt\n",
212 | "\n",
213 | "%matplotlib inline\n",
214 | "\n",
215 | "imgpath = 'English/Fnt/Sample001/img001-00001.png'\n",
216 | "img = cv2.imread(imgpath)\n",
217 | "rsz = convert(imgpath)\n",
218 | "\n",
219 | "# Left panel: the image as loaded from disk. Right panel: what convert() returns for it.\n",
220 | "fig, (ax_raw, ax_out) = plt.subplots(1, 2)\n",
221 | "ax_raw.imshow(img, cmap='gray')\n",
222 | "ax_out.imshow(rsz, cmap='gray')"
223 | ]
224 | },
225 | {
226 | "cell_type": "markdown",
227 | "metadata": {},
228 | "source": [
229 | "## 预处理所有图片"
230 | ]
231 | },
232 | {
233 | "cell_type": "code",
234 | "execution_count": 6,
235 | "metadata": {
236 | "collapsed": false
237 | },
238 | "outputs": [
239 | {
240 | "name": "stderr",
241 | "output_type": "stream",
242 | "text": [
243 | "train/0/: 100%|██████████| 1016/1016 [00:00<00:00, 1150.59it/s]\n",
244 | "train/1/: 100%|██████████| 1016/1016 [00:00<00:00, 1232.67it/s]\n",
245 | "train/2/: 100%|██████████| 1016/1016 [00:00<00:00, 1201.22it/s]\n",
246 | "train/3/: 100%|██████████| 1016/1016 [00:00<00:00, 1186.00it/s]\n",
247 | "train/4/: 100%|██████████| 1016/1016 [00:00<00:00, 1176.58it/s]\n",
248 | "train/5/: 100%|██████████| 1016/1016 [00:01<00:00, 936.41it/s]\n",
249 | "train/6/: 100%|██████████| 1016/1016 [00:01<00:00, 897.20it/s]\n",
250 | "train/7/: 100%|██████████| 1016/1016 [00:01<00:00, 997.21it/s]\n",
251 | "train/8/: 100%|██████████| 1016/1016 [00:01<00:00, 973.86it/s]\n",
252 | "train/9/: 100%|██████████| 1016/1016 [00:01<00:00, 961.00it/s]\n",
253 | "train/10/: 100%|██████████| 52832/52832 [00:52<00:00, 999.49it/s] \n"
254 | ]
255 | }
256 | ],
257 | "source": [
258 | "rmdir('train')  # start from an empty output tree on every run\n",
259 | "\n",
260 | "for i in range(11):\n",
261 | "    path = 'English/Fnt/Sample%03d/' % (i+1)  # SampleNNN folders are 1-based, train/ labels are 0-based\n",
262 | "    trainpath = 'train/%d/' % i\n",
263 | "    mkdir(trainpath)\n",
264 | "    for imgname in tqdm(os.listdir(path), desc=trainpath):\n",
265 | "        try:\n",
266 | "            cv2.imwrite(trainpath + imgname, convert(path + imgname))\n",
267 | "        except Exception as err:  # still best effort, but report which image was dropped and why\n",
268 | "            tqdm.write('skipped %s: %r' % (path + imgname, err))"
269 | ]
270 | },
271 | {
272 | "cell_type": "markdown",
273 | "metadata": {},
274 | "source": [
275 | "# 分离出验证数据集\n",
276 | "\n",
277 | "http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html"
278 | ]
279 | },
280 | {
281 | "cell_type": "code",
282 | "execution_count": 11,
283 | "metadata": {
284 | "collapsed": false
285 | },
286 | "outputs": [],
287 | "source": [
288 | "from sklearn.model_selection import train_test_split\n",
289 | "for i in range(11):\n",
290 | "    trainpath = 'train/%d/' % i\n",
291 | "    validpath = 'valid/%d/' % i\n",
292 | "    mkdir(validpath)\n",
293 | "    imgs = sorted(os.listdir(trainpath))  # listdir order is arbitrary; sort so the seed pins the split\n",
294 | "    trainimgs, validimgs = train_test_split(imgs, test_size=0.1, random_state=0)\n",
295 | "    for imgname in validimgs:  # move the held-out 10% of each class from train/ to valid/\n",
296 | "        os.rename(trainpath+imgname, validpath+imgname)"
297 | ]
298 | },
299 | {
300 | "cell_type": "code",
301 | "execution_count": null,
302 | "metadata": {
303 | "collapsed": true
304 | },
305 | "outputs": [],
306 | "source": []
307 | }
308 | ],
309 | "metadata": {
310 | "kernelspec": {
311 | "display_name": "Python 2",
312 | "language": "python",
313 | "name": "python2"
314 | },
315 | "language_info": {
316 | "codemirror_mode": {
317 | "name": "ipython",
318 | "version": 2
319 | },
320 | "file_extension": ".py",
321 | "mimetype": "text/x-python",
322 | "name": "python",
323 | "nbconvert_exporter": "python",
324 | "pygments_lexer": "ipython2",
325 | "version": "2.7.12"
326 | }
327 | },
328 | "nbformat": 4,
329 | "nbformat_minor": 1
330 | }
331 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # 微信数字识别小程序
2 |
3 | 这是一个可以自动识别图片上的数字(仅支持白底黑字)的微信机器人。
4 |
5 | 个人号代码:[wechat_digit_recognition.py](wechat_digit_recognition.py)
6 |
7 | 公众号代码:[wx.py](wx.py)
8 |
9 | # 构建模型
10 |
11 | ## 数据集处理
12 |
13 | 代码:[Preprocessing dataset.ipynb](Preprocessing dataset.ipynb)
14 |
15 | 在线预览:[https://ypwhs.github.io/wechat_digit_recognition/Preprocessing dataset.html](https://ypwhs.github.io/wechat_digit_recognition/Preprocessing dataset.html)
16 |
17 | ## 训练模型
18 |
19 | 代码:[Training Model.ipynb](Training Model.ipynb)
20 |
21 | 在线预览:[https://ypwhs.github.io/wechat_digit_recognition/Training Model.html](https://ypwhs.github.io/wechat_digit_recognition/Training Model.html)
22 |
23 | # 服务端的配置
24 |
25 | ## 需要的库
26 |
27 | * [ItChat 1.1.11](https://github.com/littlecodersh/ItChat) (个人号需要)
28 | * [wechatpy 1.2.16](https://github.com/jxtech/wechatpy) (公众号需要)
29 | * [requests 2.11.1](https://github.com/kennethreitz/requests) (公众号需要)
30 | * [OpenCV 3.1.0](https://github.com/opencv/opencv)
31 | * [TensorFlow 0.10.0rc0](https://github.com/tensorflow/tensorflow/tree/v0.10.0rc0)
32 | * [Keras 1.1.0](https://github.com/fchollet/keras)
33 |
34 | 如果你使用 macOS,建议用 brew 安装 OpenCV。
35 |
36 | ```shell
37 | brew install opencv3 --HEAD
38 | ```
39 |
40 | ## 思路
41 |
42 | ### 粗提取数字
43 |
44 | 将图片转灰度,自适应二值化,提取轮廓,寻找最小矩形边界,判断是否满足预设条件,如宽、高,宽高比。
45 |
46 | 
47 |
48 | ```python
49 | img = cv2.imread(imgpath)
50 | gray = cv2.imread(imgpath, cv2.IMREAD_GRAYSCALE)
51 | bw = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 25, 25)
52 | img2, ctrs, hier = cv2.findContours(bw.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
53 | rects = [cv2.boundingRect(ctr) for ctr in ctrs]
54 |
55 | for rect in rects:
56 | x, y, w, h = rect
57 | roi = gray[y:y + h, x:x + w]
58 | hw = float(h) / w
59 | if (w < 200) & (h < 200) & (h > 10) & (w > 10) & (1.1 < hw) & (hw < 5):
60 | res = resize(roi)
61 | ...
62 |
63 | ```
64 | * [基本操作](http://docs.opencv.org/3.1.0/d3/df2/tutorial_py_basic_ops.html)
65 | * [二值化](http://docs.opencv.org/3.1.0/d7/d4d/tutorial_py_thresholding.html)
66 | * [提取轮廓](http://docs.opencv.org/3.1.0/dd/d49/tutorial_py_contour_features.html)
67 |
68 | ### 缩放
69 |
70 | 将满足条件的图片缩放至最大边长为28的小图,然后将其放入一个28\*28的白色图像的中心位置。这样做的原因是神经网络只接受28\*28的数据。
71 |
72 | 
73 |
74 | ```python
75 | def resize(rawimg):
76 | fx = 28.0 / rawimg.shape[0]
77 | fy = 28.0 / rawimg.shape[1]
78 | fx = fy = min(fx, fy)
79 | img = cv2.resize(rawimg, None, fx=fx, fy=fy, interpolation=cv2.INTER_CUBIC)
80 | outimg = np.ones((28, 28), dtype=np.uint8) * 255
81 | w = img.shape[1]
82 | h = img.shape[0]
83 | x = (28 - w) / 2
84 | y = (28 - h) / 2
85 | outimg[y:y+h, x:x+w] = img
86 | return outimg
87 |
88 | ```
89 |
90 | ### 识别
91 |
92 | 将处理好的图片送入深度神经网络中运算,得到识别的结果。11类是因为0~9代表各个数字,10代表非数字。
93 |
94 | 
95 |
96 | 网络结构如下: 784->512->512->11
97 |
98 | ```
99 | ____________________________________________________________________________________________________
100 | Layer (type) Output Shape Param # Connected to
101 | ====================================================================================================
102 | dense_1 (Dense) (None, 512) 401920 dense_input_1[0][0]
103 | ____________________________________________________________________________________________________
104 | activation_1 (Activation) (None, 512) 0 dense_1[0][0]
105 | ____________________________________________________________________________________________________
106 | dropout_1 (Dropout) (None, 512) 0 activation_1[0][0]
107 | ____________________________________________________________________________________________________
108 | dense_2 (Dense) (None, 512) 262656 dropout_1[0][0]
109 | ____________________________________________________________________________________________________
110 | activation_2 (Activation) (None, 512) 0 dense_2[0][0]
111 | ____________________________________________________________________________________________________
112 | dropout_2 (Dropout) (None, 512) 0 activation_2[0][0]
113 | ____________________________________________________________________________________________________
114 | dense_3 (Dense) (None, 11) 5643 dropout_2[0][0]
115 | ____________________________________________________________________________________________________
116 | activation_3 (Activation) (None, 11) 0 dense_3[0][0]
117 | ====================================================================================================
118 | Total params: 670219
119 | ____________________________________________________________________________________________________
120 | ```
121 |
122 | 识别出来以后用方框标记出来,然后将识别好的数字打印在图上。
123 |
124 | ```python
125 | if (w < 200) & (h < 200) & (h > 10) & (w > 10) & (1.1 < hw) & (hw < 5):
126 | res = resize(roi)
127 | res = cv2.bitwise_not(res)
128 | res = np.resize(res, (1, 784))
129 |
130 | predictions = model.predict(res)
131 | predictions = np.argmax(predictions)
132 | if predictions != 10:
133 | cv2.rectangle(img, (x, y), (x+w, y+h), (0, 255, 0), 1)
134 | cv2.putText(img, '{:.0f}'.format(predictions), (x, y), cv2.FONT_HERSHEY_DUPLEX, h/25.0, (255, 0, 0))
135 |
136 | ```
137 |
138 | * [图像基本运算](http://docs.opencv.org/3.1.0/d0/d86/tutorial_py_image_arithmetics.html)
139 | * [绘图函数](http://docs.opencv.org/3.1.0/dc/da5/tutorial_py_drawing_functions.html)
140 | * [Keras model](https://keras.io/models/model/)
141 | * [Keras 中文版 模型介绍](http://keras-cn.readthedocs.io/en/latest/getting_started/sequential_model/)
142 |
143 |
144 | ### 个人号
145 |
146 | 收到任何人发过来的图片以后,程序自动下载图片,然后识别,保存标记识别好的数字的图片,发送给刚才发图片的人。
147 |
148 | ```python
149 | @itchat.msg_register([PICTURE])
150 | def download_files(msg):
151 | friend = itchat.search_friends(userName=msg['FromUserName'])
152 | print time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), friend['NickName'], msg['Type']
153 | filename = msg['FileName']
154 | convertfilename = filename.replace('.', '.convert.')
155 | msg['Text'](filename) # download image
156 | if cv2.imread(filename) is not None:
157 | cv2.imwrite(convertfilename, convert(filename))
158 | itchat.send('@img@%s' % convertfilename, msg['FromUserName'])
159 | ```
160 |
161 | * [接收消息与文件](http://itchat.readthedocs.io/zh/latest/3.Handler/)
162 | * [回复](http://itchat.readthedocs.io/zh/latest/5.Reply/)
163 |
164 | 
165 |
166 | ### 公众号
167 |
168 | 首先需要配置 apache 支持 python cgi 应用,然后在公众号后台配置服务器,得到 token 和 EncodingAESKey。当有人发送消息时,微信会自动将消息 POST 到预设的地址(比如:[http://w.luckiestcat.com/wx.py](http://w.luckiestcat.com/wx.py))。程序下载其中的图片并进行识别,将识别后的图片保存到服务器上,然后回复给刚才发图片的人。
169 |
170 | ```python
# Parse the incoming WeChat message and build the reply for it.
msg = parse_message(body_text)
reply = ''
if msg.type == 'text':
    reply = create_reply('Text:' + msg.content.encode('utf-8'), message=msg)
elif msg.type == 'image':
    # Default reply, kept when the download cannot be decoded as an image.
    reply = create_reply('图片', message=msg)
    try:
        # A timeout keeps a stalled download from hanging the CGI process,
        # and HTTP errors are turned into the failure reply below.
        r = requests.get(msg.image, timeout=5)  # download image
        r.raise_for_status()
        filename = 'img/' + str(int(time.time())) + '.jpg'
        convertfilename = filename.replace('.', '.convert.')
        # Binary mode: the payload is raw image bytes, not text.
        with open(filename, 'wb') as f:
            f.write(r.content)
        if cv2.imread(filename) is not None:
            # load model (module-level name used by convert)
            with open('model.json', 'r') as f:
                model = model_from_json(f.read())
            model.load_weights('model.h5')

            cv2.imwrite(convertfilename, convert(filename))
            url = 'http://w.luckiestcat.com/' + convertfilename
            reply = ArticlesReply(message=msg, articles=[{
                'title': u'识别成功',
                'url': url,
                'description': u'',
                'image': url
            }])
    except Exception:
        # Any failure while downloading or recognising becomes a user-facing
        # error reply; SystemExit/KeyboardInterrupt are no longer swallowed.
        reply = create_reply('识别失败', message=msg)

print(reply)
201 |
202 | ```
203 |
204 |
205 |
--------------------------------------------------------------------------------
/_config.yml:
--------------------------------------------------------------------------------
1 | theme: jekyll-theme-cayman
--------------------------------------------------------------------------------
/model.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ypwhs/wechat_digit_recognition/91d2ced1cb572891595f00eecefb00af2ac73398/model.h5
--------------------------------------------------------------------------------
/model.json:
--------------------------------------------------------------------------------
1 | {"class_name": "Sequential", "keras_version": "1.1.0", "config": [{"class_name": "Flatten", "config": {"batch_input_shape": [null, 28, 28, 1], "trainable": true, "name": "flatten_1", "input_dtype": "float32"}}, {"class_name": "Dense", "config": {"W_constraint": null, "b_constraint": null, "name": "dense_1", "activity_regularizer": null, "trainable": true, "init": "glorot_uniform", "bias": true, "input_dim": null, "b_regularizer": null, "W_regularizer": null, "activation": "linear", "output_dim": 512}}, {"class_name": "Activation", "config": {"activation": "relu", "trainable": true, "name": "activation_1"}}, {"class_name": "Dropout", "config": {"p": 0.2, "trainable": true, "name": "dropout_1"}}, {"class_name": "Dense", "config": {"W_constraint": null, "b_constraint": null, "name": "dense_2", "activity_regularizer": null, "trainable": true, "init": "glorot_uniform", "bias": true, "input_dim": null, "b_regularizer": null, "W_regularizer": null, "activation": "linear", "output_dim": 512}}, {"class_name": "Activation", "config": {"activation": "relu", "trainable": true, "name": "activation_2"}}, {"class_name": "Dropout", "config": {"p": 0.2, "trainable": true, "name": "dropout_2"}}, {"class_name": "Dense", "config": {"W_constraint": null, "b_constraint": null, "name": "dense_3", "activity_regularizer": null, "trainable": true, "init": "glorot_uniform", "bias": true, "input_dim": null, "b_regularizer": null, "W_regularizer": null, "activation": "linear", "output_dim": 11}}, {"class_name": "Activation", "config": {"activation": "softmax", "trainable": true, "name": "activation_3"}}]}
--------------------------------------------------------------------------------
/test.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ypwhs/wechat_digit_recognition/91d2ced1cb572891595f00eecefb00af2ac73398/test.png
--------------------------------------------------------------------------------
/wechat_digit_recognition.py:
--------------------------------------------------------------------------------
1 | # coding: utf-8
2 | import itchat
3 | from itchat.content import *
4 | import time
5 | import cv2
6 | import numpy as np
7 | from keras.models import model_from_json
8 |
# Rebuild the network from its JSON description, attach the trained weights,
# and print the layer summary.
with open('model.json', 'r') as model_file:
    model_json = model_file.read()
model = model_from_json(model_json)
model.load_weights('model.h5')
model.summary()
14 |
15 |
def resize(rawimg):  # resize img to 28*28
    """Fit an image inside a 28x28 white canvas, keeping its aspect ratio.

    The image is scaled uniformly so that its larger side becomes 28 pixels,
    then pasted centred onto a uint8 canvas filled with 255.

    Args:
        rawimg: Image array whose first two axes are (rows, cols). It is
            expected to be single-channel, since the canvas is 2-D.

    Returns:
        A (28, 28) uint8 array containing the scaled image.
    """
    side = 28
    rows, cols = rawimg.shape[0], rawimg.shape[1]
    # One uniform factor for both axes, chosen so that neither exceeds 28.
    scale = min(float(side) / rows, float(side) / cols)
    img = cv2.resize(rawimg, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
    outimg = np.full((side, side), 255, dtype=np.uint8)
    h, w = img.shape[0], img.shape[1]
    # Floor division keeps the offsets integral under true division as well;
    # float offsets are rejected as slice indices.
    x = (side - w) // 2
    y = (side - h) // 2
    outimg[y:y + h, x:x + w] = img
    return outimg
28 |
29 |
def convert(imgpath):  # read digits
    """Detect digit-like regions in an image file and label them.

    The image is thresholded, external contours are located, and every
    bounding box that passes the size and aspect-ratio filter is resized to
    28x28 and classified with the module-level ``model``. Boxes whose
    predicted class is not 10 are outlined and labelled on the colour image.

    Args:
        imgpath: Path of an image file that OpenCV can read.

    Returns:
        The colour image with rectangles and predicted digits drawn on it.
    """
    img = cv2.imread(imgpath)
    gray = cv2.imread(imgpath, cv2.IMREAD_GRAYSCALE)
    bw = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 25, 25)
    # findContours returns (image, contours, hierarchy) on OpenCV 3.x but
    # (contours, hierarchy) on other major versions; the contour list is
    # always the second-to-last element.
    ctrs = cv2.findContours(bw.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    for ctr in ctrs:
        x, y, w, h = cv2.boundingRect(ctr)
        hw = float(h) / w
        # Keep only boxes 11..199 px on each side that are taller than wide.
        if not (10 < w < 200 and 10 < h < 200 and 1.1 < hw < 5):
            continue

        roi = gray[y:y+h, x:x+w]
        res = resize(roi).reshape(1, 28, 28, 1)

        digit = int(np.argmax(model.predict(res)))
        # Class 10 is skipped - presumably the "not a digit" class; confirm
        # against the training notebook.
        if digit != 10:
            cv2.rectangle(img, (x, y), (x+w, y+h), (0, 255, 0), 1)
            cv2.putText(img, str(digit), (x, y), cv2.FONT_HERSHEY_DUPLEX, h/25.0, (255, 0, 0))
    return img
51 |
52 |
@itchat.msg_register([TEXT])
def general_reply(msg):
    """Log an incoming text message to stdout; no reply is sent.

    Args:
        msg: The itchat message dict for the incoming text.
    """
    sender = msg['FromUserName']
    # The lookup can come back empty (e.g. the sender is not a known friend),
    # so fall back to the raw user name instead of crashing on None.
    friend = itchat.search_friends(userName=sender)
    nickname = friend['NickName'] if friend else sender
    print('%s %s %s %s' % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), nickname, msg['Type'], msg['Text']))
    # itchat.send('%s: %s' % (msg['Type'], msg['Text']), msg['FromUserName'])
58 |
59 |
@itchat.msg_register([PICTURE])
def download_files(msg):
    """Annotate the digits in a received picture and send the result back.

    Registered with itchat for PICTURE messages. The picture is saved under
    the name given in ``msg['FileName']``; when OpenCV can decode it,
    ``convert`` draws the recognised digits and the annotated copy is sent
    to the user the picture came from.

    Args:
        msg: The itchat message dict for the incoming picture.
    """
    import os  # local import: os is not imported at file level here

    sender = msg['FromUserName']
    # The lookup can come back empty (e.g. the sender is not a known friend),
    # so fall back to the raw user name instead of crashing on None.
    friend = itchat.search_friends(userName=sender)
    nickname = friend['NickName'] if friend else sender
    print('%s %s %s' % (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), nickname, msg['Type']))

    filename = msg['FileName']
    # Insert ".convert" in front of the extension only; a plain
    # replace('.', ...) would also rewrite dots elsewhere in the name.
    root, ext = os.path.splitext(filename)
    convertfilename = root + '.convert' + ext
    msg['Text'](filename)  # download image
    if cv2.imread(filename) is not None:
        cv2.imwrite(convertfilename, convert(filename))
        itchat.send('@img@%s' % convertfilename, sender)
70 |
71 | itchat.auto_login(hotReload=True)
72 | itchat.run()
73 |
--------------------------------------------------------------------------------
/wx.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 | import os
4 | import sys
5 | import cgitb
6 | import time
7 | import requests
8 | import urlparse
9 | from wechatpy import parse_message
10 | from wechatpy.utils import check_signature
11 | from wechatpy.exceptions import InvalidSignatureException
12 | from wechatpy.replies import create_reply, ArticlesReply
13 |
14 | import time
15 | import cv2
16 | import numpy as np
17 | from keras.models import model_from_json
18 |
# WeChat official-account credentials. Real values are read from the
# environment so they never have to be committed to the repository; the
# original placeholders remain as defaults when the variables are unset.
token = os.environ.get('WECHAT_TOKEN', 'your token')
encoding_aes_key = os.environ.get('WECHAT_ENCODING_AES_KEY', 'your key')
21 |
22 |
def resize(rawimg):  # resize img to 28*28
    """Fit an image inside a 28x28 white canvas, keeping its aspect ratio.

    The image is scaled uniformly so that its larger side becomes 28 pixels,
    then pasted centred onto a uint8 canvas filled with 255.

    Args:
        rawimg: Image array whose first two axes are (rows, cols). It is
            expected to be single-channel, since the canvas is 2-D.

    Returns:
        A (28, 28) uint8 array containing the scaled image.
    """
    side = 28
    rows, cols = rawimg.shape[0], rawimg.shape[1]
    # One uniform factor for both axes, chosen so that neither exceeds 28.
    scale = min(float(side) / rows, float(side) / cols)
    img = cv2.resize(rawimg, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
    outimg = np.full((side, side), 255, dtype=np.uint8)
    h, w = img.shape[0], img.shape[1]
    # Floor division keeps the offsets integral under true division as well;
    # float offsets are rejected as slice indices.
    x = (side - w) // 2
    y = (side - h) // 2
    outimg[y:y + h, x:x + w] = img
    return outimg
35 |
36 |
def convert(imgpath):  # read digits
    """Detect digit-like regions in an image file and label them.

    The image is thresholded, external contours are located, and every
    bounding box that passes the size and aspect-ratio filter is resized to
    28x28 and classified with the module-level ``model`` (which must be
    loaded before this function is called). Boxes whose predicted class is
    not 10 are outlined and labelled on the colour image.

    Args:
        imgpath: Path of an image file that OpenCV can read.

    Returns:
        The colour image with rectangles and predicted digits drawn on it.
    """
    img = cv2.imread(imgpath)
    gray = cv2.imread(imgpath, cv2.IMREAD_GRAYSCALE)
    bw = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 25, 25)
    # findContours returns (image, contours, hierarchy) on OpenCV 3.x but
    # (contours, hierarchy) on other major versions; the contour list is
    # always the second-to-last element.
    ctrs = cv2.findContours(bw.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    for ctr in ctrs:
        x, y, w, h = cv2.boundingRect(ctr)
        hw = float(h) / w
        # Keep only boxes 11..199 px on each side that are taller than wide.
        if not (10 < w < 200 and 10 < h < 200 and 1.1 < hw < 5):
            continue

        roi = gray[y:y+h, x:x+w]
        res = resize(roi).reshape(1, 28, 28, 1)

        digit = int(np.argmax(model.predict(res)))
        # Class 10 is skipped - presumably the "not a digit" class; confirm
        # against the training notebook.
        if digit != 10:
            cv2.rectangle(img, (x, y), (x+w, y+h), (0, 255, 0), 1)
            cv2.putText(img, str(digit), (x, y), cv2.FONT_HERSHEY_DUPLEX, h/25.0, (255, 0, 0))
    return img
58 |
59 |
# Enable debugging output.
# NOTE(review): cgitb renders tracebacks into the HTTP response; consider
# turning it off once the endpoint is stable.
cgitb.enable()

# Read the POST body.
body_text = sys.stdin.read()

# Emit the CGI response headers before any other output.
print("Content-Type: text/html")
print("")

# Read the URL query string; a missing variable is treated like an empty one.
query_string = os.environ.get("QUERY_STRING", '')

if not query_string:
    print('本页面仅允许微信访问')
    sys.exit(0)

# Extract the three parameters that WeChat signs every request with.
try:
    arguments = urlparse.parse_qs(query_string)
    signature = arguments['signature'][0]
    timestamp = arguments['timestamp'][0]
    nonce = arguments['nonce'][0]
except (KeyError, IndexError):
    print('arguments error')
    sys.exit(0)

# Validate the timestamp: anything older than 5 minutes is rejected, and a
# non-numeric value is rejected the same way instead of raising.
try:
    request_timestamp = int(timestamp)
except ValueError:
    print('Incorrect timestamp')
    sys.exit(0)

current_timestamp = int(time.time())

if (current_timestamp - request_timestamp) > 300:
    print('Incorrect timestamp')
    sys.exit(0)

# Signature check.
try:
    check_signature(token, signature, timestamp, nonce)
except InvalidSignatureException:
    print('error')
    sys.exit(0)

# Server verification handshake: echo the challenge back and stop.
if 'echostr' in arguments:
    echostr = arguments['echostr'][0]
    print(echostr)
    sys.exit(0)
104 |
# Parse the incoming WeChat message and build the reply for it.
msg = parse_message(body_text)
reply = ''
if msg.type == 'text':
    reply = create_reply('Text:' + msg.content.encode('utf-8'), message=msg)
elif msg.type == 'image':
    # Default reply, kept when the download cannot be decoded as an image.
    reply = create_reply('图片', message=msg)
    try:
        # A timeout keeps a stalled download from hanging the CGI process,
        # and HTTP errors are turned into the failure reply below.
        r = requests.get(msg.image, timeout=5)  # download image
        r.raise_for_status()
        filename = 'img/' + str(int(time.time())) + '.jpg'
        convertfilename = filename.replace('.', '.convert.')
        # Binary mode: the payload is raw image bytes, not text.
        with open(filename, 'wb') as f:
            f.write(r.content)
        if cv2.imread(filename) is not None:
            # load model (module-level name used by convert)
            with open('model.json', 'r') as f:
                model = model_from_json(f.read())
            model.load_weights('model.h5')

            cv2.imwrite(convertfilename, convert(filename))
            url = 'http://w.luckiestcat.com/' + convertfilename
            reply = ArticlesReply(message=msg, articles=[{
                'title': u'识别成功',
                'url': url,
                'description': u'',
                'image': url
            }])
    except Exception:
        # Any failure while downloading or recognising becomes a user-facing
        # error reply; SystemExit/KeyboardInterrupt are no longer swallowed.
        reply = create_reply('识别失败', message=msg)

print(reply)
135 |
--------------------------------------------------------------------------------