├── .gitignore ├── Preprocessing dataset.html ├── Preprocessing dataset.ipynb ├── README.md ├── Training Model.html ├── Training Model.ipynb ├── _config.yml ├── model.h5 ├── model.json ├── test.png ├── wechat_digit_recognition.py └── wx.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | *.png 3 | *.jpg 4 | itchat.pkl 5 | *.tgz 6 | English 7 | train 8 | valid 9 | -------------------------------------------------------------------------------- /Preprocessing dataset.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 下载数据集\n", 8 | "\n", 9 | "数据集来自 http://www.ee.surrey.ac.uk/CVSSP/demos/chars74k/\n", 10 | "\n", 11 | "我们下载的是**EnglishFnt.tgz**,是印刷体数字加大小写字母。\n", 12 | "\n", 13 | "* [tqdm](https://github.com/tqdm/tqdm) 是一个进度条的库。\n", 14 | "* [requests](http://docs.python-requests.org/en/master/) 是一个对人类友好的 HTTP 库。" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 1, 20 | "metadata": { 21 | "collapsed": false 22 | }, 23 | "outputs": [ 24 | { 25 | "name": "stderr", 26 | "output_type": "stream", 27 | "text": [ 28 | "48651KB [01:38, 492.01KB/s] \n" 29 | ] 30 | } 31 | ], 32 | "source": [ 33 | "import requests\n", 34 | "from tqdm import tqdm\n", 35 | "import os\n", 36 | "\n", 37 | "fileurl = 'http://www.ee.surrey.ac.uk/CVSSP/demos/chars74k/EnglishFnt.tgz'\n", 38 | "filename = 'EnglishFnt.tgz'\n", 39 | "if not os.path.exists(filename):\n", 40 | " r = requests.get(fileurl, stream=True)\n", 41 | " with open(filename, 'wb') as f:\n", 42 | " for chunk in tqdm(r.iter_content(1024), unit='KB', total=int(r.headers['Content-Length'])/1024): \n", 43 | " f.write(chunk)" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "# 解压数据集\n", 51 | "\n", 52 | "* [tarfile](https://docs.python.org/2/library/tarfile.html) 是 Python 自带的操作 tar 文件的库。\n", 53 | "* [shutil](https://docs.python.org/2/library/shutil.html) 是 Python 自带的高级文件操作库。\n" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 2, 59 | "metadata": { 60 | "collapsed": false 61 | }, 62 | "outputs": [ 63 | { 64 | "name": "stdout", 65 | "output_type": "stream", 66 | "text": [ 67 | "loading\n" 68 | ] 69 | }, 70 | { 71 | "name": "stderr", 72 | "output_type": "stream", 73 | "text": [ 74 | "100%|██████████| 63055/63055 [00:11<00:00, 5350.48it/s]\n" 75 | ] 76 | } 77 | ], 78 | "source": [ 79 | "import tarfile\n", 80 | "import shutil\n", 81 | "\n", 82 | "def mkdir(path):\n", 83 | " if not os.path.exists(path):\n", 84 | " os.makedirs(path)\n", 85 | "\n", 86 | "def rmdir(path):\n", 87 | " if os.path.exists(path):\n", 88 | " shutil.rmtree(path)\n", 89 | "\n", 90 | "with tarfile.open(filename, 'r') as tfile:\n", 91 | " print 'loading'\n", 92 | " members = tfile.getmembers()\n", 93 | " for member in tqdm(members):\n", 94 | " if tarfile.TarInfo.isdir(member):\n", 95 | " mkdir(member.name)\n", 96 | " continue\n", 97 | " with open(member.name, 'wb') as f:\n", 98 | " f.write(tfile.extractfile(member).read())" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "# 分类数据集\n", 106 | "\n", 107 | "数据集有数字和大小写字母,但是我们只需要0123456789和非数字。\n", 108 | "\n", 109 | "因此将 A~Z,a~z 的图片移到 A 的文件夹,再将其他空文件夹删除。" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 3, 115 | "metadata": { 116 | "collapsed": false 117 | }, 118 | "outputs": [ 119 | { 120 | "name": "stderr", 121 | "output_type": "stream", 122 | "text": [ 123 | "100%|██████████| 51/51 [00:03<00:00, 12.79it/s]\n" 124 | ] 125 | } 126 | ], 127 | "source": [ 128 | "notnumdir = 'English/Fnt/Sample011/'\n", 129 | "for i in tqdm(range(12, 63)):\n", 130 | " path = 'English/Fnt/Sample%03d/' % i\n", 131 | " for filename in os.listdir(path):\n", 132 | " os.rename(path+filename, notnumdir+filename)\n", 133 | " os.rmdir(path)" 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "metadata": {}, 139 | "source": [ 140 | "# 预处理数据集\n", 141 | "\n", 142 | "我们需要将这里的图片裁切为28*28,以便于输入到神经网络中。\n", 143 | "\n", 144 | "## 首先测试一张图" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 4, 150 | "metadata": { 151 | "collapsed": false 152 | }, 153 | "outputs": [], 154 | "source": [ 155 | "import cv2\n", 156 | "import numpy as np\n", 157 | "\n", 158 | "def resize(rawimg): # resize img to 28*28\n", 159 | " fx = 28.0 / rawimg.shape[0]\n", 160 | " fy = 28.0 / rawimg.shape[1]\n", 161 | " fx = fy = min(fx, fy)\n", 162 | " img = cv2.resize(rawimg, None, fx=fx, fy=fy, interpolation=cv2.INTER_CUBIC)\n", 163 | " outimg = np.ones((28, 28), dtype=np.uint8) * 255\n", 164 | " w = img.shape[1]\n", 165 | " h = img.shape[0]\n", 166 | " x = (28 - w) / 2\n", 167 | " y = (28 - h) / 2\n", 168 | " outimg[y:y+h, x:x+w] = img\n", 169 | " return outimg\n", 170 | "\n", 171 | "def convert(imgpath):\n", 172 | " img = cv2.imread(imgpath)\n", 173 | " gray = cv2.imread(imgpath, cv2.IMREAD_GRAYSCALE)\n", 174 | " bw = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 25, 25)\n", 175 | " img2, ctrs, hier = cv2.findContours(bw.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)\n", 176 | " rects = [cv2.boundingRect(ctr) for ctr in ctrs]\n", 177 | " x, y, w, h = rects[-1]\n", 178 | " roi = gray[y:y+h, x:x+w]\n", 179 | " return resize(roi)" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 5, 185 | "metadata": { 186 | "collapsed": false 187 | }, 188 | "outputs": [ 189 | { 190 | "data": { 191 | "text/plain": [ 192 | "" 193 | ] 194 | }, 195 | "execution_count": 5, 196 | "metadata": {}, 197 | "output_type": "execute_result" 198 | }, 199 | { 200 | "data": { 201 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAgQAAAEBCAYAAAAHJ724AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAPYQAAD2EBqD+naQAAIABJREFUeJzt3X90XHWd//HnOz+m+dWkaUtboMD2B8WqK5AgWBV0rbsF\n/Fr1688ox6Me9+svPHzz3f2i7LpfWTnruuyRsijssrrrqkA8SL9+gbPYakVZQEqPtIBAASutBUqS\ntvmdTJLJzPv7x50Jk2l+NnPnTpLX45x7krn3M5lXJjNz3/ncz/1cc3dERERkYSuJOoCIiIhETwWB\niIiIqCAQERERFQQiIiKCCgIRERFBBYGIiIiggkBERERQQSAiIiKoIBARERFUEIiIiAgRFwRm9gUz\nO2hmcTPbbWZvjDKPiIjIQhVZQWBmHwa+CXwVOB94AthpZsujyiQiIrJQWVQXNzKz3cCj7n5V+rYB\nLwI3ufv1kYQSERFZoCLpITCzcqAR+EVmnQeVyS5gUxSZRCR8OkwoUrzKInrc5UAp0Jazvg04J7ex\nmS0DtgCHgMGww4nMYxXAHwE73f14IR846zDh/wD2AM0Ehwk3uPuxcdrrfS+SH9N630dVEEzEgPGO\nYWwBbi9wFpH57GPAHQV+zGbgVnf/AYCZfRZ4F/ApYLzDhHrfi+TXpO/7qAqCY0ASWJmzfgUn9hpA\n8B8CtbW1nHfeeWM2bNmyhUsvvTSEiONrbm5m27ZtBXs8ZVCGk82wY8cOdu7cOWZdb28v+/btg/R7\nqlCyDhN+PbPO3d3MJjtMeAjgtttuY+PGjUXxXI9ntrlGRkZIJBKjX7OXoaEh2tvbaWtro62tjfb2\n9tHb7e3tDA6q4yRbTU0Np59++gnL6tWrOf300yktLR33fvP1tZWxf/9+rrjiCpjifR9JQeDuCTN7\nDNgM3AOjgwo3AzeNc5dBgPPOO48HHnigYDnHU1dXR0NDgzIoQ9FnaGho4K/+6q/GrNu7dy+NjY1Q\n+C74GR0mTBsE2LhxIw0NDUXxXI9ntrkSiQTDw8OjX7OXeDzO4sWLWbRoEWZGMpkkHo/T3d1NSYmm\nkclVWlpKVVUV9fX1rFy5kjPPPJO1a9eOLhMVBPP1tTWOSd/3UR4yuAH4frowyBxPrAL+I8JMIlJY\nEx0mXDBSqRQjIyMMDQ0Rj8cZHBxkcHCQeDxOf38/XV1d9PT0MDAwwODgIMPDwySTSaI6Q0zmr8gK\nAne/Mz3nwNcIDh08Dmxx96NRZRKR0Mz0MOGo5uZm6urq2LNnD1u3bgWgqamJpqamUIIWWjKZZHh4\nmP7+fvr6+sYsvb29tLe3c+zYMTo7O+nt7SUej5NIJFQQyLhaWlpoaWkZs667u3ta9410UKG73wLc\nEmUGEQnfSRwmHLVt2zYaGhrYunUr99xzT/hhCyyZTDI0NMTAwAA9PT10d3fT1dVFd3c3nZ2ddHR0\n0NHRQVdXF319faO9BKlUKuroUoTGK5azDhVOqtjOMpjUli1boo5QFP+VKIMyFFuGaZrVYcJi/T1n\nmyuVSjE8PMzAwAC9vb10dHRw/Pjx0SVTJPT09KiHICTz9bU1U5HNVDgTZtYAPPbYY48V5cAPkbki\n6z+FRnffW+jHN7PPA1fz6mHCL7r7byZouyDe9x0dHaNnEeQuR48eZWBggP7+fvr7+0/4fmRkJOr4\nRaWurm50AOGaNWvGDCicbFDhfDfd9/2c6iEQkblNhwlPlBlDkDlk0NHRQXt7O6+88gptbW0MDQ2d\nsCQSCR0ykLxTQSAiEqHcMQSdnZ0cPXqUV155hVdeeYWRkRGSyeTokrk9F3p3ZW5RQSAiEqFUKkUi\nkSAej9PX10dXVxfHjx+nvb2d1tbWGf2sYJzm+LdztxW7TMGT+1XCo4JARGSOMjNKS0spKSmhpKRk\n9Pvxvua2K9YCIZlMnjBjY2bRmIlwqSAQEZmjMgVBeXk5ZWVllJeXT/v7Yp3pMDNDY2YZGBggHo/j\n7ioIQqaCQERkjiopKRndyS9atGjKJRaLjX5fVlacH/+Dg4P09PSMLqWlpbg7iURC124IWXG+IkRE\nZEqZHoJYLEZFRQVVVVVUVlaOu+RuKy8vjzr+uPr7+zl+/DgVFRWjRUtmjIWESwWBiMgclekhyC4I\nqqurqampobq6esz3uetisVjU8cfV09NDRUXFaMGSSCQYGBgo2h6N+UTPsIjIHGVmJxQEixcvZvHi\nxdTW1o5+zSzZ6ysqKqKOP67Ozs7RCYSy52dQQRA+PcMiInPUeIcMampqqKurY8mSJScs2esrKyuj\njj+uY8eOjQ4gzBQD2T0GEh4VBCIic5SZjZ5KWFZWNjrAMBaLjQ4gzD2UkOklqKqqijr+uIaHh6mp\nqaGqqoqKigpisRhlZWVFe5rkfFKc552IiMi0uPsJSyqVGne9yGRUEIiIzAMqCmS2VBCIiMxR2dP6\njlcQ5BYF2fcRyaWCQERkDsvewU9UCKgYkOnQoMIF4MUXX+TFF1+kv7+fRCIRdZxZKS8vp6amhjPO\nOIPVq1dHHUekKOTu9CfrIRCZiAqCBeDhhx/m9ttv5+DBg3R1dUUdZ1aWLl3K2rVrueKKK/jABz4Q\ndRyRojHeYYOJxhKIjEcFwTyUSCQ4fPgwzz//PE8++SSPPPII+/bt4/jx43N+LvCOjg66urowMw4c\nOMBrXvMaNmzYwNlnn63zlGVBmu74ARUCMhUVBPNMIpGgu7ubffv2cd9997F9+3Z6enqijpU38Xh8\n9BDIrl27uPTSS7nssstYtmwZ9fX1RTsdq0gxUFEgk1FBMM8cPnyYxx9/nO3bt7N7924GBgaijhSa\nwcFBdu/ezcjICGbGm970JjZu3Bh1LBGROSnvBYGZXQO8D3gNEAd+DXzJ3Z/ParMIuAH4MLAI2Al8\n3t3b851noRgaGqK7u5snnniCnTt3smfPHg4dOhR1rFCNjIzw0ksvYWbEYjEqKytZsWIFixcvVk+B\niMgMhXHa4cXAt4CLgHcC5cDPzCx74uwbgXcB7wcuAU4DtoeQZcHo7e1l//793H///dx7770cOXIk\n6kgF09bWxq5du3jkkUc4cOAA/f39UUcSEZlz8t5D4O6XZ982s08A7UAj8JCZ1QKfAj7i7g+k23wS\n2G9mF7r7nnxnWgh6enr47W9/y4EDB+jo6GBkZCTqSAWTSCTo6enhhRde4KmnnmLVqlXU19dHHUtE\nZE4pxMRESwAHOtK3GwkKkV9kGrj7c8BhYFMB8sxLvb29PPvssxw5coTh4WFSqVTUkQomc2W0l19+\nmaeffpqOjg4SiYQGUImIzECoBYEFl6e6EXjI3Z9Jr14FDLt77tD3tvQ2OQmJRIKurq55PYhwKseO\nHeN3v/sdv/vd72htbV1QRZGIyGyF3UNwC/BaoGkabY2gJ0FmIJVK0dvby/Hjxzl27NiCLgh6eno4\ncuQIR48epaenRz0EIiIzENpph2b2beBy4GJ3zx7h1grEzKw2p5dgBUEvwYSam5upq6sbs66pqYmm\npunUG/NTKpWitbWVQ4cO0dbWtqAH1A0MDNDZ2Uk8HieZTC74gqClpYWWlpYx67q7uyNKIyLFLpSC\nIF0MvAd4m7sfztn8GDACbAZ+km6/ATgTeGSyn7tt2zYaGhryH3gOc3cGBgbo7e0lHo/P+WsVzEYq\nlWJ4eJi+vj4GBgYWfEEwXrG8d+9eGhsbI0okIsUsjHkIbiE4RLAV6DezlelN3e4+6O49ZvZvwA1m\n1gn0AjcBD+sMg5lzd4aHhxkcHJzxYEIzo7y8nOrqapYsWUIw5CN67k5PTw/9/f0MDQ1Ne8fu7iST\nSeLx+Jyfonm+MbOvAl/NWf2su782ijwicqIwegg+SzAW4Fc56z8J/CD9fTOQBO4imJhoB/CFELLI\nJMrLyznjjDN461vfygc/+EEqKyunvlMBxONxfvrTn/Lwww/z7LPPauc+fzxF0DOYqTwXzrmxInNA\nGPMQTDlQ0d2HgC+mF4lISUkJNTU1rFmzhre97W3U1NREHQmAvr4+Dhw4wDPPPENpaWnUcSR/Rtz9\naNQhRGR8hZiHQEQE4Gwze9nMfm9mt5nZGVEHEpFXqSAQkULYDXwC2EJwWHEN8F9mVh1lKBF5la52\nKCKhc/edWTefMrM9wB+ADwHfiyaViGRTQSAiBefu3Wb2PLB+qraaf0Rk+mYz/4gKAhEpODOrAdbx\n6plHE9L8IyLTN5v5RzSGQERCZ2b/aGaXmNlZZvZmgknJRoCWKe4qIgWiHgIRKYTVwB3AMuAo8BDw\nJnc/HmkqERmlgkBEQufuOuAvUuR0yEBERERUEIiIiIgKAhEREUEFgYiIiKCCQERERFBBICIiIqgg\nEBEREVQQiIiICCoIREREBBUEIiIiggoCERERQQWBiIiIoIJAREREUEEgIiIiFKAgMLNrzCxlZjdk\nrVtkZjeb2TEz6zWzu8xsRdhZREREZHyhFgRm9kbgz4EncjbdCLwLeD9wCXAasD3MLCIiIjKx0AoC\nM6sBbgM+DXRlra8FPgU0u/sD7r4P+CTwFjO7MKw8IiIiMrEwewhuBu519/tz1l8AlAG/yKxw9+eA\nw8CmEPOIiIjIBMrC+KFm9hHgPIKdf66VwLC79+SsbwNWhZFHREREJpf3gsDMVhOMEfhTd0/M5K6A\n5zuPiIiITC2MHoJG4BTgMTOz9LpS4BIzuxK4FFhkZrU5vQQrCHoJJtTc3ExdXd2YdU1NTTQ1NeUt\nvMh80dLSQktLy5h13d3dEaURkWIXRkGwC/jjnHX/AewHvgG8DCSAzcBPAMxsA3Am8MhkP3jbtm00\nNDTkOa7I/DResbx3714aGxsjSiQixSzvBYG79wPPZK8zs37guLvvT9/+N+AGM+sEeoGbgIfdfU++\n84iIiMjUQhlUOI7csQHNQBK4C1gE7AC+UKAsIiIikqMgBYG7vyPn9hDwxfQiIiIiEdO1DEREREQF\ngYjMnpldbGb3mNnL6WuXbB2nzdfM7IiZDZjZz81sfRRZRWR8KghEJB+qgccJxgKdMJ+ImX0JuBL4\nDHAh0A/sNLNYIUOKyMQKNahQROYxd99BMDiYrPlHsl0FXOfu96bbfJxg3pH3AncWKqeITEw9BCIS\nKjNbQzAtefb1S3qAR9H1S0SKhgoCEQnbKoLDCLkzker6JSJFRAWBiERF1y8RKSIaQyAiYWsl2Pmv\nZGwvwQpg31R31jVMRKZvNtcwUUEgIqFy94Nm1kpw/ZInAcysFrgIuHmq++saJiLTN5trmKggEJFZ\nM7NqYD1BTwDAWjM7F+hw9xcJLon+FTM7ABwCrgNeAu6OIK6IjEMFgYjkwwXALwnGBDjwzfT67wOf\ncvfrzawKuBVYAjwIXObuw1GEFZETqSAQkVlz9weYYpCyu18LXFuIPCIyczrLQERERFQQiIiIiAoC\nERERQQWBiIiIoIJAREREUEEgIiIiqCAQERERVBCIiIgIKghEREQEFQQiIiJCSAWBmZ1mZj80s2Nm\nNmBmT5hZQ06br5nZkfT2n5vZ+jCyiIiIyNTyXhCY2RLgYWAI2AJsBP4C6Mxq8yXgSuAzwIVAP7DT\nzGL5ziMiIiJTC+PiRl8GDrv7p7PW/SGnzVXAde5+L4CZfRxoA94L3BlCJhEREZlEGIcM3g38xszu\nNLM2M9trZqPFgZmtAVYBv8isc/ce4FFgUwh5REREZAphFARrgc8BzwF/BvwLcJOZXZHevorgeult\nOfdrS28TERGRAgvjkEEJsMfd/yZ9+wkzex1BkXDbJPczgkJBRERECiyMguAVYH/Ouv3Af09/30qw\n81/J2F6CFcC+yX5wc3MzdXV1Y9Y1NTXR1NQ0m7wi81JLSwstLS1j1nV3d0eURkSKXRgFwcPAOTnr\nziE9sNDdD5pZK7AZeBLAzGqBi4CbJ/vB27Zto6GhYbImIpI2XrG8d+9eGhsbI0okIsUsjIJgG/Cw\nmV1DcMbARcCngT/PanMj8BUzOwAcAq4DXgLuDiGPiIiITCHvBYG7/8bM3gd8A/gb4CBwlbv/KKvN\n9WZWBdwKLAEeBC5z9+F85xEREZGphdFDgLvfB9w3RZtrgWvDeHwRERGZGV3LQERERFQQiIiIiAoC\nERERQQWBiIiIoIJAREREUEEgIiIiqCAQkTwws4vN7B4ze9nMUma2NWf799Lrs5dJT00WkcJSQSAi\n+VANPA58gYkvUvZTgmuYrEovugiJSBEJZWIiEVlY3H0HsAPAzGyCZkPufrRwqURkJtRDICKF8nYz\nazOzZ83sFjNbGnUgEXmVeghEpBB+CmwnuLbJOuDvgfvMbJO7T3SIQUQKSAWBiITO3e/Muvm0mf0W\n+D3wduCXkYQSkTFUEIhIwbn7QTM7BqxnioKgubmZurq6MeuamppoatKYRJFcLS0ttLS0jFnX3d09\nrfuqIBCRgjOz1cAy4JWp2m7bto2GhobwQ4nMA+MVy3v37qWxsXHK+6ogEJFZM7Nqgv/2M2cYrDWz\nc4GO9PJVgjEErel2/wA8D+wsfFoRGY8KAhHJhwsIuv49vXwzvf77wOeBNwAfB5YARwgKgf/j7onC\nRxWR8aggEJFZc/cHmPw05ksLlUVETo7mIRAREREVBCIiIqKCQERERFBBICIiIqggEBEREUIoCMys\nxMyuM7MXzGzAzA6Y2VfGafc1MzuSbvNzM1uf7ywiIiIyPWH0EHwZ+AzBucevAa4GrjazKzMNzOxL\nwJXpdhcC/cBOM4uFkEdERESmEMY8BJuAu9PXRwc4bGYfJdjxZ1wFXOfu9wKY2ceBNuC9QPZFUERE\nRKQAwugh+DWw2czOBkhPX/oW4L707TXAKuAXmTu4ew/wKEExISIiIgUWRg/BN4Ba4FkzSxIUHX/t\n7j9Kb19FMLVpW8792tLbREREpMDCKAg+DHwU+AjwDHAe8E9mdsTdfzjJ/YygUBAREZECC6MguB74\nurv/OH37aTP7I+Aa4IcEVzszYCVjewlWAPsm+8G6LrrI9M3muugisvCEURBUceJ/+inS4xXc/aCZ\ntQKbgScBzKwWuAi4ebIfrOuii0zfbK6LLiILTxgFwb3AX5vZi8DTQAPQDHw3q82NwFfM7ABwCLgO\neAm4O4Q8IiIiMoUwCoIrCXbwNxMcBjgC/HN6HQDufr2ZVQG3Elwf/UHgMncfDiGPiIiITCHvBYG7\n9wP/K71M1u5a4Np8P76IiIjMnK5lICIiIioIRERERAWBiIiIoIJAREREUEEgIiIiqCAQERERVBCI\niIgIKghEREQEFQQiIiKCCgIRERFBBYGIiIiggkBE8sDMrjGzPWbWY2ZtZvYTM9uQ02aRmd1sZsfM\nrNfM7jKzFVFlFpGxVBCISD5cDHwLuAh4J1AO/MzMKrPa3Ai8C3g/cAlwGrC9wDlFZAJhXP5YRBYY\nd788+7aZfQJoBxqBh8ysFvgU8BF3fyDd5pPAfjO70N33FDiyiORQD4GIhGEJ4EBH+nYjwT8gv8g0\ncPfngMPApoKnE5ETqCAQkbwyMyM4PPCQuz+TXr0KGHb3npzmbeltIhIxHTIQkXy7BXgt8NZptDWC\nngQRiZgKAhHJGzP7NnA5cLG7H8na1ArEzKw2p5dgBUEvwYSam5upq6sbs66pqYmmpqY8pRaZP1pa\nWmhpaRmzrru7e1r3VUEgInmRLgbeA7zN3Q/nbH4MGAE2Az9Jt98AnAk8MtnP3bZtGw0NDfkPLDIP\njVcs7927l8bGxinvq4JARGbNzG4BmoCtQL+ZrUxv6nb3QXfvMbN/A24ws06gF7gJeFhnGIgUBxUE\nIpIPnyUYC/CrnPWfBH6Q/r4ZSAJ3AYuAHcAXCpRPRKaggkBEZs3dpzxjyd2HgC+mFxEpMjM+7dDM\nLjaze8zsZTNLmdnWcdp8zcyOmNmAmf3czNbnbK83s9vNrNvMOs3su2ZWPZtfRERERE7eycxDUA08\nTtDVd8LpQmb2JeBK4DPAhUA/sNPMYlnN7gA2EgwwehfBNKa3nkQWERERyYMZHzJw9x0Ex/4yE5Dk\nugq4zt3vTbf5OMFpRe8F7jSzjcAWoNHd96XbfBH4TzP7S3dvPanfRERERE5aXmcqNLM1BLOOZU9P\n2gM8yqvTk74J6MwUA2m7CHobLspnHhEREZmefE9dvIpgx5470Uj29KSrCC56MsrdkwRznmsKUxER\nkQgU6loG05meVFOYioiIRCTfpx22EuzYVzK2l2AFsC+rzYrsO5lZKVCPpjAVyZvZTGEqIgtPXgsC\ndz9oZq0EZw88CZC+DvpFwM3pZo8AS8zs/KxxBJsJColHJ/v5msJUZPpmM4WpiCw8My4I0vMFrCfY\ngQOsNbNzgQ53f5HgsqdfMbMDwCHgOuAl4G4Ad3/WzHYC3zGzzwEx4FtAi84wEBERicbJ9BBcAPyS\n4Hi/A99Mr/8+8Cl3v97MqgjmFVgCPAhc5u7DWT/jo8C3Cc4uSBFMZXrVSf0GIiIiMmsnMw/BA0wx\nGNHdrwWunWR7F3DFTB9bREREwlGoswxERESkiOniRgtYKpWir6+PQ4cO8eCDD1JZWRl1JADi8TgH\nDhygq6uLZDIZdRwRkQVBBcEClkgkePHFF7nnnnt48MEHGX8m6sJzd7q7u+nv72doaCjqOCIiC4IK\ngjnOzIjFYlRUVBCLxSgpmf5RIHdneHiYjo4OOjo6QkxZGGZGaWkplZWVVFRURB1HRGRO0RiCOc7M\nqKqqYvHixVRWVlJeXh51pMiUlJRQXl5OTU0NVVVVRdPjISIyF6ggmONKSkpYuXIlZ511FitXrqS6\nujrqSJGpqqqivr6eyspKSktLVRCIiMyACoI5rqSkhNraWpYvX87y5cupqqqKOlJkamtrOe200zjl\nlFNYvHixCgIRkRlQQTBPlJeXs2TJkqI5UyAKy5Yt4+yzz2b9+vWsWrVqRuMpREQWOn1izhM1NTWc\nc845nH766TMeXDjXZQYTrl69mte97nUsW7aMWCymHgIRkRlYOHuNea6uro43vOENrFu3jvr6+gU1\nuLCsrIza2lrWrFnD61//ehYvXhx1JBGROUenHc4TixcvZuPGjbzjHe9geHiY+++/nxdeeCHqWAWx\natUqNm3axKZNm1i/fj01NTVRRxIRmXNUEMwTixYtYsWKFZx33nm4O729vSSTSV5++WVGRkaijheK\nsrIyVq5cSUNDA1u2bOHcc89l+fLlUccSEZmTVBDMM2eccQa1tbW4O1VVVWzfvp2enp6oY4WioqKC\nTZs2cemll3L55ZdTX18fdSQRkTlLBcE8E4vFWLJkCQ0NDVRXV3P22Weze/duHnvsMY4fP87g4GDU\nEWelsrKSZcuW0djYyIUXXsjGjRvZsGEDS5cuJRaLRR1PRGTOUkEwD5WXl7Nu3TrWrVvH5Zdfzo9+\n9CNGRkY4ePAgXV1dUceblaVLl7Ju3To+9rGP8cEPfjDqOCKRM7Nxl5KSknHXi0xEBcEC8OY3v5nV\nq1fT399PIpGIOs6sxGIxqqurOeuss6KOIlJUMjv73GJgvMJAZDwqCBaAM888kzPPPDPqGDKPmdk1\nwPuA1wBx4NfAl9z9+aw2vwIuybqbA7e6++cLGHVemqiHILcYEJmM5iEQkXy4GPgWcBHwTqAc+JmZ\nZU+d6cC/AiuBVcCpwNUFzjnvZO/oJyoEsnsPRCaiHgIRmTV3vzz7tpl9AmgHGoGHsjYNuPvRAkab\n17J39NMZQ5B9H5Fc6iEQkTAsIegR6MhZ/zEzO2pmvzWzr+f0IMgsTOeQgYoBmYx6CEQkryzY69wI\nPOTuz2Rtuh34A3AEeANwPbAB+EDBQ84j0+kdyLQTmYwKAhHJt1uA1wJvyV7p7t/Nuvm0mbUCu8xs\njbsfLGTA+cLdSaVSpFIpkskkyWSSkZEREokEiUSC4eFhhoaGGBwcZGBggFgsRiwWo7y8nGQyGXX8\ncfX19TEwMEA8HmdoaIhEIkEymcTdo4427824IDCzi4H/TXBs8FTgve5+T3pbGfB3wGXAWqAb2AV8\n2d1fyfoZ9cC3gf8GpIDtwFXu3j+r30ZEImVm3wYuBy7Ofs9P4FHAgPXAhAVBc3MzdXV1Y9Y1NTXR\n1NQ0y7Rzn7uTTCYZHh4e3emXlpZiZrg7IyMjY7b19fXR3d1NZ2cnFRUVUccfV2dnJy+//DLt7e10\ndHTQ29tLPB6ft1Ow51tLSwstLS1j1nV3d0/rvifTQ1ANPA78O8GOPFsVcB7wt8CTQD1wE3A3cGFW\nuzsIRhpvBmLAfwC3AlecRB4RKQLpYuA9wNvc/fA07nI+wTiDSQuHbdu20dDQkIeE80/uTj+7GMgU\nCkNDQ8Tj8dFioKamhpqamqKd2bOnp4fW1lba29vp7OwcLQjm+hwqhTJesbx3714aGxunvO+MCwJ3\n3wHsgNFjhdnbeoAt2evM7ErgUTNb7e4vmdnGdJtGd9+XbvNF4D/N7C/dvXWmmUQkWmZ2C9AEbAX6\nzWxlelO3uw+a2Vrgo8B9wHHgXOAG4AF3fyqKzPNB5lBBIpFgcHBwTDGQWZfpGaisrKSyspKqqioq\nKyuL9hLpfX19dHR0jC49PT0MDg6qh6AACjGGIDPaODNn7puAzkwxkLYr3eYigt4EEZlbPkvwHv5V\nzvpPAj8AhgnmJ7iKoJfxReDHBIcY5SRl9xBMVAwsWrRo3KWsrDiHkMXjcXp6ekYXHTIonFBfEWa2\nCPgGcIe796VXryI4P3mUuyfNrCO9TUTmGHef9BRmd38JeHth0iwcqVSKkZGREw4TDA4OUl5ePrqU\nlZWNuV1eXk5JSXGedT48PEw8Hh8dWJj5qoIgfKEVBOkBhj8m+K9hOlOTWrqtiIhMQ6YIyO4ZKCkp\nobS0dPTrRN8X62mImd8jc5ZE5nuNIQhfKAVBVjFwBvCOrN4BgFZgRU77UoIBiG2T/VyNNhaZvtmM\nNpa5IXPIwMxGd5i5UxlPtK6YufvoaYaZ73XaYfjyXhBkFQNrgT9x986cJo8AS8zs/KxxBJsJegge\nnexna7SxyPTNZrSxFE5JSQnl5eVUVFRQXV1NXV0dS5cupa+vb/Qc/ImW7J1mhnaccrJOZh6CaoLz\nhjNl5lqykImeAAAKXklEQVQzO5dgitIjBKcinkcwx0B51mjjDndPuPuzZrYT+I6ZfY7gtMNvAS06\nw0BEFprS0lIWLVpEVVUVtbW11NfXMzAwQCKRwMxGTx3MLJnbqVRKO3/Jq5PpIbgA+CXB8X4Hvple\n/32C+QfenV7/eHp9ZmzAnwD/lV73UYKJiXYRTEx0F8HoYxGRBaW0tJRYLEZ1dTW1tbWj59y7O2Vl\nZfT39zMwMDD6dWBggFQqRSKRIJVKRR1f5pGTmYfgASa/KNKUQ1fdvQtNQiQiQklJCbFYjMrKSmpr\naxkeHiaZTGJmlJeXj55+lzkzIJVKjZ5mKJJPxXkiqojIApE5ZFBdXT36X392kdDR0UEsFqOkpAR3\nJ5FIEI/HVRBI3hXniagT2LFjR9QRThi1rQzKELViyFAIxfp7zjZX5pBB9hiCU045hVNPPZXTTz+d\nVatWsXz5cpYsWUJNTc3oLIPFOo/AXDRfX1szNadeUTt37ow6QlG8cJRBGYotQyEU6++Zj4IgM6hw\n8eLFLF26dExBsHLlSpYvX059fT2LFy8eLQjUQ5A/8/W1NVM6ZCAiEqGSkhLKysqoqKgYPQVx0aJF\nDA8PU1lZydDQEAMDA/T09FBZWUksFqOsrEwFgeTdnOohEBERkXCoIBAREZE5c8igAqC3t5e9e/dG\nGqS7u1sZlGHOZti/f3/m24rQAuVPBbyauRie6/HMNtfIyAiJRGL0a/YyNDREe3s7bW1ttLW1cfz4\ncXp7exkcHNQcBONIJpMMDAzQ2dlJVVUVZWVlJJNJ4vE4XV1dlJaWjnu/+fraypju+97mwkxXZvZR\n4Paoc4jMIx9z9zuiDjEZve9F8m7S9/1cKQiWAVuAQ8BgtGlE5rQK4I+Ane5+POIsk9L7XiRvpvW+\nnxMFgYiIiIRLgwpFREREBYGIiIioIBARERFUEIiIiAhzpCAwsy+Y2UEzi5vZbjN7Y4iPdY2Z7TGz\nHjNrM7OfmNmGnDaLzOxmMztmZr1mdpeZrQg5U8rMbihkBjM7zcx+mH6MATN7wswactp8zcyOpLf/\n3MzW5/HxS8zsOjN7If3zD5jZV8Zpl7cMZnaxmd1jZi+nn/OtM308M6s3s9vNrNvMOs3su2ZWnY8M\nZlZmZv9gZk+aWV+6zffN7NR8Zig2hfwMmGaer6b/NtnLMxHkmPXrtdCZzOx74zx394WZKf24xfrZ\nPlWmX+U8V0kzuyWMPEVfEJjZh4FvAl8FzgeeAHaa2fKQHvJi4FvARcA7gXLgZ2ZWmdXmRuBdwPuB\nS4DTgO1hhEl/8P05we+dLdQMZrYEeBgYIjj1ayPwF0BnVpsvAVcCnwEuBPoJ/jaxPMX4cvpnfx54\nDXA1cLWZXRlihmrgceALwAmn4Ezz8e4geL42E/yNLgFuzVOGKuA84G8J3g/vA84B7s5pN9sMRSOC\nz4DpegpYCaxKL2+NIEM+Xq8FzZT2U8Y+d00h5skoqs/2GWRy4F959fk6leCzMP/cvagXYDfwT1m3\nDXgJuLpAj78cSAFvTd+uJdhJvi+rzTnpNhfm+bFrgOeAdwC/BG4oVAbgG8ADU7Q5AjRn3a4F4sCH\n8pThXuA7OevuAn5QiAzp53PrTH5ngp1wCjg/q80WYARYlY8M47S5AEgCq8PIEPUS9WfABJm+CuyN\n+rmZ6rUS9nv0JDN9D/i/RfB8RfbZPt1M6XWjn/1hL0XdQ2Bm5UAj8IvMOg+eoV3ApgLFWEJQoXWk\nbzcSTPmcnek54HAImW4G7nX3+3PWX1CADO8GfmNmd6a7svaa2aczG81sDUG1mp2hB3g0jxl+DWw2\ns7PTj3ku8BbgvgJmGDXNx3sT0Onu+7LuuovgNXRRvjOlZV6jXRFmCEWRfAZM5Ox0t/jvzew2Mzsj\n4jxjFPr9MUNvT3+uPGtmt5jZ0ggyRPnZPt1MGR8zs6Nm9lsz+3pOD0LeFPu1DJYDpUBbzvo2gsot\nVGZmBF1ID7l75vjgKmA4/cbKzbQqj4/9EYKu4QvG2byyABnWAp8j6Kr9O4IdyU1mNujut6Ufxxn/\nb5OvDN8gqNqfNbMkwSGuv3b3H6W3FyJDtuk83iqgPXujuyfNrCOMTGa2iOB5usPd+6LIELJIPwMm\nsRv4BEEP3qnAtcB/mdnr3b0/wlzZCv3+mK6fEnTDHwTWAX8P3Gdmm9LFXuii/GyfYSYIpu/+A0Fv\nzxuA64ENwAfynaHYC4KJGBMfm8qnW4DXMr1jg3nLZGarCV4Yf+ruiZncNV8ZCHa+e9z9b9K3nzCz\n1xEUCbcVKMOHgY8CHwGeISiQ/snMjrj7DwuUYTqm83h5z2RmZcCP0z/389O5S74zRCjS38Xdd2bd\nfMrM9hB8aH+IoEu8mEX93N2ZdfNpM/st8Hvg7QTd44UQyWf7FDKZ3pK90t2/m3XzaTNrBXaZ2Rp3\nP5jPAEV9yAA4RnBsdGXO+hWcWPXmlZl9G7gceLu7H8na1ArEzKw2xEyNwCnAY2aWMLME8DbgKjMb\nTj/OopAzvALsz1m3Hzgz/X0rwRslzL/N9cDfu/uP3f1pd78d2AZcU8AM2abzeK3p26PMrBSoz2em\nrGLgDODPsnoHCpahQCL7DJgJd+8GngdCHcE/Q4V+f5yU9E7tGAV67iL+bJ9OplemaP4owd81789X\nURcE6f+OHyMYKQ2MdqtsJji+HIr0H+c9wJ+4++GczY8RDM7KzrSBYEf5SJ4i7AL+mOA/4nPTy28I\n/jPPfJ8IOcPDnNglew7Bf0GZN3FrToZagkML+frbVHFiZZ4i/botUIZR03y8R4AlZnZ+1l03E7yB\nH81HjqxiYC2w2d07c5qEnqFQovoMmCkzqyHo/p7qw7xgCv3+OFnpHtFlFOC5K4LP9plmGs/5BJ+L\n+X++CjFycZajLj9EMCr24wSnnt0KHAdOCenxbiE4te5igso6s1TktDlI0MXVSLDzfDDk52HMSNOw\nMxCMXRgi+G98HUHXfS/wkaw2V6f/Fu8mKGD+H/A7IJanDN8jGNBzOXAWwSl27cDXw8pAcMrUuQTF\nWAr4n+nbZ0z38QgGPf4GeCNB999zwA/zkYHgePrdBIXZH+e8RsvzlaGYlkJ/Bkwz0z8SnJZ2FvBm\n4OcE/0UuK3COWb9eC5kpve16gqLkLIKd728Ieh/Lw8qUzlV0n+1TZSIo+r8CNKSfr63AAeD+UPIU\n8sU7iyft8wSXQI0TVGoXhPhYKYIuytzl41ltFhGcO3qMYCf5Y2BFyM/B/YwtCELPQLAjfhIYAJ4G\nPjVOm2sJBrsMADuB9Xl8/GrghvQbtD/9Qfa3QFlYGQgOzYz3Gvj36T4ewUjh24Du9Jv9O0BVPjKk\nPxRyt2VuX5KvDMW2FPIzYJp5WghOfYwTFK13AGsiyDHr12shMxFchncHQc/FIPAC8M8UoLibIFOk\nn+1TZQJWA78Cjqb/fs8RDMKsCSOPLn8sIiIixT2GQERERApDBYGIiIioIBAREREVBCIiIoIKAhER\nEUEFgYiIiKCCQERERFBBICIiIqggEBEREVQQiIiICCoIREREBBUEIiIiAvx/SawLn9+R+wUAAAAA\nSUVORK5CYII=\n", 202 | "text/plain": [ 203 | "" 204 | ] 205 | }, 206 | "metadata": {}, 207 | "output_type": "display_data" 208 | } 209 | ], 210 | "source": [ 211 | "import matplotlib.pyplot as plt\n", 212 | "\n", 213 | "%matplotlib inline\n", 214 | "\n", 215 | "imgpath = 'English/Fnt/Sample001/img001-00001.png'\n", 216 | "img = cv2.imread(imgpath)\n", 217 | "rsz = convert(imgpath)\n", 218 | "\n", 219 | "plt.subplot(1, 2, 1)\n", 220 | "plt.imshow(img, cmap='gray')\n", 221 | "plt.subplot(1, 2, 2)\n", 222 | "plt.imshow(rsz, cmap='gray')" 223 | ] 224 | }, 225 | { 226 | "cell_type": "markdown", 227 | "metadata": {}, 228 | "source": [ 229 | "## 预处理所有图片" 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": 6, 235 | "metadata": { 236 | "collapsed": false 237 | }, 238 | "outputs": [ 239 | { 240 | "name": "stderr", 241 | "output_type": "stream", 242 | "text": [ 243 | "train/0/: 100%|██████████| 1016/1016 [00:00<00:00, 1150.59it/s]\n", 244 | "train/1/: 100%|██████████| 1016/1016 [00:00<00:00, 1232.67it/s]\n", 245 | "train/2/: 100%|██████████| 1016/1016 [00:00<00:00, 1201.22it/s]\n", 246 | "train/3/: 100%|██████████| 1016/1016 [00:00<00:00, 1186.00it/s]\n", 247 | "train/4/: 100%|██████████| 1016/1016 [00:00<00:00, 1176.58it/s]\n", 248 | "train/5/: 100%|██████████| 1016/1016 [00:01<00:00, 936.41it/s]\n", 249 | "train/6/: 100%|██████████| 1016/1016 [00:01<00:00, 897.20it/s]\n", 250 | "train/7/: 100%|██████████| 1016/1016 [00:01<00:00, 997.21it/s]\n", 251 | "train/8/: 100%|██████████| 1016/1016 [00:01<00:00, 973.86it/s]\n", 252 | "train/9/: 100%|██████████| 1016/1016 [00:01<00:00, 961.00it/s]\n", 253 | "train/10/: 100%|██████████| 52832/52832 [00:52<00:00, 999.49it/s] \n" 254 | ] 255 | } 256 | ], 257 | "source": [ 258 | "rmdir('train')\n", 259 | "\n", 260 | "for i in range(11):\n", 261 | " path = 'English/Fnt/Sample%03d/' % (i+1)\n", 262 | " trainpath = 'train/%d/' % i\n", 263 | " mkdir(trainpath)\n", 264 | " for filename in tqdm(os.listdir(path), desc=trainpath):\n", 265 | " try:\n", 266 | " cv2.imwrite(trainpath + filename, convert(path + filename))\n", 267 | " except:\n", 268 | " pass" 269 | ] 270 | }, 271 | { 272 | "cell_type": "markdown", 273 | "metadata": {}, 274 | "source": [ 275 | "# 分离出验证数据集\n", 276 | "\n", 277 | "http://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": 11, 283 | "metadata": { 284 | "collapsed": false 285 | }, 286 | "outputs": [], 287 | "source": [ 288 | "from sklearn.model_selection import train_test_split\n", 289 | "for i in range(11):\n", 290 | " trainpath = 'train/%d/' % i\n", 291 | " validpath = 'valid/%d/' % i\n", 292 | " mkdir(validpath)\n", 293 | " imgs = os.listdir(trainpath)\n", 294 | " trainimgs, validimgs = train_test_split(imgs, test_size=0.1)\n", 295 | " for filename in validimgs:\n", 296 | " os.rename(trainpath+filename, validpath+filename)" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": null, 302 | "metadata": { 303 | "collapsed": true 304 | }, 305 | "outputs": [], 306 | "source": [] 307 | } 308 | ], 309 | "metadata": { 310 | "kernelspec": { 311 | "display_name": "Python 2", 312 | "language": "python", 313 | "name": "python2" 314 | }, 315 | "language_info": { 316 | "codemirror_mode": { 317 | "name": "ipython", 318 | "version": 2 319 | }, 320 | "file_extension": ".py", 321 | "mimetype": "text/x-python", 322 | "name": "python", 323 | "nbconvert_exporter": "python", 324 | "pygments_lexer": "ipython2", 325 | "version": "2.7.12" 326 | } 327 | }, 328 | "nbformat": 4, 329 | "nbformat_minor": 1 330 | } 331 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 微信数字识别小程序 2 | 3 | 这是一个可以实现一个自动识别图片上的数字(仅支持白底黑字)的微信机器人。 4 | 5 | 个人号代码:[wechat_digit_recognition.py](wechat_digit_recognition.py) 6 | 7 | 公众号代码:[wx.py](wx.py) 8 | 9 | # 构建模型 10 | 11 | ## 数据集处理 12 | 13 | 代码:[Preprocessing dataset.ipynb](Preprocessing dataset.ipynb) 14 | 15 | 在线预览:[https://ypwhs.github.io/wechat_digit_recognition/Preprocessing dataset.html](https://ypwhs.github.io/wechat_digit_recognition/Preprocessing dataset.html) 16 | 17 | ## 训练模型 18 | 19 | 代码:[Training Model.ipynb](Training Model.ipynb) 20 | 21 | 在线预览:[https://ypwhs.github.io/wechat_digit_recognition/Training Model.html](https://ypwhs.github.io/wechat_digit_recognition/Training Model.html) 22 | 23 | # 服务端的配置 24 | 25 | ## 需要的库 26 | 27 | * [ItChat 1.1.11](https://github.com/littlecodersh/ItChat) (个人号需要) 28 | * [wechatpy 1.2.16](https://github.com/jxtech/wechatpy) (公众号需要) 29 | * [requests 2.1.11](https://github.com/kennethreitz/requests) (公众号需要) 30 | * [OpenCV 3.1.0](https://github.com/opencv/opencv) 31 | * [TensorFlow 0.10.0rc0](https://github.com/tensorflow/tensorflow/tree/v0.10.0rc0) 32 | * [Keras 1.1.0](https://github.com/fchollet/keras) 33 | 34 | OpenCV 建议用 brew 安装,如果你用 macOS。 35 | 36 | ```shell 37 | brew install opencv3 --HEAD 38 | ``` 39 | 40 | ## 思路 41 | 42 | ### 粗提取数字 43 | 44 | 将图片转灰度,自适应二值化,提取轮廓,寻找最小矩形边界,判断是否满足预设条件,如宽、高,宽高比。 45 | 46 | ![](https://raw.githubusercontent.com/ypwhs/resources/master/img1.png) 47 | 48 | ```python 49 | img = cv2.imread(imgpath) 50 | gray = cv2.imread(imgpath, cv2.IMREAD_GRAYSCALE) 51 | bw = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 25, 25) 52 | img2, ctrs, hier = cv2.findContours(bw.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) 53 | rects = [cv2.boundingRect(ctr) for ctr in ctrs] 54 | 55 | for rect in rects: 56 | x, y, w, h = rect 57 | roi = gray[y:y + h, x:x + w] 58 | hw = float(h) / w 59 | if (w < 200) & (h < 200) & (h > 10) & (w > 10) & (1.1 < hw) & (hw < 5): 60 | res = resize(roi) 61 | ... 62 | 63 | ``` 64 | * [基本操作](http://docs.opencv.org/3.1.0/d3/df2/tutorial_py_basic_ops.html) 65 | * [二值化](http://docs.opencv.org/3.1.0/d7/d4d/tutorial_py_thresholding.html) 66 | * [提取轮廓](http://docs.opencv.org/3.1.0/dd/d49/tutorial_py_contour_features.html) 67 | 68 | ### 缩放 69 | 70 | 将满足条件的图片缩放至最大边长为28的小图,然后将其放入一个28\*28的白色图像的中心位置。这样做的原因是神经网络只接受28\*28的数据。 71 | 72 | ![](https://raw.githubusercontent.com/ypwhs/resources/master/img2.png) 73 | 74 | ```python 75 | def resize(rawimg): 76 | fx = 28.0 / rawimg.shape[0] 77 | fy = 28.0 / rawimg.shape[1] 78 | fx = fy = min(fx, fy) 79 | img = cv2.resize(rawimg, None, fx=fx, fy=fy, interpolation=cv2.INTER_CUBIC) 80 | outimg = np.ones((28, 28), dtype=np.uint8) * 255 81 | w = img.shape[1] 82 | h = img.shape[0] 83 | x = (28 - w) / 2 84 | y = (28 - h) / 2 85 | outimg[y:y+h, x:x+w] = img 86 | return outimg 87 | 88 | ``` 89 | 90 | ### 识别 91 | 92 | 将处理好的图片送入深度神经网络中运算,得到识别的结果。11类是因为0~9代表各个数字,10代表非数字。 93 | 94 | ![](https://raw.githubusercontent.com/ypwhs/resources/master/WechatIMG57.jpeg) 95 | 96 | 网络结构如下: 784->512->512->11 97 | 98 | ``` 99 | ____________________________________________________________________________________________________ 100 | Layer (type) Output Shape Param # Connected to 101 | ==================================================================================================== 102 | dense_1 (Dense) (None, 512) 401920 dense_input_1[0][0] 103 | ____________________________________________________________________________________________________ 104 | activation_1 (Activation) (None, 512) 0 dense_1[0][0] 105 | ____________________________________________________________________________________________________ 106 | dropout_1 (Dropout) (None, 512) 0 activation_1[0][0] 107 | ____________________________________________________________________________________________________ 108 | dense_2 (Dense) (None, 512) 262656 dropout_1[0][0] 109 | ____________________________________________________________________________________________________ 110 | activation_2 (Activation) (None, 512) 0 dense_2[0][0] 111 | ____________________________________________________________________________________________________ 112 | dropout_2 (Dropout) (None, 512) 0 activation_2[0][0] 113 | ____________________________________________________________________________________________________ 114 | dense_3 (Dense) (None, 11) 5643 dropout_2[0][0] 115 | ____________________________________________________________________________________________________ 116 | activation_3 (Activation) (None, 11) 0 dense_3[0][0] 117 | ==================================================================================================== 118 | Total params: 670219 119 | ____________________________________________________________________________________________________ 120 | ``` 121 | 122 | 识别出来以后用方框标记出来,然后将识别好的数字打印在图上。 123 | 124 | ```python 125 | if (w < 200) & (h < 200) & (h > 10) & (w > 10) & (1.1 < hw) & (hw < 5): 126 | res = resize(roi) 127 | res = cv2.bitwise_not(res) 128 | res = np.resize(res, (1, 784)) 129 | 130 | predictions = model.predict(res) 131 | predictions = np.argmax(predictions) 132 | if predictions != 10: 133 | cv2.rectangle(img, (x, y), (x+w, y+h), (0, 255, 0), 1) 134 | cv2.putText(img, '{:.0f}'.format(predictions), (x, y), cv2.FONT_HERSHEY_DUPLEX, h/25.0, (255, 0, 0)) 135 | 136 | ``` 137 | 138 | * [图像基本运算](http://docs.opencv.org/3.1.0/d0/d86/tutorial_py_image_arithmetics.html) 139 | * [绘图函数](http://docs.opencv.org/3.1.0/dc/da5/tutorial_py_drawing_functions.html) 140 | * [Keras model](https://keras.io/models/model/) 141 | * [Keras 中文版 模型介绍](http://keras-cn.readthedocs.io/en/latest/getting_started/sequential_model/) 142 | 143 | 144 | ### 个人号 145 | 146 | 收到任何人发过来的图片以后,程序自动下载图片,然后识别,保存标记识别好的数字的图片,发送给刚才发图片的人。 147 | 148 | ```python 149 | @itchat.msg_register([PICTURE]) 150 | def download_files(msg): 151 | friend = itchat.search_friends(userName=msg['FromUserName']) 152 | print time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), friend['NickName'], msg['Type'] 153 | filename = msg['FileName'] 154 | convertfilename = filename.replace('.', '.convert.') 155 | msg['Text'](filename) # download image 156 | if cv2.imread(filename) is not None: 157 | cv2.imwrite(convertfilename, convert(filename)) 158 | itchat.send('@img@%s' % convertfilename, msg['FromUserName']) 159 | ``` 160 | 161 | * [接收消息与文件](http://itchat.readthedocs.io/zh/latest/3.Handler/) 162 | * [回复](http://itchat.readthedocs.io/zh/latest/5.Reply/) 163 | 164 | ![](https://raw.githubusercontent.com/ypwhs/resources/master/WechatIMG50.jpg) 165 | 166 | ### 公众号 167 | 168 | 首先需要配置 apache 支持 python cgi 应用,然后在公众号后台配置服务器,得到 token 和 EncodingAESKey。当有人发送消息时,会自动将消息 POST 到预设的地址(比如:[http://w.luckiestcat.com/wx.py](http://w.luckiestcat.com/wx.py)),我们通过一系列代码下载图片,然后识别保存识别后的图片到服务器上,然后发送给刚才发图片的人。 169 | 170 | ```python 171 | msg = parse_message(body_text) 172 | reply = '' 173 | if msg.type == 'text': 174 | reply = create_reply('Text:' + msg.content.encode('utf-8'), message=msg) 175 | elif msg.type == 'image': 176 | reply = create_reply('图片', message=msg) 177 | try: 178 | r = requests.get(msg.image) # download image 179 | filename = 'img/' + str(int(time.time())) + '.jpg'; 180 | convertfilename = filename.replace('.', '.convert.') 181 | with open(filename, 'w') as f: 182 | f.write(r.content) 183 | if cv2.imread(filename) is not None: 184 | # load model 185 | with open('model.json', 'r') as f: 186 | model = model_from_json(f.read()) 187 | model.load_weights('model.h5') 188 | 189 | cv2.imwrite(convertfilename, convert(filename)) 190 | url = 'http://w.luckiestcat.com/' + convertfilename 191 | reply = ArticlesReply(message=msg, articles=[{ 192 | 'title': u'识别成功', 193 | 'url': url, 194 | 'description': u'', 195 | 'image': url 196 | }]) 197 | except: 198 | reply = create_reply('识别失败', message=msg) 199 | 200 | print reply 201 | 202 | ``` 203 | 204 | 205 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-cayman -------------------------------------------------------------------------------- /model.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ypwhs/wechat_digit_recognition/91d2ced1cb572891595f00eecefb00af2ac73398/model.h5 -------------------------------------------------------------------------------- /model.json: -------------------------------------------------------------------------------- 1 | {"class_name": "Sequential", "keras_version": "1.1.0", "config": [{"class_name": "Flatten", "config": {"batch_input_shape": [null, 28, 28, 1], "trainable": true, "name": "flatten_1", "input_dtype": "float32"}}, {"class_name": "Dense", "config": {"W_constraint": null, "b_constraint": null, "name": "dense_1", "activity_regularizer": null, "trainable": true, "init": "glorot_uniform", "bias": true, "input_dim": null, "b_regularizer": null, "W_regularizer": null, "activation": "linear", "output_dim": 512}}, {"class_name": "Activation", "config": {"activation": "relu", "trainable": true, "name": "activation_1"}}, {"class_name": "Dropout", "config": {"p": 0.2, "trainable": true, "name": "dropout_1"}}, {"class_name": "Dense", "config": {"W_constraint": null, "b_constraint": null, "name": "dense_2", "activity_regularizer": null, "trainable": true, "init": "glorot_uniform", "bias": true, "input_dim": null, "b_regularizer": null, "W_regularizer": null, "activation": "linear", "output_dim": 512}}, {"class_name": "Activation", "config": {"activation": "relu", "trainable": true, "name": "activation_2"}}, {"class_name": "Dropout", "config": {"p": 0.2, "trainable": true, "name": "dropout_2"}}, {"class_name": "Dense", "config": {"W_constraint": null, "b_constraint": null, "name": "dense_3", "activity_regularizer": null, "trainable": true, "init": "glorot_uniform", "bias": true, "input_dim": null, "b_regularizer": null, "W_regularizer": null, "activation": "linear", "output_dim": 11}}, {"class_name": "Activation", "config": {"activation": "softmax", "trainable": true, "name": "activation_3"}}]} -------------------------------------------------------------------------------- /test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ypwhs/wechat_digit_recognition/91d2ced1cb572891595f00eecefb00af2ac73398/test.png -------------------------------------------------------------------------------- /wechat_digit_recognition.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | import itchat 3 | from itchat.content import * 4 | import time 5 | import cv2 6 | import numpy as np 7 | from keras.models import model_from_json 8 | 9 | # load model 10 | with open('model.json', 'r') as f: 11 | model = model_from_json(f.read()) 12 | model.load_weights('model.h5') 13 | model.summary() 14 | 15 | 16 | def resize(rawimg): # resize img to 28*28 17 | fx = 28.0 / rawimg.shape[0] 18 | fy = 28.0 / rawimg.shape[1] 19 | fx = fy = min(fx, fy) 20 | img = cv2.resize(rawimg, None, fx=fx, fy=fy, interpolation=cv2.INTER_CUBIC) 21 | outimg = np.ones((28, 28), dtype=np.uint8) * 255 22 | w = img.shape[1] 23 | h = img.shape[0] 24 | x = (28 - w) / 2 25 | y = (28 - h) / 2 26 | outimg[y:y+h, x:x+w] = img 27 | return outimg 28 | 29 | 30 | def convert(imgpath): # read digits 31 | img = cv2.imread(imgpath) 32 | gray = cv2.imread(imgpath, cv2.IMREAD_GRAYSCALE) 33 | bw = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 25, 25) 34 | img2, ctrs, hier = cv2.findContours(bw.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) 35 | rects = [cv2.boundingRect(ctr) for ctr in ctrs] 36 | 37 | for rect in rects: 38 | x, y, w, h = rect 39 | roi = gray[y:y+h, x:x+w] 40 | hw = float(h) / w 41 | if (w < 200) & (h < 200) & (h > 10) & (w > 10) & (1.1 < hw) & (hw < 5): 42 | res = resize(roi) 43 | res = np.resize(res, (1, 28, 28, 1)) 44 | 45 | predictions = model.predict(res) 46 | predictions = np.argmax(predictions) 47 | if predictions != 10: 48 | cv2.rectangle(img, (x, y), (x+w, y+h), (0, 255, 0), 1) 49 | cv2.putText(img, '{:.0f}'.format(predictions), (x, y), cv2.FONT_HERSHEY_DUPLEX, h/25.0, (255, 0, 0)) 50 | return img 51 | 52 | 53 | @itchat.msg_register([TEXT]) 54 | def general_reply(msg): 55 | friend = itchat.search_friends(userName=msg['FromUserName']) 56 | print time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), friend['NickName'], msg['Type'], msg['Text'] 57 | # itchat.send('%s: %s' % (msg['Type'], msg['Text']), msg['FromUserName']) 58 | 59 | 60 | @itchat.msg_register([PICTURE]) 61 | def download_files(msg): 62 | friend = itchat.search_friends(userName=msg['FromUserName']) 63 | print time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), friend['NickName'], msg['Type'] 64 | filename = msg['FileName'] 65 | convertfilename = filename.replace('.', '.convert.') 66 | msg['Text'](filename) # download image 67 | if cv2.imread(filename) is not None: 68 | cv2.imwrite(convertfilename, convert(filename)) 69 | itchat.send('@img@%s' % convertfilename, msg['FromUserName']) 70 | 71 | itchat.auto_login(hotReload=True) 72 | itchat.run() 73 | -------------------------------------------------------------------------------- /wx.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | import os 4 | import sys 5 | import cgitb 6 | import time 7 | import requests 8 | import urlparse 9 | from wechatpy import parse_message 10 | from wechatpy.utils import check_signature 11 | from wechatpy.exceptions import InvalidSignatureException 12 | from wechatpy.replies import create_reply, ArticlesReply 13 | 14 | import time 15 | import cv2 16 | import numpy as np 17 | from keras.models import model_from_json 18 | 19 | token='your token' 20 | encoding_aes_key='your key' 21 | 22 | 23 | def resize(rawimg): # resize img to 28*28 24 | fx = 28.0 / rawimg.shape[0] 25 | fy = 28.0 / rawimg.shape[1] 26 | fx = fy = min(fx, fy) 27 | img = cv2.resize(rawimg, None, fx=fx, fy=fy, interpolation=cv2.INTER_CUBIC) 28 | outimg = np.ones((28, 28), dtype=np.uint8) * 255 29 | w = img.shape[1] 30 | h = img.shape[0] 31 | x = (28 - w) / 2 32 | y = (28 - h) / 2 33 | outimg[y:y+h, x:x+w] = img 34 | return outimg 35 | 36 | 37 | def convert(imgpath): # read digits 38 | img = cv2.imread(imgpath) 39 | gray = cv2.imread(imgpath, cv2.IMREAD_GRAYSCALE) 40 | bw = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 25, 25) 41 | img2, ctrs, hier = cv2.findContours(bw.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) 42 | rects = [cv2.boundingRect(ctr) for ctr in ctrs] 43 | 44 | for rect in rects: 45 | x, y, w, h = rect 46 | roi = gray[y:y+h, x:x+w] 47 | hw = float(h) / w 48 | if (w < 200) & (h < 200) & (h > 10) & (w > 10) & (1.1 < hw) & (hw < 5): 49 | res = resize(roi) 50 | res = np.resize(res, (1, 28, 28, 1)) 51 | 52 | predictions = model.predict(res) 53 | predictions = np.argmax(predictions) 54 | if predictions != 10: 55 | cv2.rectangle(img, (x, y), (x+w, y+h), (0, 255, 0), 1) 56 | cv2.putText(img, '{:.0f}'.format(predictions), (x, y), cv2.FONT_HERSHEY_DUPLEX, h/25.0, (255, 0, 0)) 57 | return img 58 | 59 | 60 | # 启用调试 61 | cgitb.enable() 62 | 63 | # 获取 POST 内容 64 | body_text = sys.stdin.read() 65 | 66 | # 如果含参数,解析各个参数 67 | print "Content-Type: text/html" 68 | print "" 69 | 70 | # 获取 URL 参数 71 | query_string = os.environ.get("QUERY_STRING") 72 | 73 | if query_string == '': 74 | print '本页面仅允许微信访问' 75 | sys.exit(0) 76 | 77 | try: 78 | arguments = urlparse.parse_qs(query_string) 79 | signature = arguments['signature'][0] 80 | timestamp = arguments['timestamp'][0] 81 | nonce = arguments['nonce'][0] 82 | except: 83 | print 'arguments error' 84 | sys.exit(0) 85 | 86 | # 校验时间戳。5 分钟以前的 timestamp 自动拒绝 87 | current_timestamp = int(time.time()) 88 | 89 | if (current_timestamp - int(timestamp)) > 300: 90 | print 'Incorrect timestamp' 91 | sys.exit(0) 92 | 93 | # 接口检测部分 94 | try: 95 | check_signature(token, signature, timestamp, nonce) 96 | except InvalidSignatureException: 97 | print 'error' 98 | sys.exit(0) 99 | 100 | if 'echostr' in arguments: 101 | echostr = arguments['echostr'][0] 102 | print echostr 103 | sys.exit(0) 104 | 105 | msg = parse_message(body_text) 106 | reply = '' 107 | if msg.type == 'text': 108 | reply = create_reply('Text:' + msg.content.encode('utf-8'), message=msg) 109 | elif msg.type == 'image': 110 | reply = create_reply('图片', message=msg) 111 | try: 112 | r = requests.get(msg.image) # download image 113 | filename = 'img/' + str(int(time.time())) + '.jpg'; 114 | convertfilename = filename.replace('.', '.convert.') 115 | with open(filename, 'w') as f: 116 | f.write(r.content) 117 | if cv2.imread(filename) is not None: 118 | # load model 119 | with open('model.json', 'r') as f: 120 | model = model_from_json(f.read()) 121 | model.load_weights('model.h5') 122 | 123 | cv2.imwrite(convertfilename, convert(filename)) 124 | url = 'http://w.luckiestcat.com/' + convertfilename 125 | reply = ArticlesReply(message=msg, articles=[{ 126 | 'title': u'识别成功', 127 | 'url': url, 128 | 'description': u'', 129 | 'image': url 130 | }]) 131 | except: 132 | reply = create_reply('识别失败', message=msg) 133 | 134 | print reply 135 | --------------------------------------------------------------------------------