├── .gitignore ├── LICENSE ├── README.md ├── captcha_gen.py ├── captcha_scrawl.py ├── data_augment.py ├── demo_cnn.py ├── demo_online.py ├── readme_img ├── captcha_sample1.jpg ├── captcha_sample2.jpg ├── captcha_sample3.jpg ├── captcha_sample4.jpg ├── captcha_seperate1.png ├── captcha_seperate2.png ├── captcha_seperate3.png ├── csv.png ├── dataaugmentation.png ├── generate.png ├── head.gif ├── imitate6.png ├── imitate6_tensorboard.png ├── imitate_result.png └── old │ ├── 1.jpeg │ ├── 10.PNG │ ├── 11.png │ ├── 12.PNG │ ├── 2.jpeg │ ├── 3.jpeg │ ├── 4.jpeg │ ├── 5.PNG │ ├── 6.PNG │ ├── 7.PNG │ ├── 8.jpg │ └── 9.PNG ├── train_cnn_imitate_5.py ├── train_cnn_imitate_56.py ├── train_cnn_imitate_6.py ├── train_cnn_real_5.py ├── train_cnn_real_56.py └── train_cnn_real_6.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | .static_storage/ 56 | .media/ 57 | local_settings.py 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | # data 107 | *.data 108 | *.bin 109 | *.npy 110 | *.csv 111 | *.big 112 | *.small 113 | /data 114 | /test 115 | /logs 116 | /developing 117 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2017 Jason-ChengYing,Li 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # simple-railway-captcha-solver 2 | 3 | 本專案自2018年後已經停止更新/維護,且台鐵已經在2019年改用reCAPTCHA代替傳統的驗證碼。 4 | 5 | (這個專案是我在大學閒暇時的研究,當時code寫得很雜亂,後續issue也都沒時間回覆。但還是希望有幫助到想做類似project的人!) 6 | 7 | **This project is no longer maintained. 
Archived this repo on 2022/11/18.** 8 | 9 | ---- 10 | 11 | [Click here or scroll down for english version](#english-version) 12 | 13 | ![image](./readme_img/head.gif) 14 | 15 | 本專案利用簡單的Convolutional Neural Network來實作辨識台鐵訂票網站的驗證碼,訓練集及驗證集的部分以模仿驗證碼樣式的方式來產生、另外測試集的部分則自台鐵訂票網站擷取,再以手動方式標記約3000筆。 16 | 17 | 目前模型單碼辨識率最高達到```99.39%```,在台鐵網站上以查詢剩餘車票功能來測試驗證碼,整體辨識成功率(全部字元正確)達到```91.57%```。 18 | 19 | 底下有詳盡的說明。 20 | 21 | |Name|Description| 22 | |----|----| 23 | |captcha_gen.py|模仿驗證碼樣式建立訓練集| 24 | |captcha_scrawl.py|從台鐵網站取得真實驗證碼圖| 25 | |train_cnn_imitate_5.py|建立CNN並以模仿驗證碼訓練(5碼辨識)| 26 | |train_cnn_imitate_6.py|建立CNN並以模仿驗證碼訓練(6碼辨識)| 27 | |train_cnn_imitate_56.py|建立CNN並以模仿驗證碼訓練(辨識是5碼or6碼)| 28 | |data_augment.py|用於真實驗證碼的資料增強| 29 | |train_cnn_real_5.py|建立CNN並以真實驗證碼訓練(5碼辨識)| 30 | |train_cnn_real_6.py|建立CNN並以真實驗證碼訓練(6碼辨識)| 31 | |train_cnn_real_56.py|建立CNN並以真實驗證碼訓練(辨識是5碼or6碼)| 32 | |demo_cnn.py|Demo載入模型並以測試集評估辨識率| 33 | |demo_online.py|Demo載入模型並在台鐵網站上評估辨識率| 34 | 35 | ## 溫馨提醒 36 | 鐵路法第65條中提到:```「...以不正方法將虛偽資料或不正指令輸入電腦或其相關設備而購買車票、取得訂票或取票憑證者,處五年以下有期徒刑或科或併科新臺幣三百萬元以下罰金。」```,我想使用程式辨識驗證碼來自動訂票,應該也在其中"不正方法"的範疇中。 37 | 38 | 此專案僅供學術研究,所以請不要利用建立的模型辨識驗證碼去自動訂票,這是違法的喔。 39 | 40 | ## 0.-Dependencies 41 | |Name|Version| 42 | |----|----| 43 | |tensorflow|1.4.0| 44 | |tensorflow-gpu|1.4.0| 45 | |tensorflow-tensorboard|0.4.0rc3| 46 | |Keras|2.1.2| 47 | |h5py|2.7.1| 48 | |Pillow|5.1.0| 49 | |numpy|1.13.3| 50 | 51 | ## 1.-Training set? 52 | 要建立一個辨識驗證碼的CNN模型其實並非難事,難的是要如何取得標記好的訓練集呢? 
53 | 54 | ![image](./readme_img/captcha_sample1.jpg)![image](./readme_img/captcha_sample2.jpg)![image](./readme_img/captcha_sample3.jpg) 55 | 56 | 在這邊我們會嘗試兩種方法(2.與3.): 57 | #### 2.-模仿驗證碼的樣式,自行產生訓練集和驗證集 58 | 因為要手動標記上萬張驗證碼是非常費時的,所以我們可以試著模仿產生一些驗證碼看看。 59 | 60 | 不過當然,我們產生的資料集必須非常接近真實的驗證碼,否則最後訓練完可能用在真實的驗證碼上效果會非常的差。 61 | 62 | #### 3.-標記少量的驗證碼,以資料增強的方式擴充資料集 63 | 因為不一定每一種驗證碼都可以很容易地找出他的樣式或規律等等,有時候我們還是得用手動方式標記一些驗證碼。 64 | 65 | 在這種情況下,我們可以透過資料增強的方法來擴充我們的資料集,讓我們的網路有更多資料可以學習。 66 | 67 | ```(註:在台鐵驗證碼的例子中,自從改版加入英文字後,因為英文字的出現機率遠低於數字(每digit大約只有1~5%),所以在手動標記的真實資料集中,英文字的比例是極低的,造成數據很不平衡。且在真實驗證碼中,一張驗證碼最多只會出現一個英文字,即使我們用資料增強的方式,也很難去做到平衡數字及英文的資料比例。雖然似乎可以透過設定class weight的方式來改善,但是keras好像不支援以one-hot encoding表示的輸出設定class weight...。這部分暫時想不到方式解決,不過若只是要訓練一個勉強堪用的模型,用這個方式還是可行的,因為實際上台鐵驗證碼英文出現的比例也沒有非常高。)``` 68 | 69 | -------------------- 70 | 71 | ## 2.1-Generate training and validation set 72 | 讓我們來模仿產生一些驗證碼吧! 73 | 首先我們要先觀察驗證碼,你可以寫一支爬蟲程式(eg.```captcha_scrawl.py```)去擷取一兩百張驗證碼回來細細比對。我們不難發現台鐵的驗證碼不外乎由兩個主要元素組成: 74 | - ```5 ~ 6碼```的數字及英文(不包含O和I),大小似乎不一致,而且都有經過旋轉,另外顏色是浮動的。 75 | - 背景是浮動的顏色,另外還有不少干擾的線條,看起來應該是矩形,由黑線和白線組成,且有部分會蓋到數字上面。 76 | 77 | 進一步研究會發現: 78 | - 數字的旋轉角度約在```-55 ~ 55度```間,大小約```25 ~ 27pt```。 79 | - 字型的部分,仔細觀察會發現同一個字會有兩種不一樣的樣式,推測是有兩種字型隨機更替,其中一個很明顯是```Courier New-Bold```,另一個比對一下也不難發現即是```Times New Roman-Bold```。 80 | - 背景和字型顏色的部分,可以用一些色彩均值化的手法快速的從數百張的驗證碼中得出每一張的背景及數字的顏色,進而我們就能算出顏色的範圍。這部分可以用OpenCV的k-means來實作,這邊就不再贅述。 81 | 82 | 背景的R/G/B範圍約是在```180 ~ 250```間,文字的部分則是```10 ~ 140```間。 83 | - 干擾的線條是矩形,有左、上是黑線條且右、下是白線條和倒過來,共兩種樣式(也可以當作是旋轉180度),平均大約會出現```30 ~ 32個```隨機分布在圖中,長寬都大約落在```5 ~ 21px```間。 84 | 另外,大約有4成的機會白線會蓋在數字上,黑線蓋在文字上的機率則更低。 85 | 86 | 有了這些觀察,只差一點點就可以產生訓練集了,我們現在來觀察文字都落在圖片上的甚麼位置上: 87 | 88 | ![image](./readme_img/captcha_seperate1.png)![image](./readme_img/captcha_seperate2.png)![image](./readme_img/captcha_seperate3.png) 89 | 90 | 從這幾張圖中不難看出文字並非規則地分布在圖片上,我們可以猜測文字是旋轉後被隨機左移或右移了,甚至還會有重疊的情況,所以沒辦法用切割的方式一次處理一個文字。 91 | 92 | 
以上就是我們簡單觀察到的驗證碼規則,訓練集產生的部分實作在```captcha_gen.py```中,雖然寫得有點雜亂,不過沒甚麼特別的地方,就是照著上面的規則產生,可以試著以自己的方式實作看看。 93 | 94 | ![image](./readme_img/captcha_sample4.jpg) 95 | 96 | ```python 97 | if __name__ == "__main__": 98 | generate(50000, "./data/56_imitate_train_set/", ENGP=100, FIVEP=50, ENGNOLIMIT=True, filename="train") 99 | generate(10240, "./data/56_imitate_vali_set/", ENGP=100, FIVEP=50, ENGNOLIMIT=True, filename="vali") 100 | generate(50000, "./data/5_imitate_train_set/", ENGP=100, FIVEP=100, ENGNOLIMIT=True, filename="train") 101 | generate(10240, "./data/5_imitate_vali_set/", ENGP=100, FIVEP=100, ENGNOLIMIT=True, filename="vali") 102 | generate(50000, "./data/6_imitate_train_set/", ENGP=100, FIVEP=0, ENGNOLIMIT=True, filename="train") 103 | generate(10240, "./data/6_imitate_vali_set/", ENGP=100, FIVEP=0, ENGNOLIMIT=True, filename="vali") 104 | ``` 105 | 106 | 最後會為我們預計建立的三個CNN(2.2.1會提到)各分別產生50000筆Training data和10240筆Validate data,答案則標記在csv檔中。 107 | 108 | ![image](./readme_img/csv.png)![image](./readme_img/generate.png) 109 | 110 | 111 | ## 2.2.1-Building Convolution Neural Network 112 | 有了資料集,我們就可以來建立CNN了! 
113 | 114 | 在這邊我們會建立三個CNN,分別是```1.辨識5碼驗證碼圖片的CNN```、```2.辨識6碼驗證碼圖片的CNN``` 以及 ```3.辨識圖片是5碼or6碼驗證碼的CNN```。 115 | 116 | 首先我們先來實作前兩個辨識驗證碼的CNN:輸入是```60*200```的圖片,共有3個channel(R/G/B),所以是shape會是```(60, 200, 3)```。 117 | 118 | 中間透過數層由ReLU函數激發的Convolution Layer擷取特徵,並以2x2的Max pooling layer採樣減少計算量、BatchNormalization layer做標準化 及 Dropout Layer隨機捨棄一些神經元(避免overfitting),最後用Flatten Layer來把資料降到1維,輸出到全連接層:5/6個34神經元的Softmax regression分類器。 119 | 120 | (註:34個代表數字0~9以及英文字母去除O及I的數量,另外5碼和6碼只差在最後有幾個Softmax regression分類器。) 121 | 122 | 以六碼為例(train_cnn_imitate_6.py): 123 | ```python 124 | in = Input((60, 200, 3)) 125 | out = in 126 | out = Conv2D(filters=32, kernel_size=(3, 3), padding='same', activation='relu')(out) 127 | out = Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(out) 128 | out = BatchNormalization()(out) 129 | out = MaxPooling2D(pool_size=(2, 2))(out) 130 | out = Dropout(0.3)(out) 131 | out = Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu')(out) 132 | out = Conv2D(filters=64, kernel_size=(3, 3), activation='relu')(out) 133 | out = BatchNormalization()(out) 134 | out = MaxPooling2D(pool_size=(2, 2))(out) 135 | out = Dropout(0.3)(out) 136 | out = Conv2D(filters=128, kernel_size=(3, 3), padding='same', activation='relu')(out) 137 | out = Conv2D(filters=128, kernel_size=(3, 3), activation='relu')(out) 138 | out = BatchNormalization()(out) 139 | out = MaxPooling2D(pool_size=(2, 2))(out) 140 | out = Dropout(0.3)(out) 141 | out = Conv2D(filters=256, kernel_size=(3, 3), activation='relu')(out) 142 | out = BatchNormalization()(out) 143 | out = MaxPooling2D(pool_size=(2, 2))(out) 144 | out = Flatten()(out) 145 | out = Dropout(0.3)(out) 146 | out = [Dense(34, name='digit1', activation='softmax')(out),\ 147 | Dense(34, name='digit2', activation='softmax')(out),\ 148 | Dense(34, name='digit3', activation='softmax')(out),\ 149 | Dense(34, name='digit4', activation='softmax')(out),\ 150 | Dense(34, name='digit5', activation='softmax')(out),\ 151 | Dense(34, 
name='digit6', activation='softmax')(out)] 152 | model = Model(inputs=in, outputs=out) 153 | ``` 154 | 155 | 完成後要來compile模型,這邊loss使用```categorical_crossentropy```、optimizer*使用```Adam```,而metrics理所當然是```accuracy```了。 156 | ```python 157 | model.compile(loss='categorical_crossentropy', optimizer='Adamax', metrics=['accuracy']) 158 | ``` 159 | 160 | *關於optimizer的選擇,可以參考這兩篇,寫得不錯: 161 | 1. An overview of gradient descent optimization algorithms - http://ruder.io/optimizing-gradient-descent/index.html 162 | 2. SGD,Adagrad,Adadelta,Adam等优化方法总结和比较 - http://ycszen.github.io/2016/08/24/SGD%EF%BC%8CAdagrad%EF%BC%8CAdadelta%EF%BC%8CAdam%E7%AD%89%E4%BC%98%E5%8C%96%E6%96%B9%E6%B3%95%E6%80%BB%E7%BB%93%E5%92%8C%E6%AF%94%E8%BE%83/ 163 | ** 164 | 165 | -- 166 | 167 | 最後來看看model的summary輸出長甚麼樣子: 168 | ```python 169 | model.summary() 170 | 171 | __________________________________________________________________________________________________ 172 | Layer (type) Output Shape Param # Connected to 173 | ================================================================================================== 174 | input_1 (InputLayer) (None, 60, 200, 3) 0 175 | __________________________________________________________________________________________________ 176 | conv2d_1 (Conv2D) (None, 60, 200, 32) 896 input_1[0][0] 177 | __________________________________________________________________________________________________ 178 | conv2d_2 (Conv2D) (None, 58, 198, 32) 9248 conv2d_1[0][0] 179 | __________________________________________________________________________________________________ 180 | batch_normalization_1 (BatchNor (None, 58, 198, 32) 128 conv2d_2[0][0] 181 | __________________________________________________________________________________________________ 182 | max_pooling2d_1 (MaxPooling2D) (None, 29, 99, 32) 0 batch_normalization_1[0][0] 183 | __________________________________________________________________________________________________ 184 | dropout_1 (Dropout) (None, 29, 99, 
32) 0 max_pooling2d_1[0][0] 185 | __________________________________________________________________________________________________ 186 | conv2d_3 (Conv2D) (None, 29, 99, 64) 18496 dropout_1[0][0] 187 | __________________________________________________________________________________________________ 188 | conv2d_4 (Conv2D) (None, 27, 97, 64) 36928 conv2d_3[0][0] 189 | __________________________________________________________________________________________________ 190 | batch_normalization_2 (BatchNor (None, 27, 97, 64) 256 conv2d_4[0][0] 191 | __________________________________________________________________________________________________ 192 | max_pooling2d_2 (MaxPooling2D) (None, 13, 48, 64) 0 batch_normalization_2[0][0] 193 | __________________________________________________________________________________________________ 194 | dropout_2 (Dropout) (None, 13, 48, 64) 0 max_pooling2d_2[0][0] 195 | __________________________________________________________________________________________________ 196 | conv2d_5 (Conv2D) (None, 13, 48, 128) 73856 dropout_2[0][0] 197 | __________________________________________________________________________________________________ 198 | conv2d_6 (Conv2D) (None, 11, 46, 128) 147584 conv2d_5[0][0] 199 | __________________________________________________________________________________________________ 200 | batch_normalization_3 (BatchNor (None, 11, 46, 128) 512 conv2d_6[0][0] 201 | __________________________________________________________________________________________________ 202 | max_pooling2d_3 (MaxPooling2D) (None, 5, 23, 128) 0 batch_normalization_3[0][0] 203 | __________________________________________________________________________________________________ 204 | dropout_3 (Dropout) (None, 5, 23, 128) 0 max_pooling2d_3[0][0] 205 | __________________________________________________________________________________________________ 206 | conv2d_7 (Conv2D) (None, 3, 21, 256) 295168 dropout_3[0][0] 207 | 
__________________________________________________________________________________________________ 208 | batch_normalization_4 (BatchNor (None, 3, 21, 256) 1024 conv2d_7[0][0] 209 | __________________________________________________________________________________________________ 210 | max_pooling2d_4 (MaxPooling2D) (None, 1, 10, 256) 0 batch_normalization_4[0][0] 211 | __________________________________________________________________________________________________ 212 | flatten_1 (Flatten) (None, 2560) 0 max_pooling2d_4[0][0] 213 | __________________________________________________________________________________________________ 214 | dropout_4 (Dropout) (None, 2560) 0 flatten_1[0][0] 215 | __________________________________________________________________________________________________ 216 | digit1 (Dense) (None, 34) 87074 dropout_4[0][0] 217 | __________________________________________________________________________________________________ 218 | digit2 (Dense) (None, 34) 87074 dropout_4[0][0] 219 | __________________________________________________________________________________________________ 220 | digit3 (Dense) (None, 34) 87074 dropout_4[0][0] 221 | __________________________________________________________________________________________________ 222 | digit4 (Dense) (None, 34) 87074 dropout_4[0][0] 223 | __________________________________________________________________________________________________ 224 | digit5 (Dense) (None, 34) 87074 dropout_4[0][0] 225 | __________________________________________________________________________________________________ 226 | digit6 (Dense) (None, 34) 87074 dropout_4[0][0] 227 | ================================================================================================== 228 | Total params: 1,106,540 229 | Trainable params: 1,105,580 230 | Non-trainable params: 960 231 | ``` 232 | 233 | 架構以圖片呈現的話: 234 | 235 | ![image](./readme_img/imitate6.png) 236 | 237 | ## 2.2.2-Building Another Convolution Neural Network 
238 | 前面提到,我們還要建立一個用來識別驗證碼是5碼還是6碼的模型,我們最後會讓資料先經過這個模型看看輸入的驗證碼是5碼還是6碼,再送入對應的模型去辨識出上面的文字。 239 | 240 | 這個模型的架構基本上跟前面兩個是一樣的,只是Dropout Rate以及輸出不同。輸出的部份我們從數個softmax分類器改為一個sigmoid分類器,並將loss改為```binary_crossentropy```來compile模型。 241 | 242 | 這部分實作於```train_cnn_imitate_56.py```,下面只大略列出不同處: 243 | 244 | ```python 245 | out = Dense(1, name='6digit', activation='sigmoid')(tensor_out) 246 | model = Model(inputs=in, outputs=out) 247 | model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']) 248 | ``` 249 | 250 | 最後當這個sigmoid分類器輸出>0.5時,我們視為辨識出6碼的驗證碼,反之則為5碼。 251 | 252 | ## 2.3-Load the training set 253 | 在訓練之前我們要先將資料載入到記憶體中,前面產生訓練集和驗證集的時候,我們是將驗證碼存成一張張編號好的圖片,並用csv檔記錄下了答案。 254 | 255 | 這邊一樣以6碼的為例,首先我們先處理X的部分,也就是特徵值,這邊就是指我們的圖片。 256 | 而要輸入進CNN的資料必須是numpy array的形式,所以我們用Pillow來讀取圖片並轉為numpy格式: 257 | 258 | ```python 259 | traincsv = open('./data/6_imitate_train_set/captcha_train.csv', 'r', encoding = 'utf8') 260 | for row in csv.reader(traincsv): 261 |    image = Image.open("./data/6_imitate_train_set/" + row[0] + ".jpg") # 讀取圖片 262 |    nparr = np.array(image) # 轉成np array 263 |    nparr = nparr / 255.0 264 | ``` 265 | 266 | 這時我們下```nparr.shape```,可以看到矩陣的大小是```(60, 200, 3)```,跟前面模型設計的Input是相同的。 267 | 268 | 而我們計劃使用50000張圖片來訓練,所以最後輸入給CNN的矩陣大小會是```(50000, 60, 200, 3)```,這部分只要利用stack就可以把它們合併,整理成下面: 269 | 270 | ```python 271 | train_data = np.stack([np.array(Image.open("./data/6_imitate_train_set/" + row[0] + ".jpg"))/255.0 for row in csv.reader(traincsv)]) 272 | ``` 273 | 274 | 最後train_data的shape就會是```(50000, 60, 200, 3)```。 275 | 276 | 接下來Y則是訓練集的標記,也就是我們訓練集的答案。 277 | 278 | 因為我們的模型是多輸出的結構(6組softmax函數分類器),所以Y要是一個含有6個numpy array的list,大概像是這樣: 279 | ``` 280 | [[第一張第1個數字,...,最後一張第1個數字], [第一張第2個數字,...,最後一張第2個數字], [...], [...], [...], [...]] 281 | ``` 282 | 而其中每個數字都是以one-hot encoding表示,例如0就是```[1, 0, 0, 0, ....,0]```,2就是```[0, 0, 1, 0, ....,0]``` 283 | 284 | ```python 285 | traincsv = open('./data/6_imitate_train_set/captcha_train.csv', 'r', encoding = 'utf8') # 讀取訓練集的標記 286 | 
read_label = [toonehot(row[1]) for row in csv.reader(traincsv)] # 將每一行的文字轉成one-hot encoding 287 | train_label = [[] for _ in range(6)] # 各組輸出的答案要放到train_label 288 | 289 | for arr in read_label: 290 | for index in range(6): 291 |        train_label[index].append(arr[index]) # 原本是[[第1字答案, ..., 第6字答案],......, [第1字答案, ..., 第6字答案]] 292 |                                              # 要轉成[[第1字答案,..., 第1字答案],..., [第6字答案,..., 第6字答案]]才符合Y的輸入 293 | train_label = [arr for arr in np.asarray(train_label)] # 最後要把6個numpy array 放在一個list 294 | ``` 295 | 296 | ## 2.4-Validation set 297 | 驗證集的載入方式跟訓練集相同,這邊略過。 298 | 299 | ## 2.5-Callback 300 | 在這邊要用到三個callback: 301 | 302 | ### 1.ModelCheckPoint 303 | 304 | ```python 305 | checkpoint = ModelCheckpoint(filepath, monitor='val_digit6_acc', verbose=1, save_best_only=True, mode='max') 306 | ``` 307 | 308 | 用於儲存最佳辨識率的模型,每次epoch完會檢查一次,如果比先前最佳的acc高,就會儲存model到filepath。 309 | 310 | 因為在多輸出模型中沒有像是各輸出平均的acc這種東西,觀察前幾epoch後發現```val_digit6_acc```上升最慢,因此用它當作checkpoint的monitor。 311 | (如果要自定義monitor可以自己寫callback,這部分留到未來有空再來實作。) 312 | 313 | ### 2.Earlystopping 314 | 315 | ```python 316 | earlystop = EarlyStopping(monitor='val_digit6_acc', patience=5, verbose=1, mode='auto') 317 | ``` 318 | 319 | 這邊的monitor設為val_digit6_acc,patience設為5,也就是在驗證集的val_digit6_acc連續5次不再下降時,就會提早結束訓練。(train_cnn_imitate_56是10) 320 | 321 | ### 3.TensorBoard 322 | 323 | ```python 324 | tensorBoard = TensorBoard(log_dir = "./logs", histogram_freq = 1) 325 | ``` 326 | 327 | TensorBoard可以讓我們更方便的以圖形化界面檢視訓練結果,要檢視時可以輸入```tensorboard --logdir=logs```來啟動。 328 | 329 | 最後把他們三個放到list中即可。 330 | ```python 331 | callbacks_list = [tensorBoard, earlystop, checkpoint] 332 | ``` 333 | 334 | ## 2.6-Training the model 335 | 至此為止我們已經把所有需要的資料都準備好了,現在只需要一台好電腦就可以開始訓練了,建議使用GPU來訓練,不然要很久,真的很久....。 336 | 337 | 若在訓練時出現Resource exhausted的錯誤,可以考慮調低一些參數(如batch_size)。 338 | 339 | ```python 340 | model.fit(train_data, train_label, batch_size=400, epochs=50, verbose=2, validation_data=(vali_data, vali_label), 
callbacks=callbacks_list) 341 | ``` 342 | 343 | ## 2.7-Result 344 | 三個模型都在訓練約25~30epochs後達到EarlyStopping的條件停止,val_acc皆達到```0.99```以上(下圖為imitate_6的訓練結果): 345 | 346 | ![image](./readme_img/imitate6_tensorboard.png) 347 | 348 | 接著利用手動標記的3000張真實驗證碼當測試集來評估模型的辨識率(demo_cnn.py): 349 | 350 | 辨識```5碼```驗證碼的模型(train_cnn_imitate_5)對真實驗證碼測試集的單碼辨識率達到平均約```98.87%```,一次辨識成功率(即一次5或6碼都辨識正確)達到約```94.55%```。 351 | 352 | 辨識```6碼```驗證碼的模型(train_cnn_imitate_6)對真實驗證碼測試集的單碼辨識率達到平均約```98.44%```,一次辨識成功率(即一次5或6碼都辨識正確)達到約```90.45%```。 353 | 354 | 分類驗證碼是5碼 or 6碼的模型(train_cnn_imitate_56)則達到約```98.13%```。 355 | 356 | ![image](./readme_img/imitate_result.png) 357 | 358 | ## 2.8-Online Test 359 | 模型都完成後,是時候到台鐵的網站上試試身手了! 360 | 361 | 不過開頭有提到以不正方式訂票是違法的,所以我們這邊僅在查詢剩餘車票的頁面下做測試,並設定適當的延遲,避免對網站伺服器造成負擔。(實作於```demo_online.py```) 362 | 363 | 這邊是以selenium控制chrome瀏覽器操作,這樣比較簡單,而且看的到畫面比較有感覺。 364 | 365 | 另外因為台鐵的驗證碼是浮動的,所以驗證碼圖片是以先screenshot,再將驗證碼位置圖片crop下來方式來取得的: 366 | 367 | ```python 368 | driver.save_screenshot('tmp.png') 369 | location = driver.find_element_by_id('idRandomPic').location 370 | x, y = location['x'] + 5, location['y'] + 5 371 | img = Image.open('tmp.png') 372 | captcha = img.crop((x, y, x+200, y+60)) 373 | captcha.convert("RGB").save('captcha.jpg', 'JPEG') 374 | ``` 375 | 376 | 其中```location['x'] + 5, location['y'] + 5```是因為畫面上的驗證碼有用css加了一個寬度5的外框上去。 377 | 378 | 最後執行了1000筆後,我們得到```91.57%```的成功率,大功告成! 
379 | 380 | -------------------- 381 | 382 | ## 3.1-Label training and dataset 383 | 在第三部份我們會用少量手動標記的驗證碼,透過資料增強(Data Augmentation)產生大量資料來訓練模型。 384 | 385 | 首先我們需要取得一些真實的驗證碼,我們可以寫一支程式簡單地從台鐵網站上下載驗證碼圖片回來(實作於```captcha_scrawl.py```): 386 | 387 | ```python 388 | SAVEPATH = "./data/manual_label/" 389 | url = 'http://railway1.hinet.net/ImageOut.jsp' 390 | for i in range(1, 3000): 391 | response = requests.get(url, stream=True) 392 | with open(SAVEPATH + str(i) + '.jpg', 'wb') as out_file: 393 | shutil.copyfileobj(response.raw, out_file) 394 | del response 395 | time.sleep(0.5) 396 | ``` 397 | 398 | 如此我們就得到了3000張驗證碼,其中大約有1500張是6碼,剩下的則是5碼。(在我寫Readme之前,5碼和6碼的比例大約是1:3,但現在測試卻是1:1,可能網站有更新過吧) 399 | 400 | 之後我們可以仿照 2.1 的格式去標記驗證碼答案於csv中,也可以做個小工具來輔助標記(https://github.com/JasonLiTW/captcha-label-tool)。 401 | 402 | ## 3.2-Data Augmentation 403 | 因為我們標記的驗證碼有點太少了,所以我們要透過Data Augmentation的方式來產生更多的訓練資料來用! 404 | 405 | 我們使用Keras內建的ImageDataGenerator,他提供了非常多的功能,詳細可以看這裡:https://keras.io/preprocessing/image/ 406 | 407 | 我們這邊會用到的功能有:```rotation_range=5```(旋轉0~5度), ```shear_range=0.2```(斜變0~0.2度), ```zoom_range=0.05```(放大0~0.05倍) 408 | 409 | ```python 410 | datagen = ImageDataGenerator(rotation_range=5,shear_range=0.2,zoom_range=0.05,fill_mode='nearest') 411 | ``` 412 | 413 | 這邊的datagen是一個generator,它會隨機對圖片做旋轉、斜變和放大。我們預計要讓每張圖片產生50張增強的圖片。詳細的部分實作於```data_augment.py```,依序修改參數對5碼及6碼的驗證碼執行,即可由原本3000張產生到變成150000張驗證碼圖片。 414 | 415 | ![image](./readme_img/dataaugmentation.png) 416 | 417 | 418 | ## 3.3-Building Convolution Neural Network 419 | 這部分跟2.2的模型是完全相同的,這邊不再贅述。 420 | 421 | ## 3.4-Load the training and validation set 422 | 以6碼的為例,我們現在有75000張驗證碼圖片,我們取其中前60000張為訓練集,後15000張為驗證集來訓練。(驗證集也是一樣,只是改成取後15000張。) 423 | 424 | ```python 425 | traincsv = open('./data/6_real_train_set/captcha_train.csv', 'r', encoding = 'utf8') 426 | train_data = np.stack([np.array(Image.open("./data/6_real_train_set/" + row[0] + ".jpg"))/255.0 for row in csv.reader(traincsv)][:60000]) 427 | traincsv = 
open('./data/6_real_train_set/captcha_train.csv', 'r', encoding = 'utf8') 428 | read_label = [toonehot(row[1]) for row in csv.reader(traincsv)][:60000] 429 | train_label = [[] for _ in range(6)] 430 | for arr in read_label: 431 | for index in range(6): 432 | train_label[index].append(arr[index]) 433 | train_label = [arr for arr in np.asarray(train_label)] 434 | ``` 435 | 436 | 另外判斷是5碼or6碼的模型,則是各以5/6碼的前60000張各隨機挑選20000張=40000張當訓練集,剩下15000張各隨機挑選5000張=10000張當驗證集。 437 | 438 | ## 3.5-Callback 439 | 跟2.5相同,略過。 440 | 441 | ## 3.6-Training the model 442 | 跟2.6相同,略過。 443 | 444 | ## 3.7-Result 445 | 三個模型都在訓練約20 epochs後達到EarlyStopping的條件停止,val_acc皆達到```0.99```以上。 446 | 447 | 但由於在```demo_cnn.py```中使用的3000張手動標記驗證碼即為真實驗證碼模型的訓練集,所以沒辦法用來評估其辨識率,所以這邊我們會留到下一部分(3.8)到台鐵網站來評估。 448 | 449 | ## 3.8-Online Test 450 | 最後我們一樣來到台鐵網站做測試,程式的部分同2.8,只要修改model路徑即可。 451 | 452 | 執行了1000筆後,我們得到```51.63%```的整體成功率,雖然沒有上一部分的結果那麼好,但仍堪用了。 453 | 454 | ```(註:在台鐵驗證碼的例子中,自從改版加入英文字後,因為英文字的出現機率遠低於數字(每digit大約只有1~5%),所以在手動標記的真實資料集中,英文字的比例是極低的,造成數據很不平衡。且在真實驗證碼中,一張驗證碼最多只會出現一個英文字,即使我們用資料增強的方式,也很難去做到平衡數字及英文的資料比例。雖然似乎可以透過設定class weight的方式來改善,但是keras好像不支援以one-hot encoding表示的輸出設定class weight...。這部分暫時想不到方式解決,不過若只是要訓練一個勉強堪用的模型,用這個方式還是可行的,因為實際上台鐵驗證碼英文出現的比例也沒有非常高。)``` 455 | 456 | 有趣的是,在交叉測試後我們發現,在辨識是5碼or6碼的模型中,使用真實驗證碼(```train_cnn_real_56.py```)訓練出來的模型的準確率是比使用模仿的(```train_cnn_imitate_56.py```)還要稍微高一些些的。 457 | 458 | 而另外兩個真實驗證碼訓練出來的模型(```train_cnn_real_5.py```及```train_cnn_real_6.py```),在英文字母的辨識上辨識率可以說是接近0%,但其實每個digit的辨識率應該都還有90%左右,不過因為乘上同時5/6碼,整體的辨識率就很低了。 459 | 460 | 461 | ## 4.-Issue & Todo 462 | 1. 更新英文readme。 463 | 2. 重寫captcha_gen.py,有點亂。 464 | 3. 嘗試了使用Conditional-DCGAN產生驗證碼,但一直發生mode collapse,之後有空再弄看看了。 465 | 4. 嘗試了使用Capsule Network但效果不太好...等有空的時候再整理放上來。 466 | 467 | 468 | ## 5.-Reference 469 | 1. An overview of gradient descent optimization algorithms - http://ruder.io/optimizing-gradient-descent/index.html 470 | 2. 
SGD,Adagrad,Adadelta,Adam等优化方法总结和比较 - http://ycszen.github.io/2016/08/24/SGD%EF%BC%8CAdagrad%EF%BC%8CAdadelta%EF%BC%8CAdam%E7%AD%89%E4%BC%98%E5%8C%96%E6%96%B9%E6%B3%95%E6%80%BB%E7%BB%93%E5%92%8C%E6%AF%94%E8%BE%83/ 471 | 3. Going Deeper with Convolutions - http://arxiv.org/abs/1409.4842 472 | 473 | ## 6.-Contact 474 | 如果有任何建議或問題,請不吝發Issue或mail(jason860421gmail.com)給我。 475 | 476 | 477 | -------------------- 478 | #### english version 479 | ## Note: English version currently is old version! 480 | # simple-railway-captcha-solver 481 | ![image](./readme_img/old/1.jpeg) 482 | This project uses simple convolution neural network to implement solving the captcha(as above, in Taiwan railway booking website).The training set is generated by imitating the style of captcha, and the validation set is crawling from the booking site and labeled manually for about 1000 records. 483 | 484 | Currently, the accuracy of a single digit on the validation set is about ```98.84%```, and overall accuracy is ```91.13%``` (Successfully recognize 6-digits at once). 485 | 486 | |Name|Description| 487 | |----|----| 488 | |captcha_gen.py|Generating training set by imitating the style of captcha.| 489 | |train_cnn.py |Building the model and train it.| 490 | |demo_solver.py|Demo:Load the model and solve the captcha.| 491 | 492 | ## Dependecies 493 | |Name|Version| 494 | |----|----| 495 | |tensorflow|1.4.0| 496 | |tensorflow-gpu|1.4.0| 497 | |tensorflow-tensorboard|0.4.0rc3| 498 | |Keras|2.1.2| 499 | |h5py|2.7.1| 500 | |Pillow|4.3.0| 501 | |numpy|1.13.3| 502 | 503 | ## Training set? 504 | It is not difficult for building a CNN model to solve a captcha, but where and how do we get a labeled training set? 505 | 506 | ![image](./readme_img/old/2.jpeg)![image](./readme_img/old/3.jpeg)![image](./readme_img/old/4.jpeg) 507 | 508 | We can write a program to crawl thousands of captcha image, and labeled it manually, but it's a time-consuming job! 
Maybe we can try to generate some captcha image by imitating it. 509 | But of course, the image we generate should be really close to the real, otherwise, the accuracy on the validation set will be really bad. 510 | 511 | ## Generate training set 512 | 513 | Firstly we have to observe the captcha, it's easy to find that the captcha is made up of two primary elements: 514 | - ```5 ~ 6 digits``` number and the text size is not same. Furthermore, they are being rotated, and the color is floating. 515 | - The color of background is floating, and there are some white and black interference lines, and some of them will overlay on the number. 516 | 517 | And more...: 518 | - The angle of rotation is between about ```-55 ~ 55 degrees```, and the size is about ```25 ~ 27pt```. 519 | - We can find that one number has not only one style, so we guess that there are two fonts used at random. The first one obviously is ```Courier New-Bold```, and the second one is ```Times New Roman-Bold```.(You can use software such as Photoshop to cross-comparison.) 520 | - About the range of background and text color, we can use color quantization such as k-means to get the color of every background and text, and so we can calculate the color range.(I used k-means in opencv to implement.) 521 | - The color range(R/G/B) of the background is between about ```180 ~ 250```, and text is between ```10 ~ 140```. 522 | - Those interference lines form a rectangle, they have two styles: left and up sides are black, right and down sides are white, and vice versa.(you can also treat them as be rotated 180 degrees). 523 | - The number of the rectangle is between about ```30 ~ 32```, randomly distributed on the captcha image, and the width and height is between about ```5 ~ 21px```. Besides, about 40% of the white lines will overlay on the number, and about 20% of the black lines. 524 | 525 | With these observations, we are about to generate training set!
Now, let's observe where the numbers are placed on the image: 526 | 527 | ![image](./readme_img/old/5.PNG)![image](./readme_img/old/6.PNG)![image](./readme_img/old/7.PNG) 528 | 529 | From these images we can find that the text(number) are not regularly distributed on the image, we can guess that the text is randomly moved left or right after a rotation. There is even some text overlapping together, so we can't crop the image and process only one number at a time. 530 | 531 | Above is the captcha rule we simply observed. The implementation of training set generation is in ```captcha_gen.py```, you can try to implement it in your own way. 532 | 533 | ![image](./readme_img/old/8.jpg) 534 | 535 | The generator finally will output 50,000 captcha image and a csv labeled answer. 536 | 537 | ![image](./readme_img/old/9.PNG)![image](./readme_img/old/10.PNG) 538 | 539 | 540 | ## Building Convolution Neural Network 541 | 542 | Let's build a simple CNN model! 543 | 544 | The input is ```60*200``` image, it has 3 channel(R/G/B), so the shape is ```(60, 200, 3)```. 545 | 546 | Firstly, the input goes through many convolution layers activated by ReLU function to capture features, and performs downsampling by Max pooling layer, and then gets into Dropout layer(randomly drop out some unit to avoid overfitting) and Flatten layer. Finally, they output to the fully connected layer: 6 Softmax regression classifiers, each with 10 neurons.
547 | 548 | ```python 549 | tensor_in = Input((60, 200, 3)) 550 | out = tensor_in 551 | out = Conv2D(filters=32, kernel_size=(3, 3), padding='same', activation='relu')(out) 552 | out = Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(out) 553 | out = MaxPooling2D(pool_size=(2, 2))(out) 554 | out = Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu')(out) 555 | out = Conv2D(filters=64, kernel_size=(3, 3), activation='relu')(out) 556 | out = MaxPooling2D(pool_size=(2, 2))(out) 557 | out = Conv2D(filters=128, kernel_size=(3, 3), padding='same', activation='relu')(out) 558 | out = Conv2D(filters=128, kernel_size=(3, 3), activation='relu')(out) 559 | out = MaxPooling2D(pool_size=(2, 2))(out) 560 | out = Conv2D(filters=256, kernel_size=(3, 3), activation='relu')(out) 561 | out = MaxPooling2D(pool_size=(2, 2))(out) 562 | out = Flatten()(out) 563 | out = Dropout(0.5)(out) 564 | out = [Dense(10, name='digit1', activation='softmax')(out),\ 565 | Dense(10, name='digit2', activation='softmax')(out),\ 566 | Dense(10, name='digit3', activation='softmax')(out),\ 567 | Dense(10, name='digit4', activation='softmax')(out),\ 568 | Dense(10, name='digit5', activation='softmax')(out),\ 569 | Dense(10, name='digit6', activation='softmax')(out)] 570 | model = Model(inputs=tensor_in, outputs=out) 571 | ``` 572 | 573 | Now we can do the next step: compile the model: loss use ```categorical_crossentropy```, optimizer* use ```Adamax```, and metrics is ```accuracy```. 574 | ```python 575 | model.compile(loss='categorical_crossentropy', optimizer='Adamax', metrics=['accuracy']) 576 | ``` 577 | 578 | *About the choice of an optimizer, you can refer below: 579 | 1. An overview of gradient descent optimization algorithms - http://ruder.io/optimizing-gradient-descent/index.html 580 | 2. 
SGD,Adagrad,Adadelta,Adam等优化方法总结和比较 - http://ycszen.github.io/2016/08/24/SGD%EF%BC%8CAdagrad%EF%BC%8CAdadelta%EF%BC%8CAdam%E7%AD%89%E4%BC%98%E5%8C%96%E6%96%B9%E6%B3%95%E6%80%BB%E7%BB%93%E5%92%8C%E6%AF%94%E8%BE%83/ 581 | ** 582 | 583 | -- 584 | 585 | Okay! Now we have finished the design of the model, let's see the summary of model: 586 | 587 | ```python 588 | model.summary() 589 | 590 | __________________________________________________________________________________________________ 591 | Layer (type) Output Shape Param # Connected to 592 | ====================================================== 593 | input_1 (InputLayer) (None, 60, 200, 3) 0 594 | __________________________________________________________________________________________________ 595 | conv2d_1 (Conv2D) (None, 60, 200, 32) 896 input_1[0][0] 596 | __________________________________________________________________________________________________ 597 | conv2d_2 (Conv2D) (None, 58, 198, 32) 9248 conv2d_1[0][0] 598 | __________________________________________________________________________________________________ 599 | max_pooling2d_1 (MaxPooling2D) (None, 29, 99, 32) 0 conv2d_2[0][0] 600 | __________________________________________________________________________________________________ 601 | conv2d_3 (Conv2D) (None, 29, 99, 64) 18496 max_pooling2d_1[0][0] 602 | __________________________________________________________________________________________________ 603 | conv2d_4 (Conv2D) (None, 27, 97, 64) 36928 conv2d_3[0][0] 604 | __________________________________________________________________________________________________ 605 | max_pooling2d_2 (MaxPooling2D) (None, 13, 48, 64) 0 conv2d_4[0][0] 606 | __________________________________________________________________________________________________ 607 | conv2d_5 (Conv2D) (None, 13, 48, 128) 73856 max_pooling2d_2[0][0] 608 | __________________________________________________________________________________________________ 609 | conv2d_6 (Conv2D) 
(None, 11, 46, 128) 147584 conv2d_5[0][0] 610 | __________________________________________________________________________________________________ 611 | max_pooling2d_3 (MaxPooling2D) (None, 5, 23, 128) 0 conv2d_6[0][0] 612 | __________________________________________________________________________________________________ 613 | conv2d_7 (Conv2D) (None, 3, 21, 256) 295168 max_pooling2d_3[0][0] 614 | __________________________________________________________________________________________________ 615 | max_pooling2d_4 (MaxPooling2D) (None, 1, 10, 256) 0 conv2d_7[0][0] 616 | __________________________________________________________________________________________________ 617 | flatten_1 (Flatten) (None, 2560) 0 max_pooling2d_4[0][0] 618 | __________________________________________________________________________________________________ 619 | dropout_1 (Dropout) (None, 2560) 0 flatten_1[0][0] 620 | __________________________________________________________________________________________________ 621 | digit1 (Dense) (None, 10) 25610 dropout_1[0][0] 622 | __________________________________________________________________________________________________ 623 | digit2 (Dense) (None, 10) 25610 dropout_1[0][0] 624 | __________________________________________________________________________________________________ 625 | digit3 (Dense) (None, 10) 25610 dropout_1[0][0] 626 | __________________________________________________________________________________________________ 627 | digit4 (Dense) (None, 10) 25610 dropout_1[0][0] 628 | __________________________________________________________________________________________________ 629 | digit5 (Dense) (None, 10) 25610 dropout_1[0][0] 630 | __________________________________________________________________________________________________ 631 | digit6 (Dense) (None, 10) 25610 dropout_1[0][0] 632 | ======================================================= 633 | Total params: 735,836 634 | Trainable params: 735,836 635 | 
Non-trainable params: 0 636 | ``` 637 | 638 | ![image](./readme_img/old/11.png) 639 | 640 | ## Load the training set 641 | Before training the model, we have to load the data into memory. 642 | 643 | Firstly we have to process X part: feature(our captcha image). 644 | The data we input to CNN should be numpy array type, so we use Pillow to read image and convert it to numpy array. 645 | 646 | ```python 647 | for index in range(1, 50001, 1): 648 | image = Image.open("./data/train_set/" + str(index) + ".jpg") #Load our image 649 | nparr = np.array(image) # Convert to numpy array 650 | nparr = nparr / 255.0 651 | ``` 652 | 653 | The shape of nparr is ```(60, 200, 3)```, it's the same as the input we just designed in the model. 654 | And we plan to use 50,000 captcha image to train the model, so the input shape to CNN will be ```(50000, 60, 200, 3)```. Use numpy.stack to merge them all: 655 | 656 | ```python 657 | train_data = np.stack([np.array(Image.open("./data/train_set/" + str(index) + ".jpg"))/255.0 for index in range(1, 50001, 1)]) 658 | ``` 659 | 660 | Now, the shape of train_data is ```(50000, 60, 200, 3)```. 661 | 662 | The next is Y part, label: the answer of the training set.
663 | Because the model is multi-output(6 softmax regression classifier), so the Y should be a list containing 6 numpy array, like this: 664 | ``` 665 | [[First digit of first image,..., First digit of last image], [Second digit of first image,..., Second digit of last image], [...], [...], [...], [...]] 666 | ``` 667 | And every digit is present as one-hot encoding, for example 0 is ```[1, 0, 0, 0, ....,0]```, 2 is```[0, 0, 1, 0, ....,0]``` 668 | 669 | ```python 670 | traincsv = open('./data/train_set/train.csv', 'r', encoding = 'utf8') 671 | read_label = [toonehot(row[1]) for row in csv.reader(traincsv)] 672 | train_label = [[] for _ in range(6)] 673 | for arr in read_label: 674 | for index in range(6): 675 | train_label[index].append(arr[index]) 676 | train_label = [arr for arr in np.asarray(train_label)] 677 | ``` 678 | 679 | ## Validation set 680 | The validation set is real captcha image crawl from the railway booking website and labeled manually. Load the data as same as above, and X(feature(image)) put in ```vali_data```, Y(label) in ```vali_label```. 681 | 682 | ## Callback 683 | We are using 3 callbacks: 684 | 685 | ### 1.ModelCheckPoint 686 | 687 | ```python 688 | checkpoint = ModelCheckpoint(filepath, monitor='val_digit6_acc', verbose=1, save_best_only=True, mode='max') 689 | ``` 690 | For saving best accuracy model, it will check after every epoch, and save the model to filepath if the accuracy is better than before. 691 | 692 | ### 2.Earlystopping 693 | 694 | ```python 695 | earlystop = EarlyStopping(monitor='val_loss', patience=2, verbose=1, mode='auto') 696 | ``` 697 | The monitor set to ```val_loss```, ```patience``` set to 2, that is, if the loss of validation set didn't improve twice in a row, training will be stopped. 698 | 699 | ### 3.TensorBoard 700 | 701 | ```python 702 | tensorBoard = TensorBoard(log_dir = "./logs", histogram_freq = 1) 703 | ``` 704 | TensorBoard is a great visualization tool, we can use it to view our training result. 
705 | 706 | ```python 707 | tensorboard --logdir=logs 708 | ``` 709 | to start it. 710 | 711 | 712 | Finally, put them into a list. 713 | ```python 714 | callbacks_list = [tensorBoard, earlystop, checkpoint] 715 | ``` 716 | 717 | ## Training the model 718 | We have prepared everything we need so far, now we can start training the model! 719 | (If you get a Resource exhausted error, try to reduce ```batch_size```.) 720 | 721 | ```python 722 | model.fit(train_data, train_label, batch_size=400, epochs=50, verbose=2, validation_data=(vali_data, vali_label), callbacks=callbacks_list) 723 | ``` 724 | 725 | ## Result 726 | After 15 epochs, the training was stopped by EarlyStopping. 727 | The accuracy for a single digit on the validation set is about ```98.84%```, and overall accuracy is ```91.13%``` (Successfully recognize 6-digits at once). 728 | 729 | ![image](./readme_img/old/12.PNG) 730 | 731 | ## Issue & Todo 732 | 1. Currently unable to solve 5-digits captcha. Maybe implement by CNN + RNN. 733 | 2. Improve the grammar and everything in English version README. 734 | 3. Re-write captcha_gen.py in a better way. 735 | 4. Try to use GAN(Generative Adversarial Network) to generate the training set. 736 | 5. Try to create a new model with capsule network. 737 | 738 | ## Reference 739 | 1. An overview of gradient descent optimization algorithms - http://ruder.io/optimizing-gradient-descent/index.html 740 | 2. SGD,Adagrad,Adadelta,Adam等优化方法总结和比较 - http://ycszen.github.io/2016/08/24/SGD%EF%BC%8CAdagrad%EF%BC%8CAdadelta%EF%BC%8CAdam%E7%AD%89%E4%BC%98%E5%8C%96%E6%96%B9%E6%B3%95%E6%80%BB%E7%BB%93%E5%92%8C%E6%AF%94%E8%BE%83/ 741 | 3.
Going Deeper with Convolutions - http://arxiv.org/abs/1409.4842 742 | -------------------------------------------------------------------------------- /captcha_gen.py: -------------------------------------------------------------------------------- 1 | from PIL import Image, ImageDraw, ImageFont 2 | from random import randint 3 | import csv 4 | import numpy as np 5 | FONTPATH = ["./data/font/times-bold.ttf", "./data/font/courier-bold.ttf"] 6 | ENGSTR = "ABCDEFGHJKLMNPQRSTUVWXYZ" # 沒有O和I 7 | LETTERSTR = "0123456789ABCDEFGHJKLMNPQRSTUVWXYZ" 8 | 9 | 10 | class rect: 11 | def __init__(self): 12 | self.size = (randint(5, 21), randint(5, 21)) 13 | self.location = (randint(1, 199), randint(1, 59)) 14 | self.luoverlay = True if randint(1, 10) > 6 else False 15 | self.rdoverlay = False if self.luoverlay else True if randint(1, 10) > 8 else False 16 | self.lucolor = 0 if randint(0, 1) else 255 17 | self.rdcolor = 0 if self.lucolor == 255 else 255 18 | self.ludrawn = False 19 | self.rddrawn = False 20 | self.pattern = randint(0, 1) 21 | 22 | 23 | def draw(self, image, overlay): 24 | if((overlay or not self.luoverlay) and not self.ludrawn): 25 | self.ludrawn = True 26 | stp = self.location 27 | transparent = int(255 * 0.45 if self.lucolor == 0 else 255 * 0.8) 28 | color = (self.lucolor, self.lucolor, self.lucolor, transparent) 29 | uline = Image.new("RGBA", (self.size[0], 1), color) 30 | lline = Image.new("RGBA", (1, self.size[1]), color) 31 | image.paste(uline, stp, uline) 32 | image.paste(lline, stp, lline) 33 | if((overlay or not self.rdoverlay) and not self.rddrawn): 34 | self.rddrawn = True 35 | dstp = (self.location[0], self.location[1] + self.size[1]) 36 | rstp = (self.location[0] + self.size[0], self.location[1]) 37 | transparent = int(255 * 0.45 if self.rdcolor == 0 else 255 * 0.8) 38 | color = (self.rdcolor, self.rdcolor, self.rdcolor, transparent) 39 | dline = Image.new("RGBA", (self.size[0], 1), color) 40 | rline = Image.new("RGBA", (1, self.size[1]), color) 41 | 
image.paste(dline, dstp, dline) 42 | image.paste(rline, rstp, rline) 43 | 44 | 45 | class captchatext: 46 | def __init__(self, priority, offset, captchalen, engletter, ENGNOLIMIT): 47 | self.engletter = engletter 48 | if ENGNOLIMIT: 49 | engletter = True if randint(1, 34) <= 24 else False 50 | if engletter: 51 | self.letter = ENGSTR[randint(0, len(ENGSTR) - 1)] 52 | else: 53 | self.letter = str(randint(0, 9)) 54 | self.color = [randint(10, 140) for _ in range(3)] 55 | self.angle = randint(-55, 55) 56 | self.priority = priority 57 | self.offset = offset 58 | self.next_offset = 0 59 | self.captchalen = captchalen 60 | 61 | 62 | def draw(self, image): 63 | color = (self.color[0], self.color[1], self.color[2], 255) 64 | font = ImageFont.truetype(FONTPATH[randint(0, 1)], randint(25, 27) * 10) 65 | text = Image.new("RGBA", (font.getsize(self.letter)[0], 300), (0, 0, 0, 0)) 66 | textdraw = ImageDraw.Draw(text) 67 | textdraw.text((0, 0), self.letter, font=font, fill=color) 68 | text = text.rotate(self.angle, expand=True) 69 | text = text.resize((int(text.size[0] / 10), int(text.size[1] / 10))) 70 | base = int(self.priority * (200 / self.captchalen)) 71 | rand_min = (self.offset - base - 4) if (self.offset - base - 4) >= -15 else -15 72 | rand_min = 0 if self.priority == 0 else rand_min 73 | avg_dp = int(200 / self.captchalen) 74 | rand_max = (avg_dp - text.size[0]) if self.priority == self.captchalen - 1 else (avg_dp - text.size[0] + 10) 75 | try: 76 | displace = randint(rand_min, rand_max) 77 | except: 78 | displace = rand_max 79 | location = (base + displace, randint(3, 23)) 80 | self.next_offset = location[0] + text.size[0] 81 | image.paste(text, location, text) 82 | 83 | 84 | def generate(GENNUM, SAVEPATH, ENGP=25, FIVEP=0, ENGNOLIMIT=False, filename="train"): 85 | captchacsv = open(SAVEPATH + "captcha_{:s}.csv".format(filename), 'w', encoding = 'utf8', newline = '') 86 | lencsv = open(SAVEPATH + "len_{:s}.csv".format(filename), 'w', encoding = 'utf8', newline = '') 87 
| letterlist = [] 88 | lenlist = [] 89 | for index in range(1, GENNUM + 1, 1): 90 | captchastr = "" 91 | captchalen = 5 if randint(1, 100) <= FIVEP else 6 92 | engat = randint(0, captchalen - 1) if randint(1, 100) <= ENGP else -1 93 | bgcolor = [randint(180, 250) for _ in range(3)] 94 | captcha = Image.new('RGBA', (200, 60), (bgcolor[0], bgcolor[1], bgcolor[2], 255)) 95 | rectlist = [rect() for _ in range(32)] 96 | for obj in rectlist: 97 | obj.draw(image=captcha, overlay=False) 98 | offset = 0 99 | for i in range(captchalen): 100 | newtext = captchatext(i, offset, captchalen, (True if engat == i else False), ENGNOLIMIT) 101 | newtext.draw(image=captcha) 102 | offset = newtext.next_offset 103 | captchastr += str(newtext.letter) 104 | letterlist.append([str(index).zfill(len(str(GENNUM))), captchastr]) 105 | lenlist.append([str(index).zfill(len(str(GENNUM))), captchalen]) 106 | for obj in rectlist: 107 | obj.draw(image=captcha, overlay=True) 108 | captcha.convert("RGB").save(SAVEPATH + str(index).zfill(len(str(GENNUM))) + ".jpg", "JPEG") 109 | writer = csv.writer(captchacsv) 110 | writer.writerows(letterlist) 111 | writer = csv.writer(lencsv) 112 | writer.writerows(lenlist) 113 | captchacsv.close() 114 | lencsv.close() 115 | 116 | 117 | if __name__ == "__main__": 118 | generate(50000, "./data/56_imitate_train_set/", ENGP=100, FIVEP=50, ENGNOLIMIT=True, filename="train") 119 | generate(10240, "./data/56_imitate_vali_set/", ENGP=100, FIVEP=50, ENGNOLIMIT=True, filename="vali") 120 | generate(50000, "./data/5_imitate_train_set/", ENGP=100, FIVEP=100, ENGNOLIMIT=True, filename="train") 121 | generate(10240, "./data/5_imitate_vali_set/", ENGP=100, FIVEP=100, ENGNOLIMIT=True, filename="vali") 122 | generate(50000, "./data/6_imitate_train_set/", ENGP=100, FIVEP=0, ENGNOLIMIT=True, filename="train") 123 | generate(10240, "./data/6_imitate_vali_set/", ENGP=100, FIVEP=0, ENGNOLIMIT=True, filename="vali") 124 | 
-------------------------------------------------------------------------------- /captcha_scrawl.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | import requests 3 | import time 4 | SAVEPATH = "./data/manual_label/" 5 | url = 'http://railway1.hinet.net/ImageOut.jsp' 6 | for i in range(1, 3000): 7 | response = requests.get(url, stream=True) 8 | with open(SAVEPATH + str(i) + '.jpg', 'wb') as out_file: 9 | shutil.copyfileobj(response.raw, out_file) 10 | del response 11 | time.sleep(0.5) 12 | -------------------------------------------------------------------------------- /data_augment.py: -------------------------------------------------------------------------------- 1 | from keras.preprocessing.image import ImageDataGenerator 2 | from PIL import Image 3 | import numpy as np 4 | import csv 5 | 6 | outputcsv = open('./data/6_real_train_set/captcha_train.csv', 'w', encoding = 'utf8', newline = '') # 輸出csv 7 | inputcsv = open('./data/manual_label/captcha_vali.csv', 'r', encoding = 'utf8') 8 | data = [np.array(Image.open('./data/manual_label/' + row[0] + ".jpg")) for row in csv.reader(inputcsv) if len(row[1]) == 6] # 只讀答案是6位的 9 | inputcsv = open('./data/manual_label/captcha_vali.csv', 'r', encoding = 'utf8') 10 | oldanswer = [row[1] for row in csv.reader(inputcsv) if len(row[1]) == 6] # 只讀答案是6位的 11 | answer = [] 12 | datagen = ImageDataGenerator(rotation_range=5,shear_range=0.2,zoom_range=0.05,fill_mode='nearest') 13 | index, augmentindex, oldanswerindex = 0, 0, 0 14 | for img in data: 15 | for batch in datagen.flow(np.asarray([img]), batch_size=1): 16 | index += 1 17 | augmentindex += 1 18 | batch = batch.reshape((60,200,3)) 19 | Image.fromarray(np.uint8(batch)).convert("RGB").save("./data/6_real_train_set/" + str(index) + ".jpg", "JPEG") 20 | answer.append((str(index), oldanswer[oldanswerindex])) 21 | if augmentindex >= 50: # 每張產生50個 22 | oldanswerindex += 1 23 | augmentindex = 0 24 | break 25 | 
csv.writer(outputcsv).writerows(answer) 26 | -------------------------------------------------------------------------------- /demo_cnn.py: -------------------------------------------------------------------------------- 1 | from keras.models import load_model 2 | from keras.models import Model 3 | from keras import backend as K 4 | from PIL import Image 5 | import numpy as np 6 | import os 7 | import csv 8 | LETTERSTR = "0123456789ABCDEFGHJKLMNPQRSTUVWXYZ" 9 | 10 | 11 | def toonehot(text): 12 | labellist = [] 13 | for letter in text: 14 | onehot = [0 for _ in range(34)] 15 | num = LETTERSTR.find(letter) 16 | onehot[num] = 1 17 | labellist.append(onehot) 18 | return labellist 19 | 20 | 21 | print("Loading test data...") 22 | testcsv = open('./data/manual_label/captcha_test.csv', 'r', encoding = 'utf8') 23 | test_data = np.stack([np.array(Image.open("./data/manual_label/" + row[0] + ".jpg"))/255.0 for row in csv.reader(testcsv)]) 24 | testcsv = open('./data/manual_label/captcha_test.csv', 'r', encoding = 'utf8') 25 | test_label = [row[1] for row in csv.reader(testcsv)] 26 | print("Loading model...") 27 | K.clear_session() 28 | model = None 29 | model5 = load_model("./data/model/imitate_5_model.h5") 30 | model6 = load_model("./data/model/imitate_6_model.h5") 31 | model56 = load_model("./data/model/real_56_model.h5") 32 | print("Predicting...") 33 | prediction56 = [6 if arr[0] > 0.5 else 5 for arr in model56.predict(test_data)] # 5/6碼分類 34 | prediction5 = model5.predict(test_data) # 5碼 35 | prediction6 = model6.predict(test_data) # 6碼 36 | 37 | # 以下計算各個模型各個字元辨識率等等,有點亂,以後有空再整理 38 | total, total5, total6 = len(prediction56), 0, 0 39 | correct5, correct6, correct56, correct = 0, 0, 0, 0 40 | correct5digit, correct6digit = [0 for _ in range(5)], [0 for _ in range(6)] 41 | totalalpha, correctalpha = len([1 for ans in test_label for char in ans if char.isalpha()]), 0 42 | for i in range(total): 43 | checkcorrect = True 44 | if prediction56[i] == len(test_label[i]): 45 | 
correct56 += 1 46 | else: 47 | checkcorrect = False 48 | if prediction56[i] == 5: 49 | total5 += 1 50 | allequal = True 51 | for char in range(5): 52 | if LETTERSTR[np.argmax(prediction5[char][i])] == test_label[i][char]: 53 | correct5digit[char] += 1 54 | correctalpha += 1 if LETTERSTR[np.argmax(prediction5[char][i])].isalpha() else 0 55 | else: 56 | allequal = False 57 | if allequal: 58 | correct5 += 1 59 | else: 60 | checkcorrect = False 61 | else: 62 | total6 += 1 63 | allequal = True 64 | for char in range(6): 65 | if LETTERSTR[np.argmax(prediction6[char][i])] == test_label[i][char]: 66 | correct6digit[char] += 1 67 | correctalpha += 1 if LETTERSTR[np.argmax(prediction6[char][i])].isalpha() else 0 68 | else: 69 | allequal = False 70 | if allequal: 71 | correct6 += 1 72 | else: 73 | checkcorrect = False 74 | if checkcorrect: 75 | correct += 1 76 | 77 | print("5 or 6 model acc:{:.4f}%".format(correct56/total*100)) # 5/6模型acc 78 | print("---------------------------") 79 | print("5digits model acc:{:.4f}%".format(correct5/total5*100)) # 5模型acc 80 | for i in range(5): 81 | print("digit{:d} acc:{:.4f}%".format(i+1, correct5digit[i]/total5*100)) # 5模型各字元acc 82 | print("---------------------------") 83 | print("6digits model acc:{:.4f}%".format(correct6/total6*100)) # 6模型acc 84 | for i in range(6): 85 | print("digit{:d} acc:{:.4f}%".format(i+1, correct6digit[i]/total6*100)) # 6模型各字元acc 86 | print("---------------------------") 87 | print("alpha acc:{:.4f}%".format(correctalpha/totalalpha*100)) # 整體英文字acc 88 | -------------------------------------------------------------------------------- /demo_online.py: -------------------------------------------------------------------------------- 1 | from selenium import webdriver 2 | from selenium.common.exceptions import TimeoutException 3 | from selenium.webdriver.common.by import By 4 | from selenium.webdriver.support.ui import WebDriverWait 5 | from selenium.webdriver.support import expected_conditions as EC 6 | import numpy 
as np 7 | from PIL import Image 8 | from keras.models import load_model, Model 9 | import time 10 | import random 11 | IDNumber = "X123456789" # 填入你的身分證字號 12 | model = None 13 | model5 = load_model("./data/model/imitate_5_model.h5") # 辨識5碼的Model 14 | model6 = load_model("./data/model/imitate_6_model.h5") # 辨識6碼的Model 15 | model56 = load_model("./data/model/real_56_model.h5") # 辨識是5碼or6碼的Model 16 | LETTERSTR = "0123456789ABCDEFGHJKLMNPQRSTUVWXYZ" 17 | driver = webdriver.Chrome("./data/chromedriver.exe") # chromedriver 路徑 18 | correct, wrong = 0, 0 19 | 20 | for _ in range(1000):# 跑1000次 21 | driver.get('http://railway1.hinet.net/Foreign/TW/ecsearch.html') 22 | id_textbox = driver.find_element_by_id('person_id') 23 | id_textbox.send_keys(IDNumber) 24 | button = driver.find_element_by_css_selector('body > div.container > div.row.contents > div > form > div > div.col-xs-12 > button') 25 | button.click() 26 | driver.save_screenshot('tmp.png') 27 | location = driver.find_element_by_id('idRandomPic').location 28 | x, y = location['x'] + 5, location['y'] + 5 29 | img = Image.open('tmp.png') 30 | captcha = img.crop((x, y, x+200, y+60)) 31 | captcha.convert("RGB").save('captcha.jpg', 'JPEG') 32 | # check is 5 or 6 digits 33 | p56 = model56.predict(np.stack([np.array(Image.open('captcha.jpg'))/255.0]))[0][0] 34 | if p56 > 0.5: 35 | model = model6 36 | else: 37 | model = model5 38 | prediction = model.predict(np.stack([np.array(Image.open('captcha.jpg'))/255.0])) 39 | answer = "" 40 | for predict in prediction: 41 | answer += LETTERSTR[np.argmax(predict[0])] 42 | captcha_textbox = driver.find_element_by_id('randInput') 43 | captcha_textbox.send_keys(answer) 44 | driver.find_element_by_id('sbutton').click() 45 | if "亂數號碼錯誤" in driver.page_source: 46 | wrong += 1 47 | else: 48 | correct += 1 49 | print("{:.4f}% (Correct{:d}-Wrong{:d})".format(correct/(correct+wrong)*100, correct, wrong)) 50 | time.sleep(3) 51 | 
-------------------------------------------------------------------------------- /readme_img/captcha_sample1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/captcha_sample1.jpg -------------------------------------------------------------------------------- /readme_img/captcha_sample2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/captcha_sample2.jpg -------------------------------------------------------------------------------- /readme_img/captcha_sample3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/captcha_sample3.jpg -------------------------------------------------------------------------------- /readme_img/captcha_sample4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/captcha_sample4.jpg -------------------------------------------------------------------------------- /readme_img/captcha_seperate1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/captcha_seperate1.png -------------------------------------------------------------------------------- /readme_img/captcha_seperate2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/captcha_seperate2.png -------------------------------------------------------------------------------- /readme_img/captcha_seperate3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/captcha_seperate3.png -------------------------------------------------------------------------------- /readme_img/csv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/csv.png -------------------------------------------------------------------------------- /readme_img/dataaugmentation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/dataaugmentation.png -------------------------------------------------------------------------------- /readme_img/generate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/generate.png -------------------------------------------------------------------------------- /readme_img/head.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/head.gif -------------------------------------------------------------------------------- /readme_img/imitate6.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/imitate6.png -------------------------------------------------------------------------------- /readme_img/imitate6_tensorboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/imitate6_tensorboard.png -------------------------------------------------------------------------------- /readme_img/imitate_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/imitate_result.png -------------------------------------------------------------------------------- /readme_img/old/1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/old/1.jpeg -------------------------------------------------------------------------------- /readme_img/old/10.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/old/10.PNG -------------------------------------------------------------------------------- /readme_img/old/11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/old/11.png -------------------------------------------------------------------------------- /readme_img/old/12.PNG: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/old/12.PNG -------------------------------------------------------------------------------- /readme_img/old/2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/old/2.jpeg -------------------------------------------------------------------------------- /readme_img/old/3.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/old/3.jpeg -------------------------------------------------------------------------------- /readme_img/old/4.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/old/4.jpeg -------------------------------------------------------------------------------- /readme_img/old/5.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/old/5.PNG -------------------------------------------------------------------------------- /readme_img/old/6.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/old/6.PNG -------------------------------------------------------------------------------- /readme_img/old/7.PNG: -------------------------------------------------------------------------------- 
from keras.models import Model
from keras.layers import Input, Dense, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization
from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard
from PIL import Image
import numpy as np
import csv

# Captcha alphabet used across this project (digits + uppercase letters,
# apparently without I and O — presumably to avoid confusion with 1/0).
LETTERSTR = "0123456789ABCDEFGHJKLMNPQRSTUVWXYZ"
DIGITS = 5  # this model recognises fixed-length 5-character captchas


def toonehot(text):
    """Return one one-hot vector (len(LETTERSTR) wide) per character of *text*."""
    labellist = []
    for letter in text:
        onehot = [0 for _ in range(len(LETTERSTR))]  # was a magic constant 34
        onehot[LETTERSTR.find(letter)] = 1
        labellist.append(onehot)
    return labellist


def _load_set(csv_path, img_dir):
    """Load the images and per-digit labels listed in *csv_path*.

    Returns (data, labels): data is float array (n, 60, 200, 3) scaled to
    [0, 1]; labels is a list of DIGITS arrays, one per character position,
    each of shape (n, len(LETTERSTR)).
    """
    # Read the CSV once with a context manager; the original opened the same
    # file twice and never closed either handle.
    with open(csv_path, 'r', encoding='utf8') as csvfile:
        rows = list(csv.reader(csvfile))
    data = np.stack([np.array(Image.open(img_dir + row[0] + ".jpg")) / 255.0 for row in rows])
    onehots = [toonehot(row[1]) for row in rows]
    labels = [np.asarray([onehot[pos] for onehot in onehots]) for pos in range(DIGITS)]
    return data, labels


# Create CNN Model
print("Creating CNN model...")
tensor_in = Input((60, 200, 3))  # `in` is a reserved word and cannot be a variable name
out = tensor_in
out = Conv2D(filters=32, kernel_size=(3, 3), padding='same', activation='relu')(out)
out = Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Dropout(0.3)(out)
out = Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu')(out)
out = Conv2D(filters=64, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Dropout(0.3)(out)
out = Conv2D(filters=128, kernel_size=(3, 3), padding='same', activation='relu')(out)
out = Conv2D(filters=128, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Dropout(0.3)(out)
out = Conv2D(filters=256, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Flatten()(out)
flat = Dropout(0.3)(out)
# One softmax head per character position, names digit1..digit5.
outs = [Dense(len(LETTERSTR), name='digit{:d}'.format(pos + 1), activation='softmax')(flat)
        for pos in range(DIGITS)]
model = Model(inputs=tensor_in, outputs=outs)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

print("Reading training data...")
train_data, train_label = _load_set('./data/5_imitate_train_set/captcha_train.csv',
                                    './data/5_imitate_train_set/')
print("Shape of train data:", train_data.shape)

print("Reading validation data...")
vali_data, vali_label = _load_set('./data/5_imitate_vali_set/captcha_vali.csv',
                                  './data/5_imitate_vali_set/')
print("Shape of validation data:", vali_data.shape)

# Keep the best weights by last-digit validation accuracy and stop early
# after 5 epochs without improvement.
filepath = "./data/model/imitate_5_model.h5"
checkpoint = ModelCheckpoint(filepath, monitor='val_digit5_acc', verbose=1, save_best_only=True, mode='max')
earlystop = EarlyStopping(monitor='val_digit5_acc', patience=5, verbose=1, mode='auto')
tensorBoard = TensorBoard(log_dir="./logs", histogram_freq=1)
callbacks_list = [checkpoint, earlystop, tensorBoard]
model.fit(train_data, train_label, batch_size=400, epochs=100, verbose=2,
          validation_data=(vali_data, vali_label), callbacks=callbacks_list)
from keras.models import Model
from keras.layers import Input, Dense, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization
from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard
from PIL import Image
import numpy as np
import csv


def _load_set(csv_path, img_dir):
    """Load images and binary length labels (1 = 6 characters, 0 = 5).

    The CSV's second column holds the captcha length as a string.
    """
    # Read once inside a context manager; the original opened the file twice
    # and leaked both handles.
    with open(csv_path, 'r', encoding='utf8') as csvfile:
        rows = list(csv.reader(csvfile))
    data = np.stack([np.array(Image.open(img_dir + row[0] + ".jpg")) / 255.0 for row in rows])
    labels = np.asarray([1 if row[1] == '6' else 0 for row in rows])
    return data, labels


# Create CNN Model: a binary classifier telling 5-char from 6-char captchas.
print("Creating CNN model...")
tensor_in = Input((60, 200, 3))  # `in` is a reserved word and cannot be a variable name
out = tensor_in
out = Conv2D(filters=32, kernel_size=(3, 3), padding='same', activation='relu')(out)
out = Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Dropout(0.5)(out)
out = Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu')(out)
out = Conv2D(filters=64, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Dropout(0.5)(out)
out = Conv2D(filters=128, kernel_size=(3, 3), padding='same', activation='relu')(out)
out = Conv2D(filters=128, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Dropout(0.5)(out)
out = Conv2D(filters=256, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Flatten()(out)
out = Dropout(0.5)(out)
out = Dense(1, name='6digit', activation='sigmoid')(out)  # P(captcha has 6 chars)
model = Model(inputs=tensor_in, outputs=out)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

print("Reading training data...")
train_data, train_label = _load_set('./data/56_imitate_train_set/len_train.csv',
                                    './data/56_imitate_train_set/')
print("Shape of train data:", train_data.shape)

print("Reading validation data...")
vali_data, vali_label = _load_set('./data/56_imitate_vali_set/len_vali.csv',
                                  './data/56_imitate_vali_set/')
print("Shape of validation data:", vali_data.shape)

filepath = "./data/model/imitate_56_model.h5"
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
earlystop = EarlyStopping(monitor='val_acc', patience=10, verbose=1, mode='auto')
tensorBoard = TensorBoard(log_dir="./logs", histogram_freq=1)
callbacks_list = [checkpoint, earlystop, tensorBoard]
model.fit(train_data, train_label, batch_size=400, epochs=100, verbose=1,
          validation_data=(vali_data, vali_label), callbacks=callbacks_list)
from keras.models import Model
from keras.layers import Input, Dense, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization
from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard
from PIL import Image
import numpy as np
import csv

# Captcha alphabet used across this project (digits + uppercase letters,
# apparently without I and O — presumably to avoid confusion with 1/0).
LETTERSTR = "0123456789ABCDEFGHJKLMNPQRSTUVWXYZ"
DIGITS = 6  # this model recognises fixed-length 6-character captchas


def toonehot(text):
    """Return one one-hot vector (len(LETTERSTR) wide) per character of *text*."""
    labellist = []
    for letter in text:
        onehot = [0 for _ in range(len(LETTERSTR))]  # was a magic constant 34
        onehot[LETTERSTR.find(letter)] = 1
        labellist.append(onehot)
    return labellist


def _load_set(csv_path, img_dir):
    """Load the images and per-digit labels listed in *csv_path*.

    Returns (data, labels): data is float array (n, 60, 200, 3) scaled to
    [0, 1]; labels is a list of DIGITS arrays, one per character position,
    each of shape (n, len(LETTERSTR)).
    """
    # Read once with a context manager; the original opened the file twice
    # and leaked both handles.
    with open(csv_path, 'r', encoding='utf8') as csvfile:
        rows = list(csv.reader(csvfile))
    data = np.stack([np.array(Image.open(img_dir + row[0] + ".jpg")) / 255.0 for row in rows])
    onehots = [toonehot(row[1]) for row in rows]
    labels = [np.asarray([onehot[pos] for onehot in onehots]) for pos in range(DIGITS)]
    return data, labels


# Create CNN Model
print("Creating CNN model...")
tensor_in = Input((60, 200, 3))  # `in` is a reserved word and cannot be a variable name
out = tensor_in
out = Conv2D(filters=32, kernel_size=(3, 3), padding='same', activation='relu')(out)
out = Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Dropout(0.3)(out)
out = Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu')(out)
out = Conv2D(filters=64, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Dropout(0.3)(out)
out = Conv2D(filters=128, kernel_size=(3, 3), padding='same', activation='relu')(out)
out = Conv2D(filters=128, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Dropout(0.3)(out)
out = Conv2D(filters=256, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Flatten()(out)
flat = Dropout(0.3)(out)
# One softmax head per character position, names digit1..digit6.
outs = [Dense(len(LETTERSTR), name='digit{:d}'.format(pos + 1), activation='softmax')(flat)
        for pos in range(DIGITS)]
model = Model(inputs=tensor_in, outputs=outs)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

print("Reading training data...")
train_data, train_label = _load_set('./data/6_imitate_train_set/captcha_train.csv',
                                    './data/6_imitate_train_set/')
print("Shape of train data:", train_data.shape)

print("Reading validation data...")
vali_data, vali_label = _load_set('./data/6_imitate_vali_set/captcha_vali.csv',
                                  './data/6_imitate_vali_set/')
print("Shape of validation data:", vali_data.shape)

# Keep the best weights by last-digit validation accuracy and stop early
# after 5 epochs without improvement.
filepath = "./data/model/imitate_6_model.h5"
checkpoint = ModelCheckpoint(filepath, monitor='val_digit6_acc', verbose=1, save_best_only=True, mode='max')
earlystop = EarlyStopping(monitor='val_digit6_acc', patience=5, verbose=1, mode='auto')
tensorBoard = TensorBoard(log_dir="./logs", histogram_freq=1)
callbacks_list = [checkpoint, earlystop, tensorBoard]
model.fit(train_data, train_label, batch_size=400, epochs=100, verbose=2,
          validation_data=(vali_data, vali_label), callbacks=callbacks_list)
from keras.models import Model
from keras.layers import Input, Dense, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization
from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard
from PIL import Image
import numpy as np
import csv
import os

# Captcha alphabet used across this project (digits + uppercase letters,
# apparently without I and O — presumably to avoid confusion with 1/0).
LETTERSTR = "0123456789ABCDEFGHJKLMNPQRSTUVWXYZ"
DIGITS = 5  # this model recognises fixed-length 5-character captchas


def toonehot(text):
    """Return one one-hot vector (len(LETTERSTR) wide) per character of *text*."""
    labellist = []
    for letter in text:
        onehot = [0 for _ in range(len(LETTERSTR))]  # was a magic constant 34
        onehot[LETTERSTR.find(letter)] = 1
        labellist.append(onehot)
    return labellist


def _load_range(csv_path, img_dir, start, stop):
    """Load images img_dir/<start>.jpg .. <stop-1>.jpg with their labels.

    CSV row i (0-based) labels image <i+1>.jpg, so the label slice mirrors
    the image index range exactly (the original sliced labels with an
    open-ended [60000:], which only matches if the CSV has exactly 75000 rows).
    """
    # Read once with a context manager; the original opened the file twice
    # and leaked both handles.
    with open(csv_path, 'r', encoding='utf8') as csvfile:
        rows = list(csv.reader(csvfile))
    data = np.stack([np.array(Image.open(img_dir + str(i) + ".jpg")) / 255.0
                     for i in range(start, stop)])
    onehots = [toonehot(row[1]) for row in rows[start - 1:stop - 1]]
    labels = [np.asarray([onehot[pos] for onehot in onehots]) for pos in range(DIGITS)]
    return data, labels


# Create CNN Model
print("Creating CNN model...")
tensor_in = Input((60, 200, 3))  # `in` is a reserved word and cannot be a variable name
out = tensor_in
out = Conv2D(filters=32, kernel_size=(3, 3), padding='same', activation='relu')(out)
out = Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Dropout(0.5)(out)
out = Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu')(out)
out = Conv2D(filters=64, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Dropout(0.5)(out)
out = Conv2D(filters=128, kernel_size=(3, 3), padding='same', activation='relu')(out)
out = Conv2D(filters=128, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Dropout(0.5)(out)
out = Conv2D(filters=256, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Flatten()(out)
flat = Dropout(0.5)(out)
# One softmax head per character position, names digit1..digit5.
outs = [Dense(len(LETTERSTR), name='digit{:d}'.format(pos + 1), activation='softmax')(flat)
        for pos in range(DIGITS)]
model = Model(inputs=tensor_in, outputs=outs)
model.compile(loss='categorical_crossentropy', optimizer='Adam', metrics=['accuracy'])
model.summary()

print("Reading training data...")
train_data, train_label = _load_range('./data/5_real_train_set/captcha_train.csv',
                                      './data/5_real_train_set/', 1, 60001)
print("Shape of train data:", train_data.shape)

print("Reading validation data...")
vali_data, vali_label = _load_range('./data/5_real_train_set/captcha_train.csv',
                                    './data/5_real_train_set/', 60001, 75001)
print("Shape of validation data:", vali_data.shape)

filepath = "./data/model/real_5_model.h5"
checkpoint = ModelCheckpoint(filepath, monitor='val_digit5_acc', verbose=1, save_best_only=True, mode='max')
earlystop = EarlyStopping(monitor='val_digit5_acc', patience=5, verbose=1, mode='auto')
tensorBoard = TensorBoard(log_dir="./logs", histogram_freq=1)
callbacks_list = [checkpoint, earlystop, tensorBoard]
model.fit(train_data, train_label, batch_size=400, epochs=100, verbose=2,
          validation_data=(vali_data, vali_label), callbacks=callbacks_list)
from keras.models import Model
from keras.layers import Input, Dense, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization
from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard
from PIL import Image
import numpy as np
import csv


def _sample_images(img_dir, low, high, size):
    """Load *size* images drawn without replacement from img_dir/<low>.jpg .. <high-1>.jpg,
    scaled to [0, 1]."""
    picks = np.random.choice(range(low, high), size=size, replace=False)
    return [np.array(Image.open(img_dir + str(i) + ".jpg")) / 255.0 for i in picks]


# Create CNN Model: binary classifier telling 5-char from 6-char real captchas.
print("Creating CNN model...")
tensor_in = Input((60, 200, 3))  # `in` is a reserved word and cannot be a variable name
out = tensor_in
out = Conv2D(filters=32, kernel_size=(3, 3), padding='same', activation='relu')(out)
out = Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Dropout(0.5)(out)
out = Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu')(out)
out = Conv2D(filters=64, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Dropout(0.5)(out)
out = Conv2D(filters=128, kernel_size=(3, 3), padding='same', activation='relu')(out)
out = Conv2D(filters=128, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Dropout(0.5)(out)
out = Conv2D(filters=256, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Flatten()(out)
out = Dropout(0.5)(out)
out = Dense(1, name='6digit', activation='sigmoid')(out)  # P(captcha has 6 chars)
model = Model(inputs=tensor_in, outputs=out)
model.compile(loss='binary_crossentropy', optimizer='Adam', metrics=['accuracy'])
model.summary()

print("Reading training data...")
# 20000 random 5-char images (label 0) followed by 20000 random 6-char images (label 1).
train_data = np.stack(_sample_images("./data/5_real_train_set/", 1, 60001, 20000)
                      + _sample_images("./data/6_real_train_set/", 1, 60001, 20000))
train_label = np.asarray([0] * 20000 + [1] * 20000)
print("Shape of train data:", train_data.shape)

print("Reading validation data...")
# Validation draws from the held-out index range 60001..75000 of each set.
vali_data = np.stack(_sample_images("./data/5_real_train_set/", 60001, 75001, 5000)
                     + _sample_images("./data/6_real_train_set/", 60001, 75001, 5000))
vali_label = np.asarray([0] * 5000 + [1] * 5000)
print("Shape of validation data:", vali_data.shape)

filepath = "./data/model/real_56_model.h5"
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
# NOTE(review): the checkpoint tracks val_acc but early stopping tracks
# val_loss — kept as in the original; confirm this asymmetry is intended.
earlystop = EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='auto')
tensorBoard = TensorBoard(log_dir="./logs", histogram_freq=1)
callbacks_list = [checkpoint, earlystop, tensorBoard]
model.fit(train_data, train_label, batch_size=400, epochs=100, verbose=2,
          validation_data=(vali_data, vali_label), callbacks=callbacks_list)
from keras.models import Model
from keras.layers import Input, Dense, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization
from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard
from PIL import Image
import numpy as np
import csv
import os

# Captcha alphabet used across this project (digits + uppercase letters,
# apparently without I and O — presumably to avoid confusion with 1/0).
LETTERSTR = "0123456789ABCDEFGHJKLMNPQRSTUVWXYZ"
DIGITS = 6  # this model recognises fixed-length 6-character captchas


def toonehot(text):
    """Return one one-hot vector (len(LETTERSTR) wide) per character of *text*."""
    labellist = []
    for letter in text:
        onehot = [0 for _ in range(len(LETTERSTR))]  # was a magic constant 34
        onehot[LETTERSTR.find(letter)] = 1
        labellist.append(onehot)
    return labellist


def _load_range(csv_path, img_dir, start, stop):
    """Load images img_dir/<start>.jpg .. <stop-1>.jpg with their labels.

    CSV row i (0-based) labels image <i+1>.jpg, so the label slice mirrors
    the image index range exactly (the original sliced labels with an
    open-ended [60000:], which only matches if the CSV has exactly 75000 rows).
    """
    # Read once with a context manager; the original opened the file twice
    # and leaked both handles.
    with open(csv_path, 'r', encoding='utf8') as csvfile:
        rows = list(csv.reader(csvfile))
    data = np.stack([np.array(Image.open(img_dir + str(i) + ".jpg")) / 255.0
                     for i in range(start, stop)])
    onehots = [toonehot(row[1]) for row in rows[start - 1:stop - 1]]
    labels = [np.asarray([onehot[pos] for onehot in onehots]) for pos in range(DIGITS)]
    return data, labels


# Create CNN Model
print("Creating CNN model...")
tensor_in = Input((60, 200, 3))  # `in` is a reserved word and cannot be a variable name
out = tensor_in
out = Conv2D(filters=32, kernel_size=(3, 3), padding='same', activation='relu')(out)
out = Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Dropout(0.5)(out)
out = Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu')(out)
out = Conv2D(filters=64, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Dropout(0.5)(out)
out = Conv2D(filters=128, kernel_size=(3, 3), padding='same', activation='relu')(out)
out = Conv2D(filters=128, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Dropout(0.5)(out)
out = Conv2D(filters=256, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Flatten()(out)
flat = Dropout(0.5)(out)
# One softmax head per character position, names digit1..digit6.
outs = [Dense(len(LETTERSTR), name='digit{:d}'.format(pos + 1), activation='softmax')(flat)
        for pos in range(DIGITS)]
model = Model(inputs=tensor_in, outputs=outs)
model.compile(loss='categorical_crossentropy', optimizer='Adam', metrics=['accuracy'])
model.summary()

print("Reading training data...")
train_data, train_label = _load_range('./data/6_real_train_set/captcha_train.csv',
                                      './data/6_real_train_set/', 1, 60001)
print("Shape of train data:", train_data.shape)

print("Reading validation data...")
vali_data, vali_label = _load_range('./data/6_real_train_set/captcha_train.csv',
                                    './data/6_real_train_set/', 60001, 75001)
print("Shape of validation data:", vali_data.shape)

filepath = "./data/model/real_6_model.h5"
checkpoint = ModelCheckpoint(filepath, monitor='val_digit6_acc', verbose=1, save_best_only=True, mode='max')
earlystop = EarlyStopping(monitor='val_digit6_acc', patience=5, verbose=1, mode='auto')
tensorBoard = TensorBoard(log_dir="./logs", histogram_freq=1)
callbacks_list = [checkpoint, earlystop, tensorBoard]
model.fit(train_data, train_label, batch_size=400, epochs=100, verbose=2,
          validation_data=(vali_data, vali_label), callbacks=callbacks_list)