├── .gitignore ├── LICENSE ├── README.md ├── captcha_gen.py ├── captcha_scrawl.py ├── data_augment.py ├── demo_cnn.py ├── demo_online.py ├── readme_img ├── captcha_sample1.jpg ├── captcha_sample2.jpg ├── captcha_sample3.jpg ├── captcha_sample4.jpg ├── captcha_seperate1.png ├── captcha_seperate2.png ├── captcha_seperate3.png ├── csv.png ├── dataaugmentation.png ├── generate.png ├── head.gif ├── imitate6.png ├── imitate6_tensorboard.png ├── imitate_result.png └── old │ ├── 1.jpeg │ ├── 10.PNG │ ├── 11.png │ ├── 12.PNG │ ├── 2.jpeg │ ├── 3.jpeg │ ├── 4.jpeg │ ├── 5.PNG │ ├── 6.PNG │ ├── 7.PNG │ ├── 8.jpg │ └── 9.PNG ├── train_cnn_imitate_5.py ├── train_cnn_imitate_56.py ├── train_cnn_imitate_6.py ├── train_cnn_real_5.py ├── train_cnn_real_56.py └── train_cnn_real_6.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | .static_storage/ 56 | .media/ 57 | local_settings.py 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | # data 107 | *.data 108 | *.bin 109 | *.npy 110 | *.csv 111 | *.big 112 | *.small 113 | /data 114 | /test 115 | /logs 116 | /developing 117 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2017 Jason-ChengYing,Li 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # simple-railway-captcha-solver 2 | 3 | 本專案自2018年後已經停止更新/維護,且台鐵已經在2019年改用reCAPTCHA代替傳統的驗證碼。 4 | 5 | (這個專案是我在大學閒暇時的研究,當時code寫得很雜亂,後續issue也都沒時間回覆。但還是希望有幫助到想做類似project的人!) 6 | 7 | **This project is no longer maintained. 
Archived this repo on 2022/11/18.** 8 | 9 | ---- 10 | 11 | [Click here or scroll down for english version](#english-version) 12 | 13 | ![image](./readme_img/head.gif) 14 | 15 | 本專案利用簡單的Convolutional Neural Network來實作辨識台鐵訂票網站的驗證碼,訓練集及驗證集的部分以模仿驗證碼樣式的方式來產生、另外測試集的部分則自台鐵訂票網站擷取,再以手動方式標記約3000筆。 16 | 17 | 目前模型單碼辨識率最高達到```99.39%```,在台鐵網站上以查詢剩餘車票功能來測試驗證碼,整體辨識成功率(全部字元正確)達到```91.57%```。 18 | 19 | 底下有詳盡的說明。 20 | 21 | |Name|Description| 22 | |----|----| 23 | |captcha_gen.py|模仿驗證碼樣式建立訓練集| 24 | |captcha_scrawl.py|從台鐵網站取得真實驗證碼圖| 25 | |train_cnn_imitate_5.py|建立CNN並以模仿驗證碼訓練(5碼辨識)| 26 | |train_cnn_imitate_6.py|建立CNN並以模仿驗證碼訓練(6碼辨識)| 27 | |train_cnn_imitate_56.py|建立CNN並以模仿驗證碼訓練(辨識是5碼or6碼)| 28 | |data_augment.py|用於真實驗證碼的資料增強| 29 | |train_cnn_real_5.py|建立CNN並以真實驗證碼訓練(5碼辨識)| 30 | |train_cnn_real_6.py|建立CNN並以真實驗證碼訓練(6碼辨識)| 31 | |train_cnn_real_56.py|建立CNN並以真實驗證碼訓練(辨識是5碼or6碼)| 32 | |demo_cnn.py|Demo載入模型並以測試集評估辨識率| 33 | |demo_online.py|Demo載入模型並在台鐵網站上評估辨識率| 34 | 35 | ## 溫馨提醒 36 | 鐵路法第65條中提到:```「...以不正方法將虛偽資料或不正指令輸入電腦或其相關設備而購買車票、取得訂票或取票憑證者,處五年以下有期徒刑或科或併科新臺幣三百萬元以下罰金。」```,我想使用程式辨識驗證碼來自動訂票,應該也在其中"不正方法"的範疇中。 37 | 38 | 此專案僅供學術研究,所以請不要利用建立的模型辨識驗證碼去自動訂票,這是違法的喔。 39 | 40 | ## 0.-Dependencies 41 | |Name|Version| 42 | |----|----| 43 | |tensorflow|1.4.0| 44 | |tensorflow-gpu|1.4.0| 45 | |tensorflow-tensorboard|0.4.0rc3| 46 | |Keras|2.1.2| 47 | |h5py|2.7.1| 48 | |Pillow|5.1.0| 49 | |numpy|1.13.3| 50 | 51 | ## 1.-Training set? 52 | 要建立一個辨識驗證碼的CNN模型其實並非難事,難的是要如何取得標記好的訓練集呢? 
53 | 54 | ![image](./readme_img/captcha_sample1.jpg)![image](./readme_img/captcha_sample2.jpg)![image](./readme_img/captcha_sample3.jpg) 55 | 56 | 在這邊我們會嘗試兩種方法(2.與3.): 57 | #### 2.-模仿驗證碼的樣式,自行產生訓練集和驗證集 58 | 因為要手動標記上萬張驗證碼是非常費時的,所以我們可以試著模仿產生一些驗證碼看看。 59 | 60 | 不過當然,我們產生的資料集必須非常接近真實的驗證碼,否則最後訓練完可能用在真實的驗證碼上效果會非常的差。 61 | 62 | #### 3.-標記少量的驗證碼,以資料增強的方式擴充資料集 63 | 因為不一定每一種驗證碼都可以很容易地找出他的樣式或規律等等,有時候我們還是得用手動方式標記一些驗證碼。 64 | 65 | 在這種情況下,我們可以透過資料增強的方法來擴充我們的資料集,讓我們的網路有更多資料可以學習。 66 | 67 | ```(註:在台鐵驗證碼的例子中,自從改版加入英文字後,因為英文字的出現機率遠低於數字(每digit大約只有1~5%),所以在手動標記的真實資料集中,英文字的比例是極低的,造成數據很不平衡。且在真實驗證碼中,一張驗證碼最多只會出現一個英文字,即使我們用資料增強的方式,也很難去做到平衡數字及英文的資料比例。雖然似乎可以透過設定class weight的方式來改善,但是keras好像不支援以one-hot encoding表示的輸出設定class weight...。這部分暫時想不到方式解決,不過若只是要訓練一個勉強堪用的模型,用這個方式還是可行的,因為實際上台鐵驗證碼英文出現的比例也沒有非常高。)``` 68 | 69 | -------------------- 70 | 71 | ## 2.1-Generate training and validation set 72 | 讓我們來模仿產生一些驗證碼吧! 73 | 首先我們要先觀察驗證碼,你可以寫一支爬蟲程式(eg.```captcha_scrawl.py```)去擷取一兩百張驗證碼回來細細比對。我們不難發現台鐵的驗證碼不外乎由兩個主要元素組成: 74 | - ```5 ~ 6碼```的數字及英文(不包含O和I),大小似乎不一致,而且都有經過旋轉,另外顏色是浮動的。 75 | - 背景是浮動的顏色,另外還有不少干擾的線條,看起來應該是矩形,由黑線和白線組成,且有部分會蓋到數字上面。 76 | 77 | 進一步研究會發現: 78 | - 數字的旋轉角度約在```-55 ~ 55度```間,大小約```25 ~ 27pt```。 79 | - 字型的部分,仔細觀察會發現同一個字會有兩種不一樣的樣式,推測是有兩種字型隨機更替,其中一個很明顯是```Courier New-Bold```,另一個比對一下也不難發現即是```Times New Roman-Bold```。 80 | - 背景和字型顏色的部分,可以用一些色彩均值化的手法快速的從數百張的驗證碼中得出每一張的背景及數字的顏色,進而我們就能算出顏色的範圍。這部分可以用OpenCV的k-means來實作,這邊就不再贅述。 81 | 82 | 背景的R/G/B範圍約是在```180 ~ 250```間,文字的部分則是```10 ~ 140```間。 83 | - 干擾的線條是矩形,有左、上是黑線條且右、下是白線條和倒過來,共兩種樣式(也可以當作是旋轉180度),平均大約會出現```30 ~ 32個```隨機分布在圖中,長寬都大約落在```5 ~ 21px```間。 84 | 另外,大約有4成的機會白線會蓋在數字上,黑線蓋在文字上的機率則更低。 85 | 86 | 有了這些觀察,只差一點點就可以產生訓練集了,我們現在來觀察文字都落在圖片上的甚麼位置上: 87 | 88 | ![image](./readme_img/captcha_seperate1.png)![image](./readme_img/captcha_seperate2.png)![image](./readme_img/captcha_seperate3.png) 89 | 90 | 從這幾張圖中不難看出文字並非規則地分布在圖片上,我們可以猜測文字是旋轉後被隨機左移或右移了,甚至還會有重疊的情況,所以沒辦法用切割的方式一次處理一個文字。 91 | 92 | 
以上就是我們簡單觀察到的驗證碼規則,訓練集產生的部分實作在```captcha_gen.py```中,雖然寫得有點雜亂,不過沒甚麼特別的地方,就是照著上面的規則產生,可以試著以自己的方式實作看看。 93 | 94 | ![image](./readme_img/captcha_sample4.jpg) 95 | 96 | ```python 97 | if __name__ == "__main__": 98 | generate(50000, "./data/56_imitate_train_set/", ENGP=100, FIVEP=50, ENGNOLIMIT=True, filename="train") 99 | generate(10240, "./data/56_imitate_vali_set/", ENGP=100, FIVEP=50, ENGNOLIMIT=True, filename="vali") 100 | generate(50000, "./data/5_imitate_train_set/", ENGP=100, FIVEP=100, ENGNOLIMIT=True, filename="train") 101 | generate(10240, "./data/5_imitate_vali_set/", ENGP=100, FIVEP=100, ENGNOLIMIT=True, filename="vali") 102 | generate(50000, "./data/6_imitate_train_set/", ENGP=100, FIVEP=0, ENGNOLIMIT=True, filename="train") 103 | generate(10240, "./data/6_imitate_vali_set/", ENGP=100, FIVEP=0, ENGNOLIMIT=True, filename="vali") 104 | ``` 105 | 106 | 最後會為我們預計建立的三個CNN(2.2.1會提到)各分別產生50000筆Training data和10240筆Validate data,答案則標記在csv檔中。 107 | 108 | ![image](./readme_img/csv.png)![image](./readme_img/generate.png) 109 | 110 | 111 | ## 2.2.1-Building Convolution Neural Network 112 | 有了資料集,我們就可以來建立CNN了! 
113 | 114 | 在這邊我們會建立三個CNN,分別是```1.辨識5碼驗證碼圖片的CNN```、```2.辨識6碼驗證碼圖片的CNN``` 以及 ```3.辨識圖片是5碼or6碼驗證碼的CNN```。 115 | 116 | 首先我們先來實作前兩個辨識驗證碼的CNN:輸入是```60*200```的圖片,共有3個channel(R/G/B),所以是shape會是```(60, 200, 3)```。 117 | 118 | 中間透過數層由ReLU函數激發的Convolution Layer擷取特徵,並以2x2的Max pooling layer採樣減少計算量、BatchNormalization layer做標準化 及 Dropout Layer隨機捨棄一些神經元(避免overfitting),最後用Flatten Layer來把資料降到1維,輸出到全連接層:5/6個34神經元的Softmax regression分類器。 119 | 120 | (註:34個代表數字0~9以及英文字母去除O及I的數量,另外5碼和6碼只差在最後有幾個Softmax regression分類器。) 121 | 122 | 以六碼為例(train_cnn_imitate_6.py): 123 | ```python 124 | in = Input((60, 200, 3)) 125 | out = in 126 | out = Conv2D(filters=32, kernel_size=(3, 3), padding='same', activation='relu')(out) 127 | out = Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(out) 128 | out = BatchNormalization()(out) 129 | out = MaxPooling2D(pool_size=(2, 2))(out) 130 | out = Dropout(0.3)(out) 131 | out = Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu')(out) 132 | out = Conv2D(filters=64, kernel_size=(3, 3), activation='relu')(out) 133 | out = BatchNormalization()(out) 134 | out = MaxPooling2D(pool_size=(2, 2))(out) 135 | out = Dropout(0.3)(out) 136 | out = Conv2D(filters=128, kernel_size=(3, 3), padding='same', activation='relu')(out) 137 | out = Conv2D(filters=128, kernel_size=(3, 3), activation='relu')(out) 138 | out = BatchNormalization()(out) 139 | out = MaxPooling2D(pool_size=(2, 2))(out) 140 | out = Dropout(0.3)(out) 141 | out = Conv2D(filters=256, kernel_size=(3, 3), activation='relu')(out) 142 | out = BatchNormalization()(out) 143 | out = MaxPooling2D(pool_size=(2, 2))(out) 144 | out = Flatten()(out) 145 | out = Dropout(0.3)(out) 146 | out = [Dense(34, name='digit1', activation='softmax')(out),\ 147 | Dense(34, name='digit2', activation='softmax')(out),\ 148 | Dense(34, name='digit3', activation='softmax')(out),\ 149 | Dense(34, name='digit4', activation='softmax')(out),\ 150 | Dense(34, name='digit5', activation='softmax')(out),\ 151 | Dense(34, 
name='digit6', activation='softmax')(out)] 152 | model = Model(inputs=in, outputs=out) 153 | ``` 154 | 155 | 完成後要來compile模型,這邊loss使用```categorical_crossentropy```、optimizer*使用```Adam```,而metrics理所當然是```accuracy```了。 156 | ```python 157 | model.compile(loss='categorical_crossentropy', optimizer='Adamax', metrics=['accuracy']) 158 | ``` 159 | 160 | *關於optimizer的選擇,可以參考這兩篇,寫得不錯: 161 | 1. An overview of gradient descent optimization algorithms - http://ruder.io/optimizing-gradient-descent/index.html 162 | 2. SGD,Adagrad,Adadelta,Adam等优化方法总结和比较 - http://ycszen.github.io/2016/08/24/SGD%EF%BC%8CAdagrad%EF%BC%8CAdadelta%EF%BC%8CAdam%E7%AD%89%E4%BC%98%E5%8C%96%E6%96%B9%E6%B3%95%E6%80%BB%E7%BB%93%E5%92%8C%E6%AF%94%E8%BE%83/ 163 | ** 164 | 165 | -- 166 | 167 | 最後來看看model的summary輸出長甚麼樣子: 168 | ```python 169 | model.summary() 170 | 171 | __________________________________________________________________________________________________ 172 | Layer (type) Output Shape Param # Connected to 173 | ================================================================================================== 174 | input_1 (InputLayer) (None, 60, 200, 3) 0 175 | __________________________________________________________________________________________________ 176 | conv2d_1 (Conv2D) (None, 60, 200, 32) 896 input_1[0][0] 177 | __________________________________________________________________________________________________ 178 | conv2d_2 (Conv2D) (None, 58, 198, 32) 9248 conv2d_1[0][0] 179 | __________________________________________________________________________________________________ 180 | batch_normalization_1 (BatchNor (None, 58, 198, 32) 128 conv2d_2[0][0] 181 | __________________________________________________________________________________________________ 182 | max_pooling2d_1 (MaxPooling2D) (None, 29, 99, 32) 0 batch_normalization_1[0][0] 183 | __________________________________________________________________________________________________ 184 | dropout_1 (Dropout) (None, 29, 99, 
32) 0 max_pooling2d_1[0][0] 185 | __________________________________________________________________________________________________ 186 | conv2d_3 (Conv2D) (None, 29, 99, 64) 18496 dropout_1[0][0] 187 | __________________________________________________________________________________________________ 188 | conv2d_4 (Conv2D) (None, 27, 97, 64) 36928 conv2d_3[0][0] 189 | __________________________________________________________________________________________________ 190 | batch_normalization_2 (BatchNor (None, 27, 97, 64) 256 conv2d_4[0][0] 191 | __________________________________________________________________________________________________ 192 | max_pooling2d_2 (MaxPooling2D) (None, 13, 48, 64) 0 batch_normalization_2[0][0] 193 | __________________________________________________________________________________________________ 194 | dropout_2 (Dropout) (None, 13, 48, 64) 0 max_pooling2d_2[0][0] 195 | __________________________________________________________________________________________________ 196 | conv2d_5 (Conv2D) (None, 13, 48, 128) 73856 dropout_2[0][0] 197 | __________________________________________________________________________________________________ 198 | conv2d_6 (Conv2D) (None, 11, 46, 128) 147584 conv2d_5[0][0] 199 | __________________________________________________________________________________________________ 200 | batch_normalization_3 (BatchNor (None, 11, 46, 128) 512 conv2d_6[0][0] 201 | __________________________________________________________________________________________________ 202 | max_pooling2d_3 (MaxPooling2D) (None, 5, 23, 128) 0 batch_normalization_3[0][0] 203 | __________________________________________________________________________________________________ 204 | dropout_3 (Dropout) (None, 5, 23, 128) 0 max_pooling2d_3[0][0] 205 | __________________________________________________________________________________________________ 206 | conv2d_7 (Conv2D) (None, 3, 21, 256) 295168 dropout_3[0][0] 207 | 
__________________________________________________________________________________________________ 208 | batch_normalization_4 (BatchNor (None, 3, 21, 256) 1024 conv2d_7[0][0] 209 | __________________________________________________________________________________________________ 210 | max_pooling2d_4 (MaxPooling2D) (None, 1, 10, 256) 0 batch_normalization_4[0][0] 211 | __________________________________________________________________________________________________ 212 | flatten_1 (Flatten) (None, 2560) 0 max_pooling2d_4[0][0] 213 | __________________________________________________________________________________________________ 214 | dropout_4 (Dropout) (None, 2560) 0 flatten_1[0][0] 215 | __________________________________________________________________________________________________ 216 | digit1 (Dense) (None, 34) 87074 dropout_4[0][0] 217 | __________________________________________________________________________________________________ 218 | digit2 (Dense) (None, 34) 87074 dropout_4[0][0] 219 | __________________________________________________________________________________________________ 220 | digit3 (Dense) (None, 34) 87074 dropout_4[0][0] 221 | __________________________________________________________________________________________________ 222 | digit4 (Dense) (None, 34) 87074 dropout_4[0][0] 223 | __________________________________________________________________________________________________ 224 | digit5 (Dense) (None, 34) 87074 dropout_4[0][0] 225 | __________________________________________________________________________________________________ 226 | digit6 (Dense) (None, 34) 87074 dropout_4[0][0] 227 | ================================================================================================== 228 | Total params: 1,106,540 229 | Trainable params: 1,105,580 230 | Non-trainable params: 960 231 | ``` 232 | 233 | 架構以圖片呈現的話: 234 | 235 | ![image](./readme_img/imitate6.png) 236 | 237 | ## 2.2.2-Building Another Convolution Neural Network 
238 | 前面提到,我們還要建立一個用來識別驗證碼是5碼還是6碼的模型,我們最後會讓資料先經過這個模型看看輸入的驗證碼是5碼還是6碼,再送入對應的模型去辨識出上面的文字。 239 | 240 | 這個模型的架構基本上跟前面兩個是一樣的,只是Dropout Rate以及輸出不同。輸出的部份我們從數個softmax分類器改為一個sigmoid分類器,並將loss改為```binary_crossentropy```來compile模型。 241 | 242 | 這部分實作於```train_cnn_imitate_56.py```,下面只大略列出不同處: 243 | 244 | ```python 245 | out = Dense(1, name='6digit', activation='sigmoid')(tensor_out) 246 | model = Model(inputs=in, outputs=out) 247 | model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']) 248 | ``` 249 | 250 | 最後當這個sigmoid分類器輸出>0.5時,我們視為辨識出6碼的驗證碼,反之則為5碼。 251 | 252 | ## 2.3-Load the training set 253 | 在訓練之前我們要先將資料載入到記憶體中,前面產生訓練集和驗證集的時候,我們是將驗證碼存成一張張編號好的圖片,並用csv檔記錄下了答案。 254 | 255 | 這邊一樣以6碼的為例,首先我們先處理X的部分,也就是特徵值,這邊就是指我們的圖片。 256 | 而要輸入進CNN的資料必須是numpy array的形式,所以我們用Pillow來讀取圖片並轉為numpy格式: 257 | 258 | ```python 259 | traincsv = open('./data/6_imitate_train_set/captcha_train.csv', 'r', encoding = 'utf8') 260 | for row in csv.reader(traincsv): 261 |    image = Image.open("./data/6_imitate_train_set/" + row[0] + ".jpg") # 讀取圖片 262 |    nparr = np.array(image) # 轉成np array 263 |    nparr = nparr / 255.0 264 | ``` 265 | 266 | 這時我們下```nparr.shape```,可以看到矩陣的大小是```(60, 200, 3)```,跟前面模型設計的Input是相同的。 267 | 268 | 而我們計劃使用50000張圖片來訓練,所以最後輸入給CNN的矩陣大小會是```(50000, 60, 200, 3)```,這部分只要利用stack就可以把它們合併,整理成下面: 269 | 270 | ```python 271 | train_data = np.stack([np.array(Image.open("./data/6_imitate_train_set/" + row[0] + ".jpg"))/255.0 for row in csv.reader(traincsv)]) 272 | ``` 273 | 274 | 最後train_data的shape就會是```(50000, 60, 200, 3)```。 275 | 276 | 接下來Y則是訓練集的標記,也就是我們訓練集的答案。 277 | 278 | 因為我們的模型是多輸出的結構(6組softmax函數分類器),所以Y要是一個含有6個numpy array的list,大概像是這樣: 279 | ``` 280 | [[第一張第1個數字,...,最後一張第1個數字], [第一張第2個數字,...,最後一張第2個數字], [...], [...], [...], [...]] 281 | ``` 282 | 而其中每個數字都是以one-hot encoding表示,例如0就是```[1, 0, 0, 0, ....,0]```,2就是```[0, 0, 1, 0, ....,0]``` 283 | 284 | ```python 285 | traincsv = open('./data/6_imitate_train_set/captcha_train.csv', 'r', encoding = 'utf8') # 讀取訓練集的標記 286 | 
read_label = [toonehot(row[1]) for row in csv.reader(traincsv)] # 將每一行的文字轉成one-hot encoding 287 | train_label = [[] for _ in range(6)] # 各組輸出的答案要放到train_label 288 | 289 | for arr in read_label: 290 | for index in range(6): 291 |        train_label[index].append(arr[index]) # 原本是[[第1字答案, ..., 第6字答案],......, [第1字答案, ..., 第6字答案]] 292 |                                              # 要轉成[[第1字答案,..., 第1字答案],..., [第6字答案,..., 第6字答案]]才符合Y的輸入 293 | train_label = [arr for arr in np.asarray(train_label)] # 最後要把6個numpy array 放在一個list 294 | ``` 295 | 296 | ## 2.4-Validation set 297 | 驗證集的載入方式跟訓練集相同,這邊略過。 298 | 299 | ## 2.5-Callback 300 | 在這邊要用到三個callback: 301 | 302 | ### 1.ModelCheckPoint 303 | 304 | ```python 305 | checkpoint = ModelCheckpoint(filepath, monitor='val_digit6_acc', verbose=1, save_best_only=True, mode='max') 306 | ``` 307 | 308 | 用於儲存最佳辨識率的模型,每次epoch完會檢查一次,如果比先前最佳的acc高,就會儲存model到filepath。 309 | 310 | 因為在多輸出模型中沒有像是各輸出平均的acc這種東西,觀察前幾epoch後發現```val_digit6_acc```上升最慢,因此用它當作checkpoint的monitor。 311 | (如果要自定義monitor可以自己寫callback,這部分留到未來有空再來實作。) 312 | 313 | ### 2.Earlystopping 314 | 315 | ```python 316 | earlystop = EarlyStopping(monitor='val_digit6_acc', patience=5, verbose=1, mode='auto') 317 | ``` 318 | 319 | 這邊的monitor設為val_digit6_acc,patience設為5,也就是在驗證集的val_digit6_acc連續5次不再下降時,就會提早結束訓練。(train_cnn_imitate_56是10) 320 | 321 | ### 3.TensorBoard 322 | 323 | ```python 324 | tensorBoard = TensorBoard(log_dir = "./logs", histogram_freq = 1) 325 | ``` 326 | 327 | TensorBoard可以讓我們更方便的以圖形化界面檢視訓練結果,要檢視時可以輸入```tensorboard --logdir=logs```來啟動。 328 | 329 | 最後把他們三個放到list中即可。 330 | ```python 331 | callbacks_list = [tensorBoard, earlystop, checkpoint] 332 | ``` 333 | 334 | ## 2.6-Training the model 335 | 至此為止我們已經把所有需要的資料都準備好了,現在只需要一台好電腦就可以開始訓練了,建議使用GPU來訓練,不然要很久,真的很久....。 336 | 337 | 若在訓練時出現Resource exhausted的錯誤,可以考慮調低一些參數(如batch_size)。 338 | 339 | ```python 340 | model.fit(train_data, train_label, batch_size=400, epochs=50, verbose=2, validation_data=(vali_data, vali_label), 
callbacks=callbacks_list) 341 | ``` 342 | 343 | ## 2.7-Result 344 | 三個模型都在訓練約25~30epochs後達到EarlyStopping的條件停止,val_acc皆達到```0.99```以上(下圖為imitate_6的訓練結果): 345 | 346 | ![image](./readme_img/imitate6_tensorboard.png) 347 | 348 | 接著利用手動標記的3000張真實驗證碼當測試集來評估模型的辨識率(demo_cnn.py): 349 | 350 | 辨識```5碼```驗證碼的模型(train_cnn_imitate_5)對真實驗證碼測試集的單碼辨識率達到平均約```98.87%```,一次辨識成功率(即一次5或6碼都辨識正確)達到約```94.55%```。 351 | 352 | 辨識```6碼```驗證碼的模型(train_cnn_imitate_6)對真實驗證碼測試集的單碼辨識率達到平均約```98.44%```,一次辨識成功率(即一次5或6碼都辨識正確)達到約```90.45%```。 353 | 354 | 分類驗證碼是5碼 or 6碼的模型(train_cnn_imitate_56)則達到約```98.13%```。 355 | 356 | ![image](./readme_img/imitate_result.png) 357 | 358 | ## 2.8-Online Test 359 | 模型都完成後,是時候到台鐵的網站上試試身手了! 360 | 361 | 不過開頭有提到以不正方式訂票是違法的,所以我們這邊僅在查詢剩餘車票的頁面下做測試,並設定適當的延遲,避免對網站伺服器造成負擔。(實作於```demo_online.py```) 362 | 363 | 這邊是以selenium控制chrome瀏覽器操作,這樣比較簡單,而且看的到畫面比較有感覺。 364 | 365 | 另外因為台鐵的驗證碼是浮動的,所以驗證碼圖片是以先screenshot,再將驗證碼位置圖片crop下來方式來取得的: 366 | 367 | ```python 368 | driver.save_screenshot('tmp.png') 369 | location = driver.find_element_by_id('idRandomPic').location 370 | x, y = location['x'] + 5, location['y'] + 5 371 | img = Image.open('tmp.png') 372 | captcha = img.crop((x, y, x+200, y+60)) 373 | captcha.convert("RGB").save('captcha.jpg', 'JPEG') 374 | ``` 375 | 376 | 其中```location['x'] + 5, location['y'] + 5```是因為畫面上的驗證碼有用css加了一個寬度5的外框上去。 377 | 378 | 最後執行了1000筆後,我們得到```91.57%```的成功率,大功告成! 
379 | 380 | -------------------- 381 | 382 | ## 3.1-Label training and dataset 383 | 在第三部份我們會用少量手動標記的驗證碼,透過資料增強(Data Augmentation)產生大量資料來訓練模型。 384 | 385 | 首先我們需要取得一些真實的驗證碼,我們可以寫一支程式簡單地從台鐵網站上下載驗證碼圖片回來(實作於```captcha_scrawl.py```): 386 | 387 | ```python 388 | SAVEPATH = "./data/manual_label/" 389 | url = 'http://railway1.hinet.net/ImageOut.jsp' 390 | for i in range(1, 3000): 391 | response = requests.get(url, stream=True) 392 | with open(SAVEPATH + str(i) + '.jpg', 'wb') as out_file: 393 | shutil.copyfileobj(response.raw, out_file) 394 | del response 395 | time.sleep(0.5) 396 | ``` 397 | 398 | 如此我們就得到了3000張驗證碼,其中大約有1500張是6碼,剩下的則是5碼。(在我寫Readme之前,5碼和6碼的比例大約是1:3,但現在測試卻是1:1,可能網站有更新過吧) 399 | 400 | 之後我們可以仿照 2.1 的格式去標記驗證碼答案於csv中,也可以做個小工具來輔助標記(https://github.com/JasonLiTW/captcha-label-tool)。 401 | 402 | ## 3.2-Data Augmentation 403 | 因為我們標記的驗證碼有點太少了,所以我們要透過Data Augmentation的方式來產生更多的訓練資料來用! 404 | 405 | 我們使用Keras內建的ImageDataGenerator,他提供了非常多的功能,詳細可以看這裡:https://keras.io/preprocessing/image/ 406 | 407 | 我們這邊會用到的功能有:```rotation_range=5```(旋轉0~5度), ```shear_range=0.2```(斜變0~0.2度), ```zoom_range=0.05```(放大0~0.05倍) 408 | 409 | ```python 410 | datagen = ImageDataGenerator(rotation_range=5,shear_range=0.2,zoom_range=0.05,fill_mode='nearest') 411 | ``` 412 | 413 | 這邊的datagen是一個generator,它會隨機對圖片做旋轉、斜變和放大。我們預計要讓每張圖片產生50張增強的圖片。詳細的部分實作於```data_augment.py```,依序修改參數對5碼及6碼的驗證碼執行,即可由原本3000張產生到變成150000張驗證碼圖片。 414 | 415 | ![image](./readme_img/dataaugmentation.png) 416 | 417 | 418 | ## 3.3-Building Convolution Neural Network 419 | 這部分跟2.2的模型是完全相同的,這邊不再贅述。 420 | 421 | ## 3.4-Load the training and validation set 422 | 以6碼的為例,我們現在有75000張驗證碼圖片,我們取其中前60000張為訓練集,後15000張為驗證集來訓練。(驗證集也是一樣,只是改成取後15000張。) 423 | 424 | ```python 425 | traincsv = open('./data/6_real_train_set/captcha_train.csv', 'r', encoding = 'utf8') 426 | train_data = np.stack([np.array(Image.open("./data/6_real_train_set/" + row[0] + ".jpg"))/255.0 for row in csv.reader(traincsv)][:60000]) 427 | traincsv = 
open('./data/6_real_train_set/captcha_train.csv', 'r', encoding = 'utf8') 428 | read_label = [toonehot(row[1]) for row in csv.reader(traincsv)][:60000] 429 | train_label = [[] for _ in range(6)] 430 | for arr in read_label: 431 | for index in range(6): 432 | train_label[index].append(arr[index]) 433 | train_label = [arr for arr in np.asarray(train_label)] 434 | ``` 435 | 436 | 另外判斷是5碼or6碼的模型,則是各以5/6碼的前60000張各隨機挑選20000張=40000張當訓練集,剩下15000張各隨機挑選5000張=10000張當驗證集。 437 | 438 | ## 3.5-Callback 439 | 跟2.5相同,略過。 440 | 441 | ## 3.6-Training the model 442 | 跟2.6相同,略過。 443 | 444 | ## 3.7-Result 445 | 三個模型都在訓練約20 epochs後達到EarlyStopping的條件停止,val_acc皆達到```0.99```以上。 446 | 447 | 但由於在```demo_cnn.py```中使用的3000張手動標記驗證碼即為真實驗證碼模型的訓練集,所以沒辦法用來評估其辨識率,所以這邊我們會留到下一部分(3.8)到台鐵網站來評估。 448 | 449 | ## 3.8-Online Test 450 | 最後我們一樣來到台鐵網站做測試,程式的部分同2.8,只要修改model路徑即可。 451 | 452 | 執行了1000筆後,我們得到```51.63%```的整體成功率,雖然沒有上一部分的結果那麼好,但仍堪用了。 453 | 454 | ```(註:在台鐵驗證碼的例子中,自從改版加入英文字後,因為英文字的出現機率遠低於數字(每digit大約只有1~5%),所以在手動標記的真實資料集中,英文字的比例是極低的,造成數據很不平衡。且在真實驗證碼中,一張驗證碼最多只會出現一個英文字,即使我們用資料增強的方式,也很難去做到平衡數字及英文的資料比例。雖然似乎可以透過設定class weight的方式來改善,但是keras好像不支援以one-hot encoding表示的輸出設定class weight...。這部分暫時想不到方式解決,不過若只是要訓練一個勉強堪用的模型,用這個方式還是可行的,因為實際上台鐵驗證碼英文出現的比例也沒有非常高。)``` 455 | 456 | 有趣的是,在交叉測試後我們發現,在辨識是5碼or6碼的模型中,使用真實驗證碼(```train_cnn_real_56.py```)訓練出來的模型的準確率是比使用模仿的(```train_cnn_imitate_56.py```)還要稍微高一些些的。 457 | 458 | 而另外兩個真實驗證碼訓練出來的模型(```train_cnn_real_5.py```及```train_cnn_real_6.py```),在英文字母的辨識上辨識率可以說是接近0%,但其實每個digit的辨識率應該都還有90%左右,不過因為乘上同時5/6碼,整體的辨識率就很低了。 459 | 460 | 461 | ## 4.-Issue & Todo 462 | 1. 更新英文readme。 463 | 2. 重寫captcha_gen.py,有點亂。 464 | 3. 嘗試了使用Conditional-DCGAN產生驗證碼,但一直發生mode collapse,之後有空再弄看看了。 465 | 4. 嘗試了使用Capsule Network但效果不太好...等有空的時候再整理放上來。 466 | 467 | 468 | ## 5.-Reference 469 | 1. An overview of gradient descent optimization algorithms - http://ruder.io/optimizing-gradient-descent/index.html 470 | 2. 
SGD,Adagrad,Adadelta,Adam等优化方法总结和比较 - http://ycszen.github.io/2016/08/24/SGD%EF%BC%8CAdagrad%EF%BC%8CAdadelta%EF%BC%8CAdam%E7%AD%89%E4%BC%98%E5%8C%96%E6%96%B9%E6%B3%95%E6%80%BB%E7%BB%93%E5%92%8C%E6%AF%94%E8%BE%83/ 471 | 3. Going Deeper with Convolutions - http://arxiv.org/abs/1409.4842 472 | 473 | ## 6.-Contact 474 | 如果有任何建議或問題,請不吝發Issue或mail(jason860421gmail.com)給我。 475 | 476 | 477 | -------------------- 478 | #### english version 479 | ## Note: English version currently is old version! 480 | # simple-railway-captcha-solver 481 | ![image](./readme_img/old/1.jpeg) 482 | This project uses simple convolution neural network to implement solving the captcha(as above, in Taiwan railway booking website).The training set is generated by imitating the style of captcha, and the validation set is crawling from the booking site and labeled manually for about 1000 records. 483 | 484 | Currently, the accuracy of a single digit on the validation set is about ```98.84%```, and overall accuracy is ```91.13%``` (Successfully recognize 6-digits at once). 485 | 486 | |Name|Description| 487 | |----|----| 488 | |captcha_gen.py|Generating training set by imitating the style of captcha.| 489 | |train_cnn.py |Building the model and train it.| 490 | |demo_solver.py|Demo:Load the model and solve the captcha.| 491 | 492 | ## Dependecies 493 | |Name|Version| 494 | |----|----| 495 | |tensorflow|1.4.0| 496 | |tensorflow-gpu|1.4.0| 497 | |tensorflow-tensorboard|0.4.0rc3| 498 | |Keras|2.1.2| 499 | |h5py|2.7.1| 500 | |Pillow|4.3.0| 501 | |numpy|1.13.3| 502 | 503 | ## Training set? 504 | It is not difficult for building a CNN model to solve a captcha, but where and how do we get a labeled training set? 505 | 506 | ![image](./readme_img/old/2.jpeg)![image](./readme_img/old/3.jpeg)![image](./readme_img/old/4.jpeg) 507 | 508 | We can write a program to crawl thousands of captcha image, and labeled it manually, but it's a time-consuming job! 
Maybe we can try to generate some captcha image by imitating it. 509 | But of course, the image we generate should be really close to the real, otherwise, the accuracy on the validation set will be really bad. 510 | 511 | ## Generate training set 512 | 513 | Firstly we have to observe the captcha, it's easy to find that the captcha is made up of two primary elements: 514 | - ```5 ~ 6 digits``` number and the text size is not same. Furthermore, they are being rotated, and the color is floating. 515 | - The color of background is floating, and there are some white and black interference lines, and some of them will overlay on the number. 516 | 517 | And more...: 518 | - The angle of rotation is between about ```-55 ~ 55 degrees```, and the size is about ```25 ~ 27pt```. 519 | - We can find that one number has not only one style, so we guess that there are two fonts used at random. The first one obviously is ```Courier New-Bold```, and the second one is ```Times New Roman-Bold```.(You can use software such as Photoshop to cross-comparison.) 520 | - About the range of background and text color, we can use color quantization such as k-means to get the color of every background and text, and so we can calculate the color range.(I used k-means in opencv to implement.) 521 | - The color range(R/G/B) of the background is between about ```180 ~ 250```, and text is between ```10 ~ 140```. 522 | - Those interference lines form a rectangle, they have two styles: left and up sides are black, right and down sides are white, and vice versa.(you can also treat them as be rotated 180 degrees). 523 | - The number of the rectangle is between about ```30 ~ 32```, randomly distributed on the captcha image, and the width and height is between about ```5 ~ 21px```. Besides, about 40% of the white lines will overlay on the number, and about 20% of the black lines. 524 | 525 | With these observations, we are about to generate training set!
Now, let's observe where the numbers are placed on the image: 526 | 527 | ![image](./readme_img/old/5.PNG)![image](./readme_img/old/6.PNG)![image](./readme_img/old/7.PNG) 528 | 529 | From these images we can find that the text(number) are not regularly distributed on the image, we can guess that the text is randomly moved left or right after a rotation. There is even some text overlapping together, so we can't crop the image and process only one number at a time. 530 | 531 | Above is the captcha rule we simply observed. The implementation of training set generation is in ```captcha_gen.py```, you can try to implement it in your own way. 532 | 533 | ![image](./readme_img/old/8.jpg) 534 | 535 | The generator finally will output 50,000 captcha image and a csv labeled answer. 536 | 537 | ![image](./readme_img/old/9.PNG)![image](./readme_img/old/10.PNG) 538 | 539 | 540 | ## Building Convolution Neural Network 541 | 542 | Let's build a simple CNN model! 543 | 544 | The input is ```60*200``` image, it has 3 channel(R/G/B), so the shape is ```(60, 200, 3)```. 545 | 546 | Firstly, the input goes through many convolution layers activated by ReLU function to capture features, and performs downsampling by Max pooling layer, and then gets into Dropout layer(randomly drop out some unit to avoid overfitting) and Flatten layer. Finally, they output to the fully connected layer: 6 Softmax regression classifiers, each with 10 neurons.
547 | 548 | ```python 549 | tensor_in = Input((60, 200, 3)) 550 | out = tensor_in 551 | out = Conv2D(filters=32, kernel_size=(3, 3), padding='same', activation='relu')(out) 552 | out = Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(out) 553 | out = MaxPooling2D(pool_size=(2, 2))(out) 554 | out = Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu')(out) 555 | out = Conv2D(filters=64, kernel_size=(3, 3), activation='relu')(out) 556 | out = MaxPooling2D(pool_size=(2, 2))(out) 557 | out = Conv2D(filters=128, kernel_size=(3, 3), padding='same', activation='relu')(out) 558 | out = Conv2D(filters=128, kernel_size=(3, 3), activation='relu')(out) 559 | out = MaxPooling2D(pool_size=(2, 2))(out) 560 | out = Conv2D(filters=256, kernel_size=(3, 3), activation='relu')(out) 561 | out = MaxPooling2D(pool_size=(2, 2))(out) 562 | out = Flatten()(out) 563 | out = Dropout(0.5)(out) 564 | out = [Dense(10, name='digit1', activation='softmax')(out),\ 565 | Dense(10, name='digit2', activation='softmax')(out),\ 566 | Dense(10, name='digit3', activation='softmax')(out),\ 567 | Dense(10, name='digit4', activation='softmax')(out),\ 568 | Dense(10, name='digit5', activation='softmax')(out),\ 569 | Dense(10, name='digit6', activation='softmax')(out)] 570 | model = Model(inputs=tensor_in, outputs=out) 571 | ``` 572 | 573 | Now we can do the next step: compile the model: loss use ```categorical_crossentropy```, optimizer* use ```Adamax```, and metrics is ```accuracy```. 574 | ```python 575 | model.compile(loss='categorical_crossentropy', optimizer='Adamax', metrics=['accuracy']) 576 | ``` 577 | 578 | *About the choice of an optimizer, you can refer below: 579 | 1. An overview of gradient descent optimization algorithms - http://ruder.io/optimizing-gradient-descent/index.html 580 | 2. 
SGD,Adagrad,Adadelta,Adam等优化方法总结和比较 - http://ycszen.github.io/2016/08/24/SGD%EF%BC%8CAdagrad%EF%BC%8CAdadelta%EF%BC%8CAdam%E7%AD%89%E4%BC%98%E5%8C%96%E6%96%B9%E6%B3%95%E6%80%BB%E7%BB%93%E5%92%8C%E6%AF%94%E8%BE%83/ 581 | ** 582 | 583 | -- 584 | 585 | Okay! Now we have finished the design of the model, let's see the summary of model: 586 | 587 | ```python 588 | model.summary() 589 | 590 | __________________________________________________________________________________________________ 591 | Layer (type) Output Shape Param # Connected to 592 | ====================================================== 593 | input_1 (InputLayer) (None, 60, 200, 3) 0 594 | __________________________________________________________________________________________________ 595 | conv2d_1 (Conv2D) (None, 60, 200, 32) 896 input_1[0][0] 596 | __________________________________________________________________________________________________ 597 | conv2d_2 (Conv2D) (None, 58, 198, 32) 9248 conv2d_1[0][0] 598 | __________________________________________________________________________________________________ 599 | max_pooling2d_1 (MaxPooling2D) (None, 29, 99, 32) 0 conv2d_2[0][0] 600 | __________________________________________________________________________________________________ 601 | conv2d_3 (Conv2D) (None, 29, 99, 64) 18496 max_pooling2d_1[0][0] 602 | __________________________________________________________________________________________________ 603 | conv2d_4 (Conv2D) (None, 27, 97, 64) 36928 conv2d_3[0][0] 604 | __________________________________________________________________________________________________ 605 | max_pooling2d_2 (MaxPooling2D) (None, 13, 48, 64) 0 conv2d_4[0][0] 606 | __________________________________________________________________________________________________ 607 | conv2d_5 (Conv2D) (None, 13, 48, 128) 73856 max_pooling2d_2[0][0] 608 | __________________________________________________________________________________________________ 609 | conv2d_6 (Conv2D) 
(None, 11, 46, 128) 147584 conv2d_5[0][0] 610 | __________________________________________________________________________________________________ 611 | max_pooling2d_3 (MaxPooling2D) (None, 5, 23, 128) 0 conv2d_6[0][0] 612 | __________________________________________________________________________________________________ 613 | conv2d_7 (Conv2D) (None, 3, 21, 256) 295168 max_pooling2d_3[0][0] 614 | __________________________________________________________________________________________________ 615 | max_pooling2d_4 (MaxPooling2D) (None, 1, 10, 256) 0 conv2d_7[0][0] 616 | __________________________________________________________________________________________________ 617 | flatten_1 (Flatten) (None, 2560) 0 max_pooling2d_4[0][0] 618 | __________________________________________________________________________________________________ 619 | dropout_1 (Dropout) (None, 2560) 0 flatten_1[0][0] 620 | __________________________________________________________________________________________________ 621 | digit1 (Dense) (None, 10) 25610 dropout_1[0][0] 622 | __________________________________________________________________________________________________ 623 | digit2 (Dense) (None, 10) 25610 dropout_1[0][0] 624 | __________________________________________________________________________________________________ 625 | digit3 (Dense) (None, 10) 25610 dropout_1[0][0] 626 | __________________________________________________________________________________________________ 627 | digit4 (Dense) (None, 10) 25610 dropout_1[0][0] 628 | __________________________________________________________________________________________________ 629 | digit5 (Dense) (None, 10) 25610 dropout_1[0][0] 630 | __________________________________________________________________________________________________ 631 | digit6 (Dense) (None, 10) 25610 dropout_1[0][0] 632 | ======================================================= 633 | Total params: 735,836 634 | Trainable params: 735,836 635 | 
Non-trainable params: 0 636 | ``` 637 | 638 | ![image](./readme_img/old/11.png) 639 | 640 | ## Load the training set 641 | Before training the model, we have to load the data into memory. 642 | 643 | Firstly we have to process X part: feature(our captcha image). 644 | The data we input to CNN should be numpy array type, so we use Pillow to read image and convert it to numpy array. 645 | 646 | ```python 647 | for index in range(1, 50001, 1): 648 | image = Image.open("./data/train_set/" + str(index) + ".jpg") #Load our image 649 | nparr = np.array(image) # Convert to numpy array 650 | nparr = nparr / 255.0 651 | ``` 652 | 653 | The shape of nparr is ```(60, 200, 3)```, it's the same as the input we just designed in the model. 654 | And we plan to use 50,000 captcha image to train the model, so the input shape to CNN will be ```(50000, 60, 200, 3)```. Use numpy.stack to merge them all: 655 | 656 | ```python 657 | train_data = np.stack([np.array(Image.open("./data/train_set/" + str(index) + ".jpg"))/255.0 for index in range(1, 50001, 1)]) 658 | ``` 659 | 660 | Now, the shape of train_data is ```(50000, 60, 200, 3)```. 661 | 662 | The next is Y part, label: the answer of the training set.
663 | Because the model is multi-output(6 softmax regression classifier), so the Y should be a list containing 6 numpy array, like this: 664 | ``` 665 | [[First digit of first image,..., First digit of last image], [Second digit of first image,..., Second digit of last image], [...], [...], [...], [...]] 666 | ``` 667 | And every digit is present as one-hot encoding, for example 0 is ```[1, 0, 0, 0, ....,0]```, 2 is```[0, 0, 1, 0, ....,0]``` 668 | 669 | ```python 670 | traincsv = open('./data/train_set/train.csv', 'r', encoding = 'utf8') 671 | read_label = [toonehot(row[1]) for row in csv.reader(traincsv)] 672 | train_label = [[] for _ in range(6)] 673 | for arr in read_label: 674 | for index in range(6): 675 | train_label[index].append(arr[index]) 676 | train_label = [arr for arr in np.asarray(train_label)] 677 | ``` 678 | 679 | ## Validation set 680 | The validation set is real captcha image crawl from the railway booking website and labeled manually. Load the data as same as above, and X(feature(image)) put in ```vali_data```, Y(label) in ```vali_label```. 681 | 682 | ## Callback 683 | We are using 3 callbacks: 684 | 685 | ### 1.ModelCheckPoint 686 | 687 | ```python 688 | checkpoint = ModelCheckpoint(filepath, monitor='val_digit6_acc', verbose=1, save_best_only=True, mode='max') 689 | ``` 690 | For saving best accuracy model, it will check after every epoch, and save the model to filepath if the accuracy is better than before. 691 | 692 | ### 2.Earlystopping 693 | 694 | ```python 695 | earlystop = EarlyStopping(monitor='val_loss', patience=2, verbose=1, mode='auto') 696 | ``` 697 | The monitor set to ```val_loss```, ```patience``` set to 2, that is, if the loss of validation set didn't improve twice in a row, training will be stopped. 698 | 699 | ### 3.TensorBoard 700 | 701 | ```python 702 | tensorBoard = TensorBoard(log_dir = "./logs", histogram_freq = 1) 703 | ``` 704 | TensorBoard is a great visualization tool, we can use it to view our training result. 
705 | 706 | ```python 707 | tensorboard --logdir=logs 708 | ``` 709 | to start it. 710 | 711 | 712 | Finally, put them into a list. 713 | ```python 714 | callbacks_list = [tensorBoard, earlystop, checkpoint] 715 | ``` 716 | 717 | ## Training the model 718 | We have prepared everything we need so far, now we can start training the model! 719 | (If you get a Resource exhausted error, try to reduce ```batch_size```.) 720 | 721 | ```python 722 | model.fit(train_data, train_label, batch_size=400, epochs=50, verbose=2, validation_data=(vali_data, vali_label), callbacks=callbacks_list) 723 | ``` 724 | 725 | ## Result 726 | After 15 epochs, the training was stopped by EarlyStopping. 727 | The accuracy for a single digit on the validation set is about ```98.84%```, and overall accuracy is ```91.13%``` (Successfully recognize 6-digits at once). 728 | 729 | ![image](./readme_img/old/12.PNG) 730 | 731 | ## Issue & Todo 732 | 1. Currently unable to solve 5-digits captcha. Maybe implement by CNN + RNN. 733 | 2. Improve the grammar and everything in English version README. 734 | 3. Re-write captcha_gen.py in a better way. 735 | 4. Try to use GAN(Generative Adversarial Network) to generate the training set. 736 | 5. Try to create a new model with capsule network. 737 | 738 | ## Reference 739 | 1. An overview of gradient descent optimization algorithms - http://ruder.io/optimizing-gradient-descent/index.html 740 | 2. SGD,Adagrad,Adadelta,Adam等优化方法总结和比较 - http://ycszen.github.io/2016/08/24/SGD%EF%BC%8CAdagrad%EF%BC%8CAdadelta%EF%BC%8CAdam%E7%AD%89%E4%BC%98%E5%8C%96%E6%96%B9%E6%B3%95%E6%80%BB%E7%BB%93%E5%92%8C%E6%AF%94%E8%BE%83/ 741 | 3.
Going Deeper with Convolutions - http://arxiv.org/abs/1409.4842 742 | -------------------------------------------------------------------------------- /captcha_gen.py: -------------------------------------------------------------------------------- 1 | from PIL import Image, ImageDraw, ImageFont 2 | from random import randint 3 | import csv 4 | import numpy as np 5 | FONTPATH = ["./data/font/times-bold.ttf", "./data/font/courier-bold.ttf"] 6 | ENGSTR = "ABCDEFGHJKLMNPQRSTUVWXYZ" # 沒有O和I 7 | LETTERSTR = "0123456789ABCDEFGHJKLMNPQRSTUVWXYZ" 8 | 9 | 10 | class rect: 11 | def __init__(self): 12 | self.size = (randint(5, 21), randint(5, 21)) 13 | self.location = (randint(1, 199), randint(1, 59)) 14 | self.luoverlay = True if randint(1, 10) > 6 else False 15 | self.rdoverlay = False if self.luoverlay else True if randint(1, 10) > 8 else False 16 | self.lucolor = 0 if randint(0, 1) else 255 17 | self.rdcolor = 0 if self.lucolor == 255 else 255 18 | self.ludrawn = False 19 | self.rddrawn = False 20 | self.pattern = randint(0, 1) 21 | 22 | 23 | def draw(self, image, overlay): 24 | if((overlay or not self.luoverlay) and not self.ludrawn): 25 | self.ludrawn = True 26 | stp = self.location 27 | transparent = int(255 * 0.45 if self.lucolor == 0 else 255 * 0.8) 28 | color = (self.lucolor, self.lucolor, self.lucolor, transparent) 29 | uline = Image.new("RGBA", (self.size[0], 1), color) 30 | lline = Image.new("RGBA", (1, self.size[1]), color) 31 | image.paste(uline, stp, uline) 32 | image.paste(lline, stp, lline) 33 | if((overlay or not self.rdoverlay) and not self.rddrawn): 34 | self.rddrawn = True 35 | dstp = (self.location[0], self.location[1] + self.size[1]) 36 | rstp = (self.location[0] + self.size[0], self.location[1]) 37 | transparent = int(255 * 0.45 if self.rdcolor == 0 else 255 * 0.8) 38 | color = (self.rdcolor, self.rdcolor, self.rdcolor, transparent) 39 | dline = Image.new("RGBA", (self.size[0], 1), color) 40 | rline = Image.new("RGBA", (1, self.size[1]), color) 41 | 
image.paste(dline, dstp, dline) 42 | image.paste(rline, rstp, rline) 43 | 44 | 45 | class captchatext: 46 | def __init__(self, priority, offset, captchalen, engletter, ENGNOLIMIT): 47 | self.engletter = engletter 48 | if ENGNOLIMIT: 49 | engletter = True if randint(1, 34) <= 24 else False 50 | if engletter: 51 | self.letter = ENGSTR[randint(0, len(ENGSTR) - 1)] 52 | else: 53 | self.letter = str(randint(0, 9)) 54 | self.color = [randint(10, 140) for _ in range(3)] 55 | self.angle = randint(-55, 55) 56 | self.priority = priority 57 | self.offset = offset 58 | self.next_offset = 0 59 | self.captchalen = captchalen 60 | 61 | 62 | def draw(self, image): 63 | color = (self.color[0], self.color[1], self.color[2], 255) 64 | font = ImageFont.truetype(FONTPATH[randint(0, 1)], randint(25, 27) * 10) 65 | text = Image.new("RGBA", (font.getsize(self.letter)[0], 300), (0, 0, 0, 0)) 66 | textdraw = ImageDraw.Draw(text) 67 | textdraw.text((0, 0), self.letter, font=font, fill=color) 68 | text = text.rotate(self.angle, expand=True) 69 | text = text.resize((int(text.size[0] / 10), int(text.size[1] / 10))) 70 | base = int(self.priority * (200 / self.captchalen)) 71 | rand_min = (self.offset - base - 4) if (self.offset - base - 4) >= -15 else -15 72 | rand_min = 0 if self.priority == 0 else rand_min 73 | avg_dp = int(200 / self.captchalen) 74 | rand_max = (avg_dp - text.size[0]) if self.priority == self.captchalen - 1 else (avg_dp - text.size[0] + 10) 75 | try: 76 | displace = randint(rand_min, rand_max) 77 | except: 78 | displace = rand_max 79 | location = (base + displace, randint(3, 23)) 80 | self.next_offset = location[0] + text.size[0] 81 | image.paste(text, location, text) 82 | 83 | 84 | def generate(GENNUM, SAVEPATH, ENGP=25, FIVEP=0, ENGNOLIMIT=False, filename="train"): 85 | captchacsv = open(SAVEPATH + "captcha_{:s}.csv".format(filename), 'w', encoding = 'utf8', newline = '') 86 | lencsv = open(SAVEPATH + "len_{:s}.csv".format(filename), 'w', encoding = 'utf8', newline = '') 87 
| letterlist = [] 88 | lenlist = [] 89 | for index in range(1, GENNUM + 1, 1): 90 | captchastr = "" 91 | captchalen = 5 if randint(1, 100) <= FIVEP else 6 92 | engat = randint(0, captchalen - 1) if randint(1, 100) <= ENGP else -1 93 | bgcolor = [randint(180, 250) for _ in range(3)] 94 | captcha = Image.new('RGBA', (200, 60), (bgcolor[0], bgcolor[1], bgcolor[2], 255)) 95 | rectlist = [rect() for _ in range(32)] 96 | for obj in rectlist: 97 | obj.draw(image=captcha, overlay=False) 98 | offset = 0 99 | for i in range(captchalen): 100 | newtext = captchatext(i, offset, captchalen, (True if engat == i else False), ENGNOLIMIT) 101 | newtext.draw(image=captcha) 102 | offset = newtext.next_offset 103 | captchastr += str(newtext.letter) 104 | letterlist.append([str(index).zfill(len(str(GENNUM))), captchastr]) 105 | lenlist.append([str(index).zfill(len(str(GENNUM))), captchalen]) 106 | for obj in rectlist: 107 | obj.draw(image=captcha, overlay=True) 108 | captcha.convert("RGB").save(SAVEPATH + str(index).zfill(len(str(GENNUM))) + ".jpg", "JPEG") 109 | writer = csv.writer(captchacsv) 110 | writer.writerows(letterlist) 111 | writer = csv.writer(lencsv) 112 | writer.writerows(lenlist) 113 | captchacsv.close() 114 | lencsv.close() 115 | 116 | 117 | if __name__ == "__main__": 118 | generate(50000, "./data/56_imitate_train_set/", ENGP=100, FIVEP=50, ENGNOLIMIT=True, filename="train") 119 | generate(10240, "./data/56_imitate_vali_set/", ENGP=100, FIVEP=50, ENGNOLIMIT=True, filename="vali") 120 | generate(50000, "./data/5_imitate_train_set/", ENGP=100, FIVEP=100, ENGNOLIMIT=True, filename="train") 121 | generate(10240, "./data/5_imitate_vali_set/", ENGP=100, FIVEP=100, ENGNOLIMIT=True, filename="vali") 122 | generate(50000, "./data/6_imitate_train_set/", ENGP=100, FIVEP=0, ENGNOLIMIT=True, filename="train") 123 | generate(10240, "./data/6_imitate_vali_set/", ENGP=100, FIVEP=0, ENGNOLIMIT=True, filename="vali") 124 | 
-------------------------------------------------------------------------------- /captcha_scrawl.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | import requests 3 | import time 4 | SAVEPATH = "./data/manual_label/" 5 | url = 'http://railway1.hinet.net/ImageOut.jsp' 6 | for i in range(1, 3000): 7 | response = requests.get(url, stream=True) 8 | with open(SAVEPATH + str(i) + '.jpg', 'wb') as out_file: 9 | shutil.copyfileobj(response.raw, out_file) 10 | del response 11 | time.sleep(0.5) 12 | -------------------------------------------------------------------------------- /data_augment.py: -------------------------------------------------------------------------------- 1 | from keras.preprocessing.image import ImageDataGenerator 2 | from PIL import Image 3 | import numpy as np 4 | import csv 5 | 6 | outputcsv = open('./data/6_real_train_set/captcha_train.csv', 'w', encoding = 'utf8', newline = '') # 輸出csv 7 | inputcsv = open('./data/manual_label/captcha_vali.csv', 'r', encoding = 'utf8') 8 | data = [np.array(Image.open('./data/manual_label/' + row[0] + ".jpg")) for row in csv.reader(inputcsv) if len(row[1]) == 6] # 只讀答案是6位的 9 | inputcsv = open('./data/manual_label/captcha_vali.csv', 'r', encoding = 'utf8') 10 | oldanswer = [row[1] for row in csv.reader(inputcsv) if len(row[1]) == 6] # 只讀答案是6位的 11 | answer = [] 12 | datagen = ImageDataGenerator(rotation_range=5,shear_range=0.2,zoom_range=0.05,fill_mode='nearest') 13 | index, augmentindex, oldanswerindex = 0, 0, 0 14 | for img in data: 15 | for batch in datagen.flow(np.asarray([img]), batch_size=1): 16 | index += 1 17 | augmentindex += 1 18 | batch = batch.reshape((60,200,3)) 19 | Image.fromarray(np.uint8(batch)).convert("RGB").save("./data/6_real_train_set/" + str(index) + ".jpg", "JPEG") 20 | answer.append((str(index), oldanswer[oldanswerindex])) 21 | if augmentindex >= 50: # 每張產生50個 22 | oldanswerindex += 1 23 | augmentindex = 0 24 | break 25 | 
csv.writer(outputcsv).writerows(answer) 26 | -------------------------------------------------------------------------------- /demo_cnn.py: -------------------------------------------------------------------------------- 1 | from keras.models import load_model 2 | from keras.models import Model 3 | from keras import backend as K 4 | from PIL import Image 5 | import numpy as np 6 | import os 7 | import csv 8 | LETTERSTR = "0123456789ABCDEFGHJKLMNPQRSTUVWXYZ" 9 | 10 | 11 | def toonehot(text): 12 | labellist = [] 13 | for letter in text: 14 | onehot = [0 for _ in range(34)] 15 | num = LETTERSTR.find(letter) 16 | onehot[num] = 1 17 | labellist.append(onehot) 18 | return labellist 19 | 20 | 21 | print("Loading test data...") 22 | testcsv = open('./data/manual_label/captcha_test.csv', 'r', encoding = 'utf8') 23 | test_data = np.stack([np.array(Image.open("./data/manual_label/" + row[0] + ".jpg"))/255.0 for row in csv.reader(testcsv)]) 24 | testcsv = open('./data/manual_label/captcha_test.csv', 'r', encoding = 'utf8') 25 | test_label = [row[1] for row in csv.reader(testcsv)] 26 | print("Loading model...") 27 | K.clear_session() 28 | model = None 29 | model5 = load_model("./data/model/imitate_5_model.h5") 30 | model6 = load_model("./data/model/imitate_6_model.h5") 31 | model56 = load_model("./data/model/real_56_model.h5") 32 | print("Predicting...") 33 | prediction56 = [6 if arr[0] > 0.5 else 5 for arr in model56.predict(test_data)] # 5/6碼分類 34 | prediction5 = model5.predict(test_data) # 5碼 35 | prediction6 = model6.predict(test_data) # 6碼 36 | 37 | # 以下計算各個模型各個字元辨識率等等,有點亂,以後有空再整理 38 | total, total5, total6 = len(prediction56), 0, 0 39 | correct5, correct6, correct56, correct = 0, 0, 0, 0 40 | correct5digit, correct6digit = [0 for _ in range(5)], [0 for _ in range(6)] 41 | totalalpha, correctalpha = len([1 for ans in test_label for char in ans if char.isalpha()]), 0 42 | for i in range(total): 43 | checkcorrect = True 44 | if prediction56[i] == len(test_label[i]): 45 | 
correct56 += 1 46 | else: 47 | checkcorrect = False 48 | if prediction56[i] == 5: 49 | total5 += 1 50 | allequal = True 51 | for char in range(5): 52 | if LETTERSTR[np.argmax(prediction5[char][i])] == test_label[i][char]: 53 | correct5digit[char] += 1 54 | correctalpha += 1 if LETTERSTR[np.argmax(prediction5[char][i])].isalpha() else 0 55 | else: 56 | allequal = False 57 | if allequal: 58 | correct5 += 1 59 | else: 60 | checkcorrect = False 61 | else: 62 | total6 += 1 63 | allequal = True 64 | for char in range(6): 65 | if LETTERSTR[np.argmax(prediction6[char][i])] == test_label[i][char]: 66 | correct6digit[char] += 1 67 | correctalpha += 1 if LETTERSTR[np.argmax(prediction6[char][i])].isalpha() else 0 68 | else: 69 | allequal = False 70 | if allequal: 71 | correct6 += 1 72 | else: 73 | checkcorrect = False 74 | if checkcorrect: 75 | correct += 1 76 | 77 | print("5 or 6 model acc:{:.4f}%".format(correct56/total*100)) # 5/6模型acc 78 | print("---------------------------") 79 | print("5digits model acc:{:.4f}%".format(correct5/total5*100)) # 5模型acc 80 | for i in range(5): 81 | print("digit{:d} acc:{:.4f}%".format(i+1, correct5digit[i]/total5*100)) # 5模型各字元acc 82 | print("---------------------------") 83 | print("6digits model acc:{:.4f}%".format(correct6/total6*100)) # 6模型acc 84 | for i in range(6): 85 | print("digit{:d} acc:{:.4f}%".format(i+1, correct6digit[i]/total6*100)) # 6模型各字元acc 86 | print("---------------------------") 87 | print("alpha acc:{:.4f}%".format(correctalpha/totalalpha*100)) # 整體英文字acc 88 | -------------------------------------------------------------------------------- /demo_online.py: -------------------------------------------------------------------------------- 1 | from selenium import webdriver 2 | from selenium.common.exceptions import TimeoutException 3 | from selenium.webdriver.common.by import By 4 | from selenium.webdriver.support.ui import WebDriverWait 5 | from selenium.webdriver.support import expected_conditions as EC 6 | import numpy 
as np 7 | from PIL import Image 8 | from keras.models import load_model, Model 9 | import time 10 | import random 11 | IDNumber = "X123456789" # 填入你的身分證字號 12 | model = None 13 | model5 = load_model("./data/model/imitate_5_model.h5") # 辨識5碼的Model 14 | model6 = load_model("./data/model/imitate_6_model.h5") # 辨識6碼的Model 15 | model56 = load_model("./data/model/real_56_model.h5") # 辨識是5碼or6碼的Model 16 | LETTERSTR = "0123456789ABCDEFGHJKLMNPQRSTUVWXYZ" 17 | driver = webdriver.Chrome("./data/chromedriver.exe") # chromedriver 路徑 18 | correct, wrong = 0, 0 19 | 20 | for _ in range(1000):# 跑1000次 21 | driver.get('http://railway1.hinet.net/Foreign/TW/ecsearch.html') 22 | id_textbox = driver.find_element_by_id('person_id') 23 | id_textbox.send_keys(IDNumber) 24 | button = driver.find_element_by_css_selector('body > div.container > div.row.contents > div > form > div > div.col-xs-12 > button') 25 | button.click() 26 | driver.save_screenshot('tmp.png') 27 | location = driver.find_element_by_id('idRandomPic').location 28 | x, y = location['x'] + 5, location['y'] + 5 29 | img = Image.open('tmp.png') 30 | captcha = img.crop((x, y, x+200, y+60)) 31 | captcha.convert("RGB").save('captcha.jpg', 'JPEG') 32 | # check is 5 or 6 digits 33 | p56 = model56.predict(np.stack([np.array(Image.open('captcha.jpg'))/255.0]))[0][0] 34 | if p56 > 0.5: 35 | model = model6 36 | else: 37 | model = model5 38 | prediction = model.predict(np.stack([np.array(Image.open('captcha.jpg'))/255.0])) 39 | answer = "" 40 | for predict in prediction: 41 | answer += LETTERSTR[np.argmax(predict[0])] 42 | captcha_textbox = driver.find_element_by_id('randInput') 43 | captcha_textbox.send_keys(answer) 44 | driver.find_element_by_id('sbutton').click() 45 | if "亂數號碼錯誤" in driver.page_source: 46 | wrong += 1 47 | else: 48 | correct += 1 49 | print("{:.4f}% (Correct{:d}-Wrong{:d})".format(correct/(correct+wrong)*100, correct, wrong)) 50 | time.sleep(3) 51 | 
-------------------------------------------------------------------------------- /readme_img/captcha_sample1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/captcha_sample1.jpg -------------------------------------------------------------------------------- /readme_img/captcha_sample2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/captcha_sample2.jpg -------------------------------------------------------------------------------- /readme_img/captcha_sample3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/captcha_sample3.jpg -------------------------------------------------------------------------------- /readme_img/captcha_sample4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/captcha_sample4.jpg -------------------------------------------------------------------------------- /readme_img/captcha_seperate1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/captcha_seperate1.png -------------------------------------------------------------------------------- /readme_img/captcha_seperate2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/captcha_seperate2.png -------------------------------------------------------------------------------- /readme_img/captcha_seperate3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/captcha_seperate3.png -------------------------------------------------------------------------------- /readme_img/csv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/csv.png -------------------------------------------------------------------------------- /readme_img/dataaugmentation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/dataaugmentation.png -------------------------------------------------------------------------------- /readme_img/generate.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/generate.png -------------------------------------------------------------------------------- /readme_img/head.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/head.gif -------------------------------------------------------------------------------- /readme_img/imitate6.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/imitate6.png -------------------------------------------------------------------------------- /readme_img/imitate6_tensorboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/imitate6_tensorboard.png -------------------------------------------------------------------------------- /readme_img/imitate_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/imitate_result.png -------------------------------------------------------------------------------- /readme_img/old/1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/old/1.jpeg -------------------------------------------------------------------------------- /readme_img/old/10.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/old/10.PNG -------------------------------------------------------------------------------- /readme_img/old/11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/old/11.png -------------------------------------------------------------------------------- /readme_img/old/12.PNG: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/old/12.PNG -------------------------------------------------------------------------------- /readme_img/old/2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/old/2.jpeg -------------------------------------------------------------------------------- /readme_img/old/3.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/old/3.jpeg -------------------------------------------------------------------------------- /readme_img/old/4.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/old/4.jpeg -------------------------------------------------------------------------------- /readme_img/old/5.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/old/5.PNG -------------------------------------------------------------------------------- /readme_img/old/6.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JasonLiTW/simple-railway-captcha-solver/c62f0836d6a321cf9443ce94cb817892cf6ac7d0/readme_img/old/6.PNG -------------------------------------------------------------------------------- /readme_img/old/7.PNG: -------------------------------------------------------------------------------- 
from keras.models import Model
from keras.layers import Input, Dense, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization
from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard
from PIL import Image
import numpy as np
import csv

# Captcha alphabet used across this project (digits + uppercase letters,
# apparently without I and O — presumably to avoid confusion with 1/0).
LETTERSTR = "0123456789ABCDEFGHJKLMNPQRSTUVWXYZ"
DIGITS = 5  # this model recognises fixed-length 5-character captchas


def toonehot(text):
    """Return one one-hot vector (len(LETTERSTR) wide) per character of *text*."""
    labellist = []
    for letter in text:
        onehot = [0 for _ in range(len(LETTERSTR))]  # was a magic constant 34
        onehot[LETTERSTR.find(letter)] = 1
        labellist.append(onehot)
    return labellist


def _load_set(csv_path, img_dir):
    """Load the images and per-digit labels listed in *csv_path*.

    Returns (data, labels): data is float array (n, 60, 200, 3) scaled to
    [0, 1]; labels is a list of DIGITS arrays, one per character position,
    each of shape (n, len(LETTERSTR)).
    """
    # Read the CSV once with a context manager; the original opened the same
    # file twice and never closed either handle.
    with open(csv_path, 'r', encoding='utf8') as csvfile:
        rows = list(csv.reader(csvfile))
    data = np.stack([np.array(Image.open(img_dir + row[0] + ".jpg")) / 255.0 for row in rows])
    onehots = [toonehot(row[1]) for row in rows]
    labels = [np.asarray([onehot[pos] for onehot in onehots]) for pos in range(DIGITS)]
    return data, labels


# Create CNN Model
print("Creating CNN model...")
tensor_in = Input((60, 200, 3))  # `in` is a reserved word and cannot be a variable name
out = tensor_in
out = Conv2D(filters=32, kernel_size=(3, 3), padding='same', activation='relu')(out)
out = Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Dropout(0.3)(out)
out = Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu')(out)
out = Conv2D(filters=64, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Dropout(0.3)(out)
out = Conv2D(filters=128, kernel_size=(3, 3), padding='same', activation='relu')(out)
out = Conv2D(filters=128, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Dropout(0.3)(out)
out = Conv2D(filters=256, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Flatten()(out)
flat = Dropout(0.3)(out)
# One softmax head per character position, names digit1..digit5.
outs = [Dense(len(LETTERSTR), name='digit{:d}'.format(pos + 1), activation='softmax')(flat)
        for pos in range(DIGITS)]
model = Model(inputs=tensor_in, outputs=outs)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

print("Reading training data...")
train_data, train_label = _load_set('./data/5_imitate_train_set/captcha_train.csv',
                                    './data/5_imitate_train_set/')
print("Shape of train data:", train_data.shape)

print("Reading validation data...")
vali_data, vali_label = _load_set('./data/5_imitate_vali_set/captcha_vali.csv',
                                  './data/5_imitate_vali_set/')
print("Shape of validation data:", vali_data.shape)

# Keep the best weights by last-digit validation accuracy and stop early
# after 5 epochs without improvement.
filepath = "./data/model/imitate_5_model.h5"
checkpoint = ModelCheckpoint(filepath, monitor='val_digit5_acc', verbose=1, save_best_only=True, mode='max')
earlystop = EarlyStopping(monitor='val_digit5_acc', patience=5, verbose=1, mode='auto')
tensorBoard = TensorBoard(log_dir="./logs", histogram_freq=1)
callbacks_list = [checkpoint, earlystop, tensorBoard]
model.fit(train_data, train_label, batch_size=400, epochs=100, verbose=2,
          validation_data=(vali_data, vali_label), callbacks=callbacks_list)
from keras.models import Model
from keras.layers import Input, Dense, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization
from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard
from PIL import Image
import numpy as np
import csv


def _load_set(csv_path, img_dir):
    """Load images and binary length labels (1 = 6 characters, 0 = 5).

    The CSV's second column holds the captcha length as a string.
    """
    # Read once inside a context manager; the original opened the file twice
    # and leaked both handles.
    with open(csv_path, 'r', encoding='utf8') as csvfile:
        rows = list(csv.reader(csvfile))
    data = np.stack([np.array(Image.open(img_dir + row[0] + ".jpg")) / 255.0 for row in rows])
    labels = np.asarray([1 if row[1] == '6' else 0 for row in rows])
    return data, labels


# Create CNN Model: a binary classifier telling 5-char from 6-char captchas.
print("Creating CNN model...")
tensor_in = Input((60, 200, 3))  # `in` is a reserved word and cannot be a variable name
out = tensor_in
out = Conv2D(filters=32, kernel_size=(3, 3), padding='same', activation='relu')(out)
out = Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Dropout(0.5)(out)
out = Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu')(out)
out = Conv2D(filters=64, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Dropout(0.5)(out)
out = Conv2D(filters=128, kernel_size=(3, 3), padding='same', activation='relu')(out)
out = Conv2D(filters=128, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Dropout(0.5)(out)
out = Conv2D(filters=256, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Flatten()(out)
out = Dropout(0.5)(out)
out = Dense(1, name='6digit', activation='sigmoid')(out)  # P(captcha has 6 chars)
model = Model(inputs=tensor_in, outputs=out)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

print("Reading training data...")
train_data, train_label = _load_set('./data/56_imitate_train_set/len_train.csv',
                                    './data/56_imitate_train_set/')
print("Shape of train data:", train_data.shape)

print("Reading validation data...")
vali_data, vali_label = _load_set('./data/56_imitate_vali_set/len_vali.csv',
                                  './data/56_imitate_vali_set/')
print("Shape of validation data:", vali_data.shape)

filepath = "./data/model/imitate_56_model.h5"
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
earlystop = EarlyStopping(monitor='val_acc', patience=10, verbose=1, mode='auto')
tensorBoard = TensorBoard(log_dir="./logs", histogram_freq=1)
callbacks_list = [checkpoint, earlystop, tensorBoard]
model.fit(train_data, train_label, batch_size=400, epochs=100, verbose=1,
          validation_data=(vali_data, vali_label), callbacks=callbacks_list)
from keras.models import Model
from keras.layers import Input, Dense, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization
from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard
from PIL import Image
import numpy as np
import csv

# Captcha alphabet used across this project (digits + uppercase letters,
# apparently without I and O — presumably to avoid confusion with 1/0).
LETTERSTR = "0123456789ABCDEFGHJKLMNPQRSTUVWXYZ"
DIGITS = 6  # this model recognises fixed-length 6-character captchas


def toonehot(text):
    """Return one one-hot vector (len(LETTERSTR) wide) per character of *text*."""
    labellist = []
    for letter in text:
        onehot = [0 for _ in range(len(LETTERSTR))]  # was a magic constant 34
        onehot[LETTERSTR.find(letter)] = 1
        labellist.append(onehot)
    return labellist


def _load_set(csv_path, img_dir):
    """Load the images and per-digit labels listed in *csv_path*.

    Returns (data, labels): data is float array (n, 60, 200, 3) scaled to
    [0, 1]; labels is a list of DIGITS arrays, one per character position,
    each of shape (n, len(LETTERSTR)).
    """
    # Read once with a context manager; the original opened the file twice
    # and leaked both handles.
    with open(csv_path, 'r', encoding='utf8') as csvfile:
        rows = list(csv.reader(csvfile))
    data = np.stack([np.array(Image.open(img_dir + row[0] + ".jpg")) / 255.0 for row in rows])
    onehots = [toonehot(row[1]) for row in rows]
    labels = [np.asarray([onehot[pos] for onehot in onehots]) for pos in range(DIGITS)]
    return data, labels


# Create CNN Model
print("Creating CNN model...")
tensor_in = Input((60, 200, 3))  # `in` is a reserved word and cannot be a variable name
out = tensor_in
out = Conv2D(filters=32, kernel_size=(3, 3), padding='same', activation='relu')(out)
out = Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Dropout(0.3)(out)
out = Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu')(out)
out = Conv2D(filters=64, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Dropout(0.3)(out)
out = Conv2D(filters=128, kernel_size=(3, 3), padding='same', activation='relu')(out)
out = Conv2D(filters=128, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Dropout(0.3)(out)
out = Conv2D(filters=256, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Flatten()(out)
flat = Dropout(0.3)(out)
# One softmax head per character position, names digit1..digit6.
outs = [Dense(len(LETTERSTR), name='digit{:d}'.format(pos + 1), activation='softmax')(flat)
        for pos in range(DIGITS)]
model = Model(inputs=tensor_in, outputs=outs)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

print("Reading training data...")
train_data, train_label = _load_set('./data/6_imitate_train_set/captcha_train.csv',
                                    './data/6_imitate_train_set/')
print("Shape of train data:", train_data.shape)

print("Reading validation data...")
vali_data, vali_label = _load_set('./data/6_imitate_vali_set/captcha_vali.csv',
                                  './data/6_imitate_vali_set/')
print("Shape of validation data:", vali_data.shape)

# Keep the best weights by last-digit validation accuracy and stop early
# after 5 epochs without improvement.
filepath = "./data/model/imitate_6_model.h5"
checkpoint = ModelCheckpoint(filepath, monitor='val_digit6_acc', verbose=1, save_best_only=True, mode='max')
earlystop = EarlyStopping(monitor='val_digit6_acc', patience=5, verbose=1, mode='auto')
tensorBoard = TensorBoard(log_dir="./logs", histogram_freq=1)
callbacks_list = [checkpoint, earlystop, tensorBoard]
model.fit(train_data, train_label, batch_size=400, epochs=100, verbose=2,
          validation_data=(vali_data, vali_label), callbacks=callbacks_list)
from keras.models import Model
from keras.layers import Input, Dense, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization
from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard
from PIL import Image
import numpy as np
import csv
import os

# Captcha alphabet used across this project (digits + uppercase letters,
# apparently without I and O — presumably to avoid confusion with 1/0).
LETTERSTR = "0123456789ABCDEFGHJKLMNPQRSTUVWXYZ"
DIGITS = 5  # this model recognises fixed-length 5-character captchas


def toonehot(text):
    """Return one one-hot vector (len(LETTERSTR) wide) per character of *text*."""
    labellist = []
    for letter in text:
        onehot = [0 for _ in range(len(LETTERSTR))]  # was a magic constant 34
        onehot[LETTERSTR.find(letter)] = 1
        labellist.append(onehot)
    return labellist


def _load_range(csv_path, img_dir, start, stop):
    """Load images img_dir/<start>.jpg .. <stop-1>.jpg with their labels.

    CSV row i (0-based) labels image <i+1>.jpg, so the label slice mirrors
    the image index range exactly (the original sliced labels with an
    open-ended [60000:], which only matches if the CSV has exactly 75000 rows).
    """
    # Read once with a context manager; the original opened the file twice
    # and leaked both handles.
    with open(csv_path, 'r', encoding='utf8') as csvfile:
        rows = list(csv.reader(csvfile))
    data = np.stack([np.array(Image.open(img_dir + str(i) + ".jpg")) / 255.0
                     for i in range(start, stop)])
    onehots = [toonehot(row[1]) for row in rows[start - 1:stop - 1]]
    labels = [np.asarray([onehot[pos] for onehot in onehots]) for pos in range(DIGITS)]
    return data, labels


# Create CNN Model
print("Creating CNN model...")
tensor_in = Input((60, 200, 3))  # `in` is a reserved word and cannot be a variable name
out = tensor_in
out = Conv2D(filters=32, kernel_size=(3, 3), padding='same', activation='relu')(out)
out = Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Dropout(0.5)(out)
out = Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu')(out)
out = Conv2D(filters=64, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Dropout(0.5)(out)
out = Conv2D(filters=128, kernel_size=(3, 3), padding='same', activation='relu')(out)
out = Conv2D(filters=128, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Dropout(0.5)(out)
out = Conv2D(filters=256, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Flatten()(out)
flat = Dropout(0.5)(out)
# One softmax head per character position, names digit1..digit5.
outs = [Dense(len(LETTERSTR), name='digit{:d}'.format(pos + 1), activation='softmax')(flat)
        for pos in range(DIGITS)]
model = Model(inputs=tensor_in, outputs=outs)
model.compile(loss='categorical_crossentropy', optimizer='Adam', metrics=['accuracy'])
model.summary()

print("Reading training data...")
train_data, train_label = _load_range('./data/5_real_train_set/captcha_train.csv',
                                      './data/5_real_train_set/', 1, 60001)
print("Shape of train data:", train_data.shape)

print("Reading validation data...")
vali_data, vali_label = _load_range('./data/5_real_train_set/captcha_train.csv',
                                    './data/5_real_train_set/', 60001, 75001)
print("Shape of validation data:", vali_data.shape)

filepath = "./data/model/real_5_model.h5"
checkpoint = ModelCheckpoint(filepath, monitor='val_digit5_acc', verbose=1, save_best_only=True, mode='max')
earlystop = EarlyStopping(monitor='val_digit5_acc', patience=5, verbose=1, mode='auto')
tensorBoard = TensorBoard(log_dir="./logs", histogram_freq=1)
callbacks_list = [checkpoint, earlystop, tensorBoard]
model.fit(train_data, train_label, batch_size=400, epochs=100, verbose=2,
          validation_data=(vali_data, vali_label), callbacks=callbacks_list)
from keras.models import Model
from keras.layers import Input, Dense, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization
from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard
from PIL import Image
import numpy as np
import csv


def _sample_images(img_dir, low, high, size):
    """Load *size* images drawn without replacement from img_dir/<low>.jpg .. <high-1>.jpg,
    scaled to [0, 1]."""
    picks = np.random.choice(range(low, high), size=size, replace=False)
    return [np.array(Image.open(img_dir + str(i) + ".jpg")) / 255.0 for i in picks]


# Create CNN Model: binary classifier telling 5-char from 6-char real captchas.
print("Creating CNN model...")
tensor_in = Input((60, 200, 3))  # `in` is a reserved word and cannot be a variable name
out = tensor_in
out = Conv2D(filters=32, kernel_size=(3, 3), padding='same', activation='relu')(out)
out = Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Dropout(0.5)(out)
out = Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu')(out)
out = Conv2D(filters=64, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Dropout(0.5)(out)
out = Conv2D(filters=128, kernel_size=(3, 3), padding='same', activation='relu')(out)
out = Conv2D(filters=128, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Dropout(0.5)(out)
out = Conv2D(filters=256, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Flatten()(out)
out = Dropout(0.5)(out)
out = Dense(1, name='6digit', activation='sigmoid')(out)  # P(captcha has 6 chars)
model = Model(inputs=tensor_in, outputs=out)
model.compile(loss='binary_crossentropy', optimizer='Adam', metrics=['accuracy'])
model.summary()

print("Reading training data...")
# 20000 random 5-char images (label 0) followed by 20000 random 6-char images (label 1).
train_data = np.stack(_sample_images("./data/5_real_train_set/", 1, 60001, 20000)
                      + _sample_images("./data/6_real_train_set/", 1, 60001, 20000))
train_label = np.asarray([0] * 20000 + [1] * 20000)
print("Shape of train data:", train_data.shape)

print("Reading validation data...")
# Validation draws from the held-out index range 60001..75000 of each set.
vali_data = np.stack(_sample_images("./data/5_real_train_set/", 60001, 75001, 5000)
                     + _sample_images("./data/6_real_train_set/", 60001, 75001, 5000))
vali_label = np.asarray([0] * 5000 + [1] * 5000)
print("Shape of validation data:", vali_data.shape)

filepath = "./data/model/real_56_model.h5"
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
# NOTE(review): the checkpoint tracks val_acc but early stopping tracks
# val_loss — kept as in the original; confirm this asymmetry is intended.
earlystop = EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='auto')
tensorBoard = TensorBoard(log_dir="./logs", histogram_freq=1)
callbacks_list = [checkpoint, earlystop, tensorBoard]
model.fit(train_data, train_label, batch_size=400, epochs=100, verbose=2,
          validation_data=(vali_data, vali_label), callbacks=callbacks_list)
from keras.models import Model
from keras.layers import Input, Dense, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization
from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard
from PIL import Image
import numpy as np
import csv
import os

# Captcha alphabet used across this project (digits + uppercase letters,
# apparently without I and O — presumably to avoid confusion with 1/0).
LETTERSTR = "0123456789ABCDEFGHJKLMNPQRSTUVWXYZ"
DIGITS = 6  # this model recognises fixed-length 6-character captchas


def toonehot(text):
    """Return one one-hot vector (len(LETTERSTR) wide) per character of *text*."""
    labellist = []
    for letter in text:
        onehot = [0 for _ in range(len(LETTERSTR))]  # was a magic constant 34
        onehot[LETTERSTR.find(letter)] = 1
        labellist.append(onehot)
    return labellist


def _load_range(csv_path, img_dir, start, stop):
    """Load images img_dir/<start>.jpg .. <stop-1>.jpg with their labels.

    CSV row i (0-based) labels image <i+1>.jpg, so the label slice mirrors
    the image index range exactly (the original sliced labels with an
    open-ended [60000:], which only matches if the CSV has exactly 75000 rows).
    """
    # Read once with a context manager; the original opened the file twice
    # and leaked both handles.
    with open(csv_path, 'r', encoding='utf8') as csvfile:
        rows = list(csv.reader(csvfile))
    data = np.stack([np.array(Image.open(img_dir + str(i) + ".jpg")) / 255.0
                     for i in range(start, stop)])
    onehots = [toonehot(row[1]) for row in rows[start - 1:stop - 1]]
    labels = [np.asarray([onehot[pos] for onehot in onehots]) for pos in range(DIGITS)]
    return data, labels


# Create CNN Model
print("Creating CNN model...")
tensor_in = Input((60, 200, 3))  # `in` is a reserved word and cannot be a variable name
out = tensor_in
out = Conv2D(filters=32, kernel_size=(3, 3), padding='same', activation='relu')(out)
out = Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Dropout(0.5)(out)
out = Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu')(out)
out = Conv2D(filters=64, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Dropout(0.5)(out)
out = Conv2D(filters=128, kernel_size=(3, 3), padding='same', activation='relu')(out)
out = Conv2D(filters=128, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Dropout(0.5)(out)
out = Conv2D(filters=256, kernel_size=(3, 3), activation='relu')(out)
out = BatchNormalization()(out)
out = MaxPooling2D(pool_size=(2, 2))(out)
out = Flatten()(out)
flat = Dropout(0.5)(out)
# One softmax head per character position, names digit1..digit6.
outs = [Dense(len(LETTERSTR), name='digit{:d}'.format(pos + 1), activation='softmax')(flat)
        for pos in range(DIGITS)]
model = Model(inputs=tensor_in, outputs=outs)
model.compile(loss='categorical_crossentropy', optimizer='Adam', metrics=['accuracy'])
model.summary()

print("Reading training data...")
train_data, train_label = _load_range('./data/6_real_train_set/captcha_train.csv',
                                      './data/6_real_train_set/', 1, 60001)
print("Shape of train data:", train_data.shape)

print("Reading validation data...")
vali_data, vali_label = _load_range('./data/6_real_train_set/captcha_train.csv',
                                    './data/6_real_train_set/', 60001, 75001)
print("Shape of validation data:", vali_data.shape)

filepath = "./data/model/real_6_model.h5"
checkpoint = ModelCheckpoint(filepath, monitor='val_digit6_acc', verbose=1, save_best_only=True, mode='max')
earlystop = EarlyStopping(monitor='val_digit6_acc', patience=5, verbose=1, mode='auto')
tensorBoard = TensorBoard(log_dir="./logs", histogram_freq=1)
callbacks_list = [checkpoint, earlystop, tensorBoard]
model.fit(train_data, train_label, batch_size=400, epochs=100, verbose=2,
          validation_data=(vali_data, vali_label), callbacks=callbacks_list)