├── .gitignore ├── test-set ├── 008622.png ├── 010287.png ├── 017091.png ├── 018228.png ├── 026899.png ├── 044049.png ├── 046200.png ├── 058782.png ├── 074774.png ├── 076761.png ├── 095537.png ├── 097587.png ├── 102107.png ├── 146097.png ├── 150104.png ├── 150582.png ├── 167472.png ├── 187101.png ├── 202345.png ├── 213265.png ├── 229779.png ├── 229932.png ├── 249145.png ├── 249505.png ├── 252834.png ├── 261742.png ├── 263959.png ├── 265362.png ├── 281594.png ├── 296135.png ├── 302871.png ├── 303009.png ├── 308721.png ├── 311051.png ├── 318073.png ├── 321406.png ├── 329342.png ├── 349292.png ├── 360821.png ├── 372181.png ├── 372278.png ├── 385714.png ├── 393277.png ├── 395961.png ├── 403143.png ├── 404801.png ├── 408329.png ├── 409913.png ├── 440342.png ├── 449337.png ├── 451336.png ├── 463205.png ├── 465003.png ├── 471029.png ├── 474087.png ├── 474500.png ├── 478821.png ├── 515730.png ├── 521075.png ├── 543708.png ├── 565447.png ├── 583875.png ├── 595410.png ├── 601050.png ├── 611727.png ├── 614219.png ├── 614442.png ├── 615728.png ├── 618419.png ├── 619398.png ├── 619858.png ├── 624002.png ├── 624972.png ├── 628260.png ├── 631157.png ├── 632786.png ├── 654989.png ├── 656897.png ├── 659632.png ├── 686790.png ├── 692307.png ├── 695943.png ├── 701893.png ├── 726772.png ├── 732118.png ├── 737451.png ├── 759805.png ├── 775002.png ├── 800095.png ├── 840730.png ├── 861144.png ├── 876292.png ├── 878514.png ├── 882160.png ├── 885127.png ├── 885855.png ├── 925668.png ├── 930641.png ├── 957191.png └── 966910.png ├── README.md ├── chaptcha.user.js ├── COPYING └── chaptcha.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.net 3 | *.tmp 4 | *.png 5 | !/test-set/* 6 | /.venv/ 7 | -------------------------------------------------------------------------------- /test-set/008622.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/008622.png -------------------------------------------------------------------------------- /test-set/010287.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/010287.png -------------------------------------------------------------------------------- /test-set/017091.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/017091.png -------------------------------------------------------------------------------- /test-set/018228.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/018228.png -------------------------------------------------------------------------------- /test-set/026899.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/026899.png -------------------------------------------------------------------------------- /test-set/044049.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/044049.png -------------------------------------------------------------------------------- /test-set/046200.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/046200.png -------------------------------------------------------------------------------- /test-set/058782.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/058782.png -------------------------------------------------------------------------------- /test-set/074774.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/074774.png -------------------------------------------------------------------------------- /test-set/076761.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/076761.png -------------------------------------------------------------------------------- /test-set/095537.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/095537.png -------------------------------------------------------------------------------- /test-set/097587.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/097587.png -------------------------------------------------------------------------------- /test-set/102107.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/102107.png -------------------------------------------------------------------------------- /test-set/146097.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/146097.png -------------------------------------------------------------------------------- /test-set/150104.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/150104.png -------------------------------------------------------------------------------- /test-set/150582.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/150582.png 
-------------------------------------------------------------------------------- /test-set/167472.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/167472.png -------------------------------------------------------------------------------- /test-set/187101.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/187101.png -------------------------------------------------------------------------------- /test-set/202345.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/202345.png -------------------------------------------------------------------------------- /test-set/213265.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/213265.png -------------------------------------------------------------------------------- /test-set/229779.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/229779.png -------------------------------------------------------------------------------- /test-set/229932.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/229932.png -------------------------------------------------------------------------------- /test-set/249145.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/249145.png -------------------------------------------------------------------------------- /test-set/249505.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/249505.png -------------------------------------------------------------------------------- /test-set/252834.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/252834.png -------------------------------------------------------------------------------- /test-set/261742.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/261742.png -------------------------------------------------------------------------------- /test-set/263959.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/263959.png -------------------------------------------------------------------------------- /test-set/265362.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/265362.png -------------------------------------------------------------------------------- /test-set/281594.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/281594.png -------------------------------------------------------------------------------- /test-set/296135.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/296135.png -------------------------------------------------------------------------------- /test-set/302871.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/302871.png -------------------------------------------------------------------------------- /test-set/303009.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/303009.png -------------------------------------------------------------------------------- /test-set/308721.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/308721.png -------------------------------------------------------------------------------- /test-set/311051.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/311051.png -------------------------------------------------------------------------------- /test-set/318073.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/318073.png -------------------------------------------------------------------------------- /test-set/321406.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/321406.png -------------------------------------------------------------------------------- /test-set/329342.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/329342.png -------------------------------------------------------------------------------- /test-set/349292.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/349292.png -------------------------------------------------------------------------------- /test-set/360821.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/360821.png 
-------------------------------------------------------------------------------- /test-set/372181.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/372181.png -------------------------------------------------------------------------------- /test-set/372278.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/372278.png -------------------------------------------------------------------------------- /test-set/385714.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/385714.png -------------------------------------------------------------------------------- /test-set/393277.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/393277.png -------------------------------------------------------------------------------- /test-set/395961.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/395961.png -------------------------------------------------------------------------------- /test-set/403143.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/403143.png -------------------------------------------------------------------------------- /test-set/404801.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/404801.png -------------------------------------------------------------------------------- /test-set/408329.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/408329.png -------------------------------------------------------------------------------- /test-set/409913.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/409913.png -------------------------------------------------------------------------------- /test-set/440342.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/440342.png -------------------------------------------------------------------------------- /test-set/449337.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/449337.png -------------------------------------------------------------------------------- /test-set/451336.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/451336.png -------------------------------------------------------------------------------- /test-set/463205.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/463205.png -------------------------------------------------------------------------------- /test-set/465003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/465003.png -------------------------------------------------------------------------------- /test-set/471029.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/471029.png -------------------------------------------------------------------------------- /test-set/474087.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/474087.png -------------------------------------------------------------------------------- /test-set/474500.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/474500.png -------------------------------------------------------------------------------- /test-set/478821.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/478821.png -------------------------------------------------------------------------------- /test-set/515730.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/515730.png -------------------------------------------------------------------------------- /test-set/521075.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/521075.png -------------------------------------------------------------------------------- /test-set/543708.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/543708.png -------------------------------------------------------------------------------- /test-set/565447.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/565447.png -------------------------------------------------------------------------------- /test-set/583875.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/583875.png 
-------------------------------------------------------------------------------- /test-set/595410.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/595410.png -------------------------------------------------------------------------------- /test-set/601050.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/601050.png -------------------------------------------------------------------------------- /test-set/611727.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/611727.png -------------------------------------------------------------------------------- /test-set/614219.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/614219.png -------------------------------------------------------------------------------- /test-set/614442.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/614442.png -------------------------------------------------------------------------------- /test-set/615728.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/615728.png -------------------------------------------------------------------------------- /test-set/618419.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/618419.png -------------------------------------------------------------------------------- /test-set/619398.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/619398.png -------------------------------------------------------------------------------- /test-set/619858.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/619858.png -------------------------------------------------------------------------------- /test-set/624002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/624002.png -------------------------------------------------------------------------------- /test-set/624972.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/624972.png -------------------------------------------------------------------------------- /test-set/628260.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/628260.png -------------------------------------------------------------------------------- /test-set/631157.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/631157.png -------------------------------------------------------------------------------- /test-set/632786.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/632786.png -------------------------------------------------------------------------------- /test-set/654989.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/654989.png -------------------------------------------------------------------------------- /test-set/656897.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/656897.png -------------------------------------------------------------------------------- /test-set/659632.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/659632.png -------------------------------------------------------------------------------- /test-set/686790.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/686790.png -------------------------------------------------------------------------------- /test-set/692307.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/692307.png -------------------------------------------------------------------------------- /test-set/695943.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/695943.png -------------------------------------------------------------------------------- /test-set/701893.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/701893.png -------------------------------------------------------------------------------- /test-set/726772.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/726772.png -------------------------------------------------------------------------------- /test-set/732118.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/732118.png 
-------------------------------------------------------------------------------- /test-set/737451.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/737451.png -------------------------------------------------------------------------------- /test-set/759805.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/759805.png -------------------------------------------------------------------------------- /test-set/775002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/775002.png -------------------------------------------------------------------------------- /test-set/800095.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/800095.png -------------------------------------------------------------------------------- /test-set/840730.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/840730.png -------------------------------------------------------------------------------- /test-set/861144.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/861144.png -------------------------------------------------------------------------------- /test-set/876292.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/876292.png -------------------------------------------------------------------------------- /test-set/878514.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/878514.png -------------------------------------------------------------------------------- /test-set/882160.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/882160.png -------------------------------------------------------------------------------- /test-set/885127.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/885127.png -------------------------------------------------------------------------------- /test-set/885855.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/885855.png -------------------------------------------------------------------------------- /test-set/925668.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/925668.png -------------------------------------------------------------------------------- /test-set/930641.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/930641.png -------------------------------------------------------------------------------- /test-set/957191.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/957191.png -------------------------------------------------------------------------------- /test-set/966910.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kagami/chaptcha/HEAD/test-set/966910.png -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | # chaptcha 2 | 3 | Break CAPTCHA at 2ch.hk using OpenCV and FANN. Just for fun (UWBFTP). 4 | 5 | **THE CAPTCHA WAS CHANGED; THIS CODE IS LEFT JUST FOR HISTORICAL PURPOSES.** Maybe I'll update it for the new one some day. 6 | 7 | ## Demo 8 | 9 | A demo backend is available at `ch.genshiken.org`; [install userscript](https://raw.githubusercontent.com/Kagami/chaptcha/master/chaptcha.user.js) to try it out (not tested for compatibility with Dollchan Extension Tools). 10 | 11 | ![](https://raw.githubusercontent.com/Kagami/chaptcha/assets/vis.png) 12 | ![](https://raw.githubusercontent.com/Kagami/chaptcha/assets/cap.gif) 13 | 14 | ## Requirements 15 | 16 | * [Python](https://www.python.org/) 2.7+ or 3.2+ 17 | * [NumPy](http://www.numpy.org/) 1.7+ 18 | * [OpenCV](http://opencv.org/) 2.4+ with Python bindings 19 | * [FANN](http://leenissen.dk/fann/wp/) 2.1+ with Python bindings 20 | * [requests](http://python-requests.org/) 2+ 21 | * [bottle](http://bottlepy.org/) 0.10+ 22 | 23 | ## Usage 24 | 25 | ```bash 26 | # Visualize preprocess/segmentation steps 27 | python chaptcha.py vis -i captcha.png 28 | # Collect training data 29 | python chaptcha.py collect -o captchas/ -k ag.txt 30 | # Train neural network 31 | python chaptcha.py train -i captchas/ -o my.net 32 | # Recognize CAPTCHA 33 | python chaptcha.py ocr -i captcha.png -n my.net 34 | # Host OCR backend (for chaptcha.user.js) 35 | python chaptcha.py serve -n my.net 36 | ``` 37 | 38 | ## Links 39 | 40 | * [Image Denoising](http://docs.opencv.org/3.1.0/d5/d69/tutorial_py_non_local_means.html) 41 | * [Hough Line Transform](http://docs.opencv.org/3.1.0/d6/d10/tutorial_py_houghlines.html) 42 | * [Python FANN tutorial](http://jansipke.nl/using-fann-with-python/) 43 | * [Break simple CAPTCHA](https://habrahabr.ru/post/63854/) 44 | * [Break ifolder CAPTCHA](https://geektimes.ru/post/67194/) 45 | * [Break rzd CAPTCHA](https://toster.ru/q/216509) 46 | *
[Break ops CAPTCHA](https://habrahabr.ru/post/116222/) 47 | 48 | ## License 49 | 50 | [CC0.](COPYING) 51 | -------------------------------------------------------------------------------- /chaptcha.user.js: -------------------------------------------------------------------------------- 1 | // ==UserScript== 2 | // @name chaptcha 3 | // @namespace https://2chk.hk/chaptcha 4 | // @description Automatically enter captcha at 2ch.hk using chaptcha.py backend 5 | // @downloadURL https://raw.githubusercontent.com/Kagami/chaptcha/master/chaptcha.user.js 6 | // @updateURL https://raw.githubusercontent.com/Kagami/chaptcha/master/chaptcha.user.js 7 | // @include https://2ch.hk/* 8 | // @version 0.0.2 9 | // @grant none 10 | // ==/UserScript== 11 | 12 | // TODO: Some better way to store that setting? 13 | var OCR_BACKEND_URL = localStorage.getItem("OCR_BACKEND_URL") || 14 | "https://ch.genshiken.org"; 15 | // Resolve with a Blob of `img` drawn onto a canvas once the image has loaded; reject if loading fails. 16 | function getImageData(img) { 17 | return new Promise(function(resolve, reject) { 18 | img.onload = function() { 19 | var canvas = document.createElement("canvas"); 20 | canvas.width = img.width; 21 | canvas.height = img.height; 22 | var context = canvas.getContext("2d"); 23 | context.drawImage(img, 0, 0); 24 | canvas.toBlob(resolve); 25 | }; 26 | img.onerror = reject; 27 | }); 28 | } 29 | // POST `data` as form field "file" to OCR_BACKEND_URL + "/ocr"; resolve with the response text when status is in [200, 400), reject otherwise. 30 | function ocr(data) { 31 | return new Promise(function(resolve, reject) { 32 | var xhr = new XMLHttpRequest(); 33 | xhr.open("POST", OCR_BACKEND_URL + "/ocr", true); 34 | xhr.onload = function() { 35 | if (this.status >= 200 && this.status < 400) { 36 | resolve(this.responseText); 37 | } else { 38 | reject(new Error(this.responseText)); 39 | } 40 | }; 41 | xhr.onerror = reject; 42 | var form = new FormData(); 43 | form.append("file", data); 44 | xhr.send(form); 45 | }); 46 | } 47 | // Set the value of both the "captcha-value" and "qr-captcha-value" elements to `answer`. 48 | function setAnswer(answer) { 49 | document.getElementById("captcha-value").value = answer; 50 | document.getElementById("qr-captcha-value").value = answer; 51 | } 52 | // Observe the "captcha-widget-main" element for added IMG nodes; clear the answer, then fill in the OCR result (or "000000" on failure) for each. 53 | function initChaptcha() 
{ 54 | var container = document.getElementById("captcha-widget-main"); 55 | if (!container) return; 56 | var observer = new MutationObserver(function(mutations) { 57 | mutations.forEach(function(mutation) { 58 | Array.prototype.filter.call(mutation.addedNodes, function(node) { 59 | return node.tagName === "IMG"; 60 | }).forEach(function(img) { 61 | setAnswer(""); 62 | getImageData(img).then(ocr).then(function(answer) { 63 | setAnswer(answer); 64 | }, function() { 65 | // Indicate error. 66 | setAnswer("000000"); 67 | }); 68 | }); 69 | }); 70 | }); 71 | // Captcha updates synchronously in all places so it's enough to 72 | // observe only single one. 73 | observer.observe(container, {childList: true}); 74 | } 75 | 76 | initChaptcha(); 77 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | Creative Commons Legal Code 2 | 3 | CC0 1.0 Universal 4 | 5 | CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE 6 | LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN 7 | ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS 8 | INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES 9 | REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS 10 | PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM 11 | THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED 12 | HEREUNDER. 13 | 14 | Statement of Purpose 15 | 16 | The laws of most jurisdictions throughout the world automatically confer 17 | exclusive Copyright and Related Rights (defined below) upon the creator 18 | and subsequent owner(s) (each and all, an "owner") of an original work of 19 | authorship and/or a database (each, a "Work"). 
20 | 21 | Certain owners wish to permanently relinquish those rights to a Work for 22 | the purpose of contributing to a commons of creative, cultural and 23 | scientific works ("Commons") that the public can reliably and without fear 24 | of later claims of infringement build upon, modify, incorporate in other 25 | works, reuse and redistribute as freely as possible in any form whatsoever 26 | and for any purposes, including without limitation commercial purposes. 27 | These owners may contribute to the Commons to promote the ideal of a free 28 | culture and the further production of creative, cultural and scientific 29 | works, or to gain reputation or greater distribution for their Work in 30 | part through the use and efforts of others. 31 | 32 | For these and/or other purposes and motivations, and without any 33 | expectation of additional consideration or compensation, the person 34 | associating CC0 with a Work (the "Affirmer"), to the extent that he or she 35 | is an owner of Copyright and Related Rights in the Work, voluntarily 36 | elects to apply CC0 to the Work and publicly distribute the Work under its 37 | terms, with knowledge of his or her Copyright and Related Rights in the 38 | Work and the meaning and intended legal effect of CC0 on those rights. 39 | 40 | 1. Copyright and Related Rights. A Work made available under CC0 may be 41 | protected by copyright and related or neighboring rights ("Copyright and 42 | Related Rights"). Copyright and Related Rights include, but are not 43 | limited to, the following: 44 | 45 | i. the right to reproduce, adapt, distribute, perform, display, 46 | communicate, and translate a Work; 47 | ii. moral rights retained by the original author(s) and/or performer(s); 48 | iii. publicity and privacy rights pertaining to a person's image or 49 | likeness depicted in a Work; 50 | iv. rights protecting against unfair competition in regards to a Work, 51 | subject to the limitations in paragraph 4(a), below; 52 | v. 
rights protecting the extraction, dissemination, use and reuse of data 53 | in a Work; 54 | vi. database rights (such as those arising under Directive 96/9/EC of the 55 | European Parliament and of the Council of 11 March 1996 on the legal 56 | protection of databases, and under any national implementation 57 | thereof, including any amended or successor version of such 58 | directive); and 59 | vii. other similar, equivalent or corresponding rights throughout the 60 | world based on applicable law or treaty, and any national 61 | implementations thereof. 62 | 63 | 2. Waiver. To the greatest extent permitted by, but not in contravention 64 | of, applicable law, Affirmer hereby overtly, fully, permanently, 65 | irrevocably and unconditionally waives, abandons, and surrenders all of 66 | Affirmer's Copyright and Related Rights and associated claims and causes 67 | of action, whether now known or unknown (including existing as well as 68 | future claims and causes of action), in the Work (i) in all territories 69 | worldwide, (ii) for the maximum duration provided by applicable law or 70 | treaty (including future time extensions), (iii) in any current or future 71 | medium and for any number of copies, and (iv) for any purpose whatsoever, 72 | including without limitation commercial, advertising or promotional 73 | purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each 74 | member of the public at large and to the detriment of Affirmer's heirs and 75 | successors, fully intending that such Waiver shall not be subject to 76 | revocation, rescission, cancellation, termination, or any other legal or 77 | equitable action to disrupt the quiet enjoyment of the Work by the public 78 | as contemplated by Affirmer's express Statement of Purpose. 79 | 80 | 3. Public License Fallback. 
Should any part of the Waiver for any reason 81 | be judged legally invalid or ineffective under applicable law, then the 82 | Waiver shall be preserved to the maximum extent permitted taking into 83 | account Affirmer's express Statement of Purpose. In addition, to the 84 | extent the Waiver is so judged Affirmer hereby grants to each affected 85 | person a royalty-free, non transferable, non sublicensable, non exclusive, 86 | irrevocable and unconditional license to exercise Affirmer's Copyright and 87 | Related Rights in the Work (i) in all territories worldwide, (ii) for the 88 | maximum duration provided by applicable law or treaty (including future 89 | time extensions), (iii) in any current or future medium and for any number 90 | of copies, and (iv) for any purpose whatsoever, including without 91 | limitation commercial, advertising or promotional purposes (the 92 | "License"). The License shall be deemed effective as of the date CC0 was 93 | applied by Affirmer to the Work. Should any part of the License for any 94 | reason be judged legally invalid or ineffective under applicable law, such 95 | partial invalidity or ineffectiveness shall not invalidate the remainder 96 | of the License, and in such case Affirmer hereby affirms that he or she 97 | will not (i) exercise any of his or her remaining Copyright and Related 98 | Rights in the Work or (ii) assert any associated claims and causes of 99 | action with respect to the Work, in either case contrary to Affirmer's 100 | express Statement of Purpose. 101 | 102 | 4. Limitations and Disclaimers. 103 | 104 | a. No trademark or patent rights held by Affirmer are waived, abandoned, 105 | surrendered, licensed or otherwise affected by this document. 106 | b. 
Affirmer offers the Work as-is and makes no representations or 107 | warranties of any kind concerning the Work, express, implied, 108 | statutory or otherwise, including without limitation warranties of 109 | title, merchantability, fitness for a particular purpose, non 110 | infringement, or the absence of latent or other defects, accuracy, or 111 | the present or absence of errors, whether or not discoverable, all to 112 | the greatest extent permissible under applicable law. 113 | c. Affirmer disclaims responsibility for clearing rights of other persons 114 | that may apply to the Work or any use thereof, including without 115 | limitation any person's Copyright and Related Rights in the Work. 116 | Further, Affirmer disclaims responsibility for obtaining any necessary 117 | consents, permissions or other rights required for any use of the 118 | Work. 119 | d. Affirmer understands and acknowledges that Creative Commons is not a 120 | party to this document and has no duty or obligation with respect to 121 | this CC0 or use of the Work. 
122 | -------------------------------------------------------------------------------- /chaptcha.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | recognize CAPTCHA at 2ch.hk 5 | 6 | examples: 7 | python {title} vis -i captcha.png 8 | python {title} collect -o captchas/ -k ag.txt 9 | python {title} train -i captchas/ -o my.net 10 | python {title} ocr -i captcha.png -n my.net 11 | python {title} serve -n my.net 12 | """ 13 | 14 | from __future__ import division 15 | from __future__ import print_function 16 | from __future__ import unicode_literals 17 | 18 | import os 19 | import re 20 | import sys 21 | import time 22 | import errno 23 | import argparse 24 | from datetime import datetime 25 | import threading 26 | import traceback 27 | import numpy as np 28 | import cv2 29 | try: 30 | from fann2 import libfann 31 | except ImportError: 32 | # Ubuntu < 16.04 33 | from pyfann import libfann 34 | import requests 35 | import bottle 36 | 37 | 38 | __title__ = 'chaptcha.py' 39 | __version__ = '0.0.1' 40 | __license__ = 'CC0' 41 | 42 | 43 | NUM_CHARS = 6 44 | CAPTCHA_WIDTH = 220 45 | CAPTCHA_HEIGHT = 80 46 | CH_WIDTH = 22 47 | CH_HEIGHT = 40 48 | LINE_THICK = 2 49 | # See . 
50 | _CONSTANT = str('constant') 51 | 52 | 53 | def check_image(img): 54 | assert img is not None, 'cannot read image' 55 | assert img.shape == (CAPTCHA_HEIGHT, CAPTCHA_WIDTH), 'bad image dimensions' 56 | 57 | 58 | def get_image(fpath): 59 | img = cv2.imread(fpath, 0) 60 | check_image(img) 61 | return img 62 | 63 | 64 | def decode_image(data): 65 | img = cv2.imdecode(data, 0) 66 | check_image(img) 67 | return img 68 | 69 | 70 | def get_network(fpath): 71 | ann = libfann.neural_net() 72 | assert ann.create_from_file(fpath), 'cannot init network' 73 | return ann 74 | 75 | 76 | def get_ch_data(img): 77 | data = img.flatten() & 1 78 | assert len(data) == CH_WIDTH * CH_HEIGHT, 'bad data size' 79 | return data 80 | 81 | 82 | def make_ann_output(digit): 83 | digit = int(digit) 84 | out = [0.0] * 10 85 | out[digit] = 1.0 86 | return out 87 | 88 | 89 | def get_match(answer1, answer2): 90 | return sum(dg1 == dg2 for (dg1, dg2) in zip(answer1, answer2)) 91 | 92 | 93 | def report(line='', progress=False): 94 | if progress: 95 | line = '\033[1A\033[K' + line 96 | line += '\n' 97 | sys.stderr.write(line) 98 | 99 | 100 | def mkdirp(dpath): 101 | try: 102 | os.makedirs(dpath) 103 | except OSError as exc: 104 | if exc.errno != errno.EEXIST: 105 | raise 106 | 107 | 108 | def _denoise(img): 109 | img = cv2.fastNlMeansDenoising(img, None, 65, 5, 21) 110 | img = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY_INV)[1] 111 | return img 112 | 113 | 114 | def _get_lines(img): 115 | lines = cv2.HoughLinesP(img, 1, np.pi / 180, 100, 116 | minLineLength=100, maxLineGap=100) 117 | if lines is None: 118 | lines = [] 119 | return [line[0] for line in lines] 120 | 121 | 122 | def _preprocess(img): 123 | img = img.copy() 124 | img = _denoise(img) 125 | lines = _get_lines(img) 126 | for line in lines: 127 | x1, y1, x2, y2 = line 128 | cv2.line(img, (x1, y1), (x2, y2), 0, LINE_THICK) 129 | return img 130 | 131 | 132 | def segment(img): 133 | def find_filled_row(rows): 134 | for i, row in 
enumerate(rows): 135 | dots = np.sum(row) // 255 136 | if dots >= DOTS_THRESHOLD: 137 | return i 138 | assert False, 'cannot find filled row' 139 | 140 | def pad_ch(ch): 141 | pad_w = CH_WIDTH - len(ch.T) 142 | assert pad_w >= 0, 'bad char width' 143 | pad_w1 = pad_w // 2 144 | pad_w2 = pad_w - pad_w1 145 | pad_h = CH_HEIGHT - len(ch) 146 | assert pad_h >= 0, 'bad char height' 147 | pad_h1 = pad_h // 2 148 | pad_h2 = pad_h - pad_h1 149 | return np.pad(ch, ((pad_h1, pad_h2), (pad_w1, pad_w2)), _CONSTANT) 150 | 151 | BLANK_THRESHHOLD = 3 152 | DOTS_THRESHOLD = 3 153 | CH_MIN_WIDTH = 8 154 | 155 | # Search blank intervals. 156 | img = _preprocess(img) 157 | dots_per_col = np.apply_along_axis(lambda row: np.sum(row) // 255, 0, img) 158 | blanks = [] 159 | was_blank = False 160 | first_ch_x = None 161 | prev_x = 0 162 | x = 0 163 | while x < CAPTCHA_WIDTH: 164 | if dots_per_col[x] >= DOTS_THRESHOLD: 165 | if first_ch_x is None: 166 | first_ch_x = x 167 | if was_blank: 168 | # Skip first blank. 169 | if prev_x: 170 | blanks.append((prev_x, x)) 171 | # Don't allow too tight chars. 172 | x += CH_MIN_WIDTH 173 | was_blank = False 174 | elif not was_blank: 175 | was_blank = True 176 | prev_x = x 177 | x += 1 178 | blanks = [b for b in blanks if b[1] - b[0] >= BLANK_THRESHHOLD] 179 | blanks = sorted(blanks, key=lambda b: b[1] - b[0], reverse=True)[:5] 180 | # No more than one glued pair currently. 181 | assert len(blanks) >= 4, 'bad number of blanks' 182 | blanks = sorted(blanks, key=lambda b: b[0]) 183 | # Add last (imaginary) blank to simplify following loop. 184 | blanks.append((prev_x if was_blank else CAPTCHA_WIDTH, 0)) 185 | 186 | # Get chars. 187 | chars = [] 188 | x1 = first_ch_x 189 | widest = 0, 0 190 | for i, (x2, next_x1) in enumerate(blanks): 191 | width = x2 - x1 192 | # Don't allow more than CH_WIDTH * 2. 
193 | extra_w = width - CH_WIDTH * 2 194 | extra_w1 = extra_w // 2 195 | extra_w2 = extra_w - extra_w1 196 | x1 = max(x1, x1 + extra_w1) 197 | x2 = min(x2, x2 - extra_w2) 198 | ch = img[:CAPTCHA_HEIGHT, x1:x2] 199 | 200 | y2 = CAPTCHA_HEIGHT - find_filled_row(ch[::-1]) 201 | y1 = max(0, y2 - CH_HEIGHT) 202 | ch = ch[y1:y2] 203 | 204 | chars.append(ch) 205 | if width > widest[0]: 206 | widest = x2 - x1, i 207 | x1 = next_x1 208 | 209 | # Fit chars into boxes. 210 | chars2 = [] 211 | for i, ch in enumerate(chars): 212 | widest_w, widest_i = widest 213 | # Split glued chars. 214 | if len(chars) < NUM_CHARS and i == widest_i: 215 | ch1 = ch[:, 0:widest_w // 2] 216 | ch2 = ch[:, widest_w // 2:widest_w] 217 | chars2.append(pad_ch(ch1)) 218 | chars2.append(pad_ch(ch2)) 219 | else: 220 | ch = ch[:, 0:CH_WIDTH] 221 | chars2.append(pad_ch(ch)) 222 | 223 | assert len(chars2) == NUM_CHARS, 'bad number of chars' 224 | return chars2 225 | 226 | 227 | def vis(fpath): 228 | def to_rgb(img): 229 | return cv2.merge([img] * 3) 230 | 231 | BOX_W = CAPTCHA_WIDTH // NUM_CHARS 232 | PAD_W = (BOX_W - CH_WIDTH) // 2 233 | PAD_H = (CAPTCHA_HEIGHT - CH_HEIGHT) // 2 234 | EXTRA_PAD_W = (CAPTCHA_WIDTH % NUM_CHARS) // 2 235 | HIGH_COLOR = (0, 255, 0) 236 | 237 | # Real result used for OCR. 238 | orig = get_image(fpath) 239 | try: 240 | ch_imgs = segment(orig) 241 | except Exception: 242 | traceback.print_exc() 243 | ch_imgs = [np.zeros((CH_HEIGHT, CH_WIDTH), dtype=np.uint8)] * NUM_CHARS 244 | 245 | # Visualizations. 
246 | denoised = _denoise(orig) 247 | with_lines = to_rgb(denoised.copy()) 248 | for line in _get_lines(denoised): 249 | x1, y1, x2, y2 = line 250 | cv2.line(with_lines, (x1, y1), (x2, y2), HIGH_COLOR, LINE_THICK) 251 | processed = _preprocess(orig) 252 | # cv2.imwrite('vis.png', processed) 253 | with_rects = [np.pad(a, ((PAD_H,), (PAD_W,)), _CONSTANT) 254 | for a in ch_imgs] 255 | with_rects = np.concatenate(with_rects, axis=1) 256 | with_rects = np.pad(with_rects, ((0,), (EXTRA_PAD_W,)), _CONSTANT) 257 | with_rects = to_rgb(with_rects) 258 | for i in range(NUM_CHARS): 259 | x1 = i * BOX_W + PAD_W + EXTRA_PAD_W - 1 260 | x2 = x1 + CH_WIDTH + 1 261 | y1 = PAD_H - 1 262 | y2 = y1 + CH_HEIGHT + 1 263 | cv2.rectangle(with_rects, (x1, y1), (x2, y2), HIGH_COLOR, 1) 264 | 265 | res = np.concatenate(( 266 | to_rgb(orig), 267 | to_rgb(denoised), 268 | with_lines, 269 | to_rgb(processed), 270 | with_rects)) 271 | cv2.imshow('opencv-result', res) 272 | cv2.waitKey(0) 273 | cv2.destroyAllWindows() 274 | 275 | 276 | def train(captchas_dir): 277 | NUM_INPUT = CH_WIDTH * CH_HEIGHT 278 | NUM_NEURONS_HIDDEN = NUM_INPUT // 3 279 | NUM_OUTPUT = 10 280 | ann = libfann.neural_net() 281 | ann.create_standard_array((NUM_INPUT, NUM_NEURONS_HIDDEN, NUM_OUTPUT)) 282 | # ann.set_activation_function_hidden(libfann.SIGMOID) 283 | # ann.set_activation_function_output(libfann.SIGMOID) 284 | # ann.randomize_weights(0.0, 0.0) 285 | 286 | start = time.time() 287 | succeed = 0 288 | captchas_dir = os.path.abspath(captchas_dir) 289 | captchas = os.listdir(captchas_dir) 290 | report() 291 | for i, name in enumerate(captchas): 292 | answer = re.match(r'(\d{6})\.png$', name) 293 | if not answer: 294 | continue 295 | answer = answer.group(1) 296 | fpath = os.path.join(captchas_dir, name) 297 | try: 298 | img = get_image(fpath) 299 | ch_imgs = segment(img) 300 | for ch_img, digit in zip(ch_imgs, answer): 301 | ann.train(get_ch_data(ch_img), make_ann_output(digit)) 302 | except Exception as exc: 303 | 
report('Error occured while processing {}: {}'.format(name, exc)) 304 | report() 305 | else: 306 | succeed += 1 307 | report('{}/{}'.format(i + 1, len(captchas)), progress=True) 308 | runtime = time.time() - start 309 | report('Done training on {}/{} captchas in {:.3f} seconds'.format( 310 | succeed, len(captchas), runtime)) 311 | return ann 312 | 313 | 314 | def ocr(ann, img): 315 | def get_digit(ch_img): 316 | data = get_ch_data(ch_img) 317 | out = ann.run(data) 318 | return str(out.index(max(out))) 319 | return ''.join(map(get_digit, segment(img))) 320 | 321 | 322 | def ocr_bench(ann, captchas_dir): 323 | start = time.time() 324 | captchas_dir = os.path.abspath(captchas_dir) 325 | captchas = os.listdir(captchas_dir) 326 | correct = 0 327 | total = 0 328 | full = 0 329 | report() 330 | for i, name in enumerate(captchas): 331 | answer1 = re.match(r'(\d{6})\.png$', name) 332 | if not answer1: 333 | continue 334 | answer1 = answer1.group(1) 335 | total += NUM_CHARS 336 | fpath = os.path.join(captchas_dir, name) 337 | try: 338 | img = get_image(fpath) 339 | answer2 = ocr(ann, img) 340 | except Exception as exc: 341 | report('Error occured while processing {}: {}'.format(name, exc)) 342 | report() 343 | else: 344 | match = get_match(answer1, answer2) 345 | correct += match 346 | if match == NUM_CHARS: 347 | full += match 348 | report('{}/{}'.format(i + 1, len(captchas)), progress=True) 349 | runtime = time.time() - start 350 | report('{:.2f}% ({:.2f}% full) in {:.3f} seconds'.format( 351 | correct / total * 100, 352 | full / total * 100, 353 | runtime)) 354 | 355 | 356 | class FatalAntigateError(Exception): 357 | pass 358 | 359 | 360 | def antigate_ocr(api_key, data, timeout=90, ext='png', 361 | is_numeric=True, min_len=6, max_len=6, 362 | run=None): 363 | def check_run(): 364 | if run is None: 365 | return True 366 | else: 367 | return run.is_set() 368 | 369 | FIRST_SLEEP = 7 370 | ATTEMPT_SLEEP = 2 371 | start = datetime.now() 372 | 373 | # Uploading captcha. 
374 | fields = {'key': api_key, 'method': 'post'} 375 | if is_numeric: 376 | fields['numeric'] = '1' 377 | if min_len: 378 | fields['min_len'] = str(min_len) 379 | if max_len: 380 | fields['max_len'] = str(max_len) 381 | files = {'file': ('captcha.' + ext, data)} 382 | res = requests.post('http://anti-captcha.com/in.php', 383 | data=fields, files=files).text 384 | if res in [ 385 | 'ERROR_WRONG_USER_KEY', 386 | 'ERROR_KEY_DOES_NOT_EXIST', 387 | 'ERROR_ZERO_BALANCE', 388 | 'ERROR_IP_NOT_ALLOWED', 389 | ]: 390 | raise FatalAntigateError(res) 391 | elif not res.startswith('OK|'): 392 | raise Exception(res) 393 | captcha_id = res[3:] 394 | 395 | # Getting captcha text. 396 | fields2 = { 397 | 'key': api_key, 398 | 'action': 'get', 399 | 'id': captcha_id, 400 | } 401 | time.sleep(FIRST_SLEEP) 402 | while check_run(): 403 | res = requests.get('http://anti-captcha.com/res.php', 404 | params=fields2).text 405 | if res.startswith('OK|'): 406 | return res[3:] 407 | elif res == 'CAPCHA_NOT_READY': 408 | delta = datetime.now() - start 409 | if delta.seconds >= timeout or delta.days > 0: 410 | raise Exception('getting captcha text timeout') 411 | time.sleep(ATTEMPT_SLEEP) 412 | else: 413 | raise Exception(res) 414 | raise Exception('antigate_ocr canceled') 415 | 416 | 417 | def get_captcha(): 418 | CAPTCHA_URL = 'https://2ch.hk/makaba/captcha.fcgi' 419 | CAPTCHA_FIELDS = { 420 | 'type': '2chaptcha', 421 | 'action': 'thread', 422 | 'board': 's', 423 | } 424 | CHROME_UA = ( 425 | 'Mozilla/5.0 (Windows NT 6.1; WOW64) ' + 426 | 'AppleWebKit/537.36 (KHTML, like Gecko) ' + 427 | 'Chrome/48.0.2564.116 Safari/537.36' 428 | ) 429 | CAPTCHA_HEADERS = { 430 | 'User-Agent': CHROME_UA, 431 | 'Referer': 'https://2ch.hk/s/', 432 | } 433 | res = requests.get(CAPTCHA_URL, params=CAPTCHA_FIELDS, 434 | headers=CAPTCHA_HEADERS).text 435 | if not res.startswith('CHECK\n'): 436 | raise Exception('bad answer on captcha request') 437 | captcha_id = res[6:] 438 | fields2 = { 439 | 'type': '2chaptcha', 
440 | 'action': 'image', 441 | 'id': captcha_id, 442 | } 443 | r = requests.get(CAPTCHA_URL, params=fields2, headers=CAPTCHA_HEADERS) 444 | if r.headers.get('Content-Type') != 'image/png': 445 | raise Exception('bad captcha result') 446 | return r.content 447 | 448 | 449 | def collect(run, captchas_dir, tmp_path, api_key): 450 | while run.is_set(): 451 | try: 452 | data = get_captcha() 453 | answer = antigate_ocr(api_key, data, run=run) 454 | if not re.match(r'\d{6}$', answer): 455 | raise Exception('bad antigate answer {}'.format(answer)) 456 | name = answer + '.png' 457 | fpath = os.path.join(captchas_dir, name) 458 | # In order to not leave partial files. 459 | open(tmp_path, 'wb').write(data) 460 | os.rename(tmp_path, fpath) 461 | except FatalAntigateError as exc: 462 | if run.is_set(): 463 | report('Fatal antigate error ({}), exiting'.format(exc)) 464 | run.clear() 465 | return 466 | except Exception as exc: 467 | report('Error occured while collecting: {}'.format(exc)) 468 | else: 469 | report('Saved {}'.format(name)) 470 | # Just in case antigate response was too fast. 471 | time.sleep(1) 472 | 473 | 474 | def run_collect_threads(captchas_dir, api_key): 475 | NUM_THREADS = 10 476 | SPAWN_DELAY = 0.5 477 | 478 | threads = [] 479 | run = threading.Event() 480 | run.set() 481 | captchas_dir = os.path.abspath(captchas_dir) 482 | mkdirp(captchas_dir) 483 | 484 | for i in range(NUM_THREADS): 485 | tmp_path = os.path.join(captchas_dir, '.{}.tmp'.format(i)) 486 | thread = threading.Thread(target=collect, 487 | args=(run, captchas_dir, tmp_path, api_key)) 488 | threads.append(thread) 489 | thread.start() 490 | # Slightly smooth initial fetch burst. 
491 | time.sleep(SPAWN_DELAY) 492 | 493 | try: 494 | while run.is_set(): 495 | time.sleep(1) 496 | except KeyboardInterrupt: 497 | report('Closing threads') 498 | run.clear() 499 | for thread in threads: 500 | thread.join() 501 | 502 | 503 | @bottle.post('/ocr') 504 | def serve(): 505 | bottle.response.set_header('Access-Control-Allow-Origin', '*') 506 | try: 507 | fh = bottle.request.files['file'].file 508 | except Exception: 509 | bottle.abort(400, 'No file provided.') 510 | # TODO: Probably there is a better way to store obj ref? 511 | ann = bottle.default_app().ann 512 | img = decode_image(np.fromfile(fh, dtype=np.uint8)) 513 | return ocr(ann, img) 514 | 515 | 516 | def create_app(): 517 | app = bottle.default_app() 518 | app.ann = get_network(os.environ['CHAPTCHA_NETFILE']) 519 | return app 520 | 521 | 522 | def main(): 523 | doc = __doc__.format(title=__title__) 524 | parser = argparse.ArgumentParser( 525 | prog=__title__, 526 | description=doc, 527 | formatter_class=argparse.RawTextHelpFormatter) 528 | parser.add_argument( 529 | 'mode', 530 | choices=['vis', 'collect', 'train', 'ocr', 'ocr-bench', 'serve'], 531 | help='operational mode') 532 | parser.add_argument( 533 | '-V', '--version', 534 | action='version', 535 | version='%(prog)s ' + __version__) 536 | parser.add_argument( 537 | '-i', dest='infile', metavar='infile', 538 | help='input file/directory') 539 | parser.add_argument( 540 | '-o', dest='outfile', metavar='outfile', 541 | help='output file/directory') 542 | parser.add_argument( 543 | '-k', dest='keyfile', metavar='keyfile', 544 | help='antigate key file') 545 | parser.add_argument( 546 | '-n', dest='netfile', metavar='netfile', 547 | help='neural network') 548 | parser.add_argument( 549 | '-b', dest='host', metavar='host', 550 | default='127.0.0.1', 551 | help='listening address (default: %(default)s)') 552 | parser.add_argument( 553 | '-p', dest='port', metavar='port', 554 | type=int, default=28228, 555 | help='listening port (default: 
%(default)s)') 556 | opts = parser.parse_args(sys.argv[1:]) 557 | if opts.mode == 'vis': 558 | if opts.infile is None: 559 | parser.error('specify input captcha') 560 | vis(opts.infile) 561 | elif opts.mode == 'train': 562 | if opts.infile is None: 563 | parser.error('specify input directory with captchas') 564 | if opts.outfile is None: 565 | parser.error('specify output file for network data') 566 | ann = train(opts.infile) 567 | ann.save(opts.outfile) 568 | elif opts.mode == 'ocr': 569 | if opts.infile is None: 570 | parser.error('specify input captcha') 571 | if opts.netfile is None: 572 | parser.error('specify network file') 573 | ann = get_network(opts.netfile) 574 | img = get_image(opts.infile) 575 | print(ocr(ann, img)) 576 | elif opts.mode == 'ocr-bench': 577 | if opts.infile is None: 578 | parser.error('specify input directory with captchas') 579 | if opts.netfile is None: 580 | parser.error('specify network file') 581 | ann = get_network(opts.netfile) 582 | ocr_bench(ann, opts.infile) 583 | elif opts.mode == 'collect': 584 | if opts.outfile is None: 585 | parser.error('specify output directory for captchas') 586 | if opts.keyfile is None: 587 | parser.error('specify antigate key file') 588 | api_key = open(opts.keyfile, 'r').read().strip() 589 | run_collect_threads(opts.outfile, api_key) 590 | elif opts.mode == 'serve': 591 | if opts.netfile is None: 592 | parser.error('specify network file') 593 | ann = get_network(opts.netfile) 594 | bottle.default_app().ann = ann 595 | bottle.run(host=opts.host, port=opts.port) 596 | 597 | 598 | if __name__ == '__main__': 599 | main() 600 | --------------------------------------------------------------------------------