├── .gitignore ├── README.md ├── captcha.png ├── captcha2.png ├── code.jpg ├── code2.jpg ├── main.py ├── verify.py ├── verify2.py ├── verify3.py ├── verify4.py ├── verify5.py └── verify6.py /.gitignore: -------------------------------------------------------------------------------- 1 | /.idea 2 | *.pyc -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CrackImageCode 2 | Crack Image Code by Tesserocr 3 | -------------------------------------------------------------------------------- /captcha.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Python3WebSpider/CrackImageCaptcha/be07c72646c1879aaae379d8981902ae3a1fba2f/captcha.png -------------------------------------------------------------------------------- /captcha2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Python3WebSpider/CrackImageCaptcha/be07c72646c1879aaae379d8981902ae3a1fba2f/captcha2.png -------------------------------------------------------------------------------- /code.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Python3WebSpider/CrackImageCaptcha/be07c72646c1879aaae379d8981902ae3a1fba2f/code.jpg -------------------------------------------------------------------------------- /code2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Python3WebSpider/CrackImageCaptcha/be07c72646c1879aaae379d8981902ae3a1fba2f/code2.jpg -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import time 2 | import re 3 | import tesserocr 4 | from selenium import webdriver 5 | from io import 
from io import BytesIO
from PIL import Image
from retrying import retry
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
import numpy as np


def preprocess(image, threshold=50):
    """Binarize an image before it is handed to the OCR engine.

    The image is converted to 8-bit grayscale; every pixel strictly brighter
    than ``threshold`` becomes white (255) and every other pixel becomes
    black (0).

    Args:
        image: A PIL image.
        threshold: Grayscale cut-off (0-255). Defaults to 50, the value that
            was previously hard-coded here.

    Returns:
        A new grayscale PIL image containing only the values 0 and 255.
    """
    grayscale = np.array(image.convert('L'))
    binary = np.where(grayscale > threshold, 255, 0)
    # np.where returns a default-integer array; cast to uint8 so PIL builds
    # an 8-bit image from it.
    return Image.fromarray(binary.astype('uint8'))


@retry(stop_max_attempt_number=10, retry_on_result=lambda x: x is False)
def login():
    """Make one login attempt on the captcha demo page.

    Uses the module-level ``browser`` driver: loads the page, fills in the
    credentials, screenshots the captcha element, OCRs it and submits.

    Returns:
        True when the success heading shows up within 10 seconds (the browser
        window is closed before returning). False when the OCR produced no
        usable characters or the heading did not appear in time; ``@retry``
        then calls this function again, up to 10 attempts in total.
    """
    browser.get('https://captcha7.scrape.center/')
    # find_element(By.CSS_SELECTOR, ...) replaces the find_element_by_*
    # helpers, which were deprecated in Selenium 4.0 and removed in 4.3.
    browser.find_element(By.CSS_SELECTOR, '.username input[type="text"]').send_keys('admin')
    browser.find_element(By.CSS_SELECTOR, '.password input[type="password"]').send_keys('admin')

    captcha_element = browser.find_element(By.CSS_SELECTOR, '#captcha')
    image = preprocess(Image.open(BytesIO(captcha_element.screenshot_as_png)))
    # Keep only letters and digits; everything else in the OCR output
    # (whitespace, punctuation) is dropped.
    captcha_text = re.sub('[^A-Za-z0-9]', '', tesserocr.image_to_text(image))
    if not captcha_text:
        # An empty captcha cannot be right; skip the submit and the
        # 10-second wait and let @retry start over immediately.
        return False

    browser.find_element(By.CSS_SELECTOR, '.captcha input[type="text"]').send_keys(captcha_text)
    browser.find_element(By.CSS_SELECTOR, '.login').click()
    try:
        # The heading text "登录成功" means "login successful".
        WebDriverWait(browser, 10).until(
            EC.presence_of_element_located((By.XPATH, '//h2[contains(., "登录成功")]')))
    except TimeoutException:
        return False
    # NOTE(review): presumably this pause keeps the success page visible
    # for a moment before the window is closed - confirm it is still wanted.
    time.sleep(10)
    browser.close()
    return True


if __name__ == '__main__':
    browser = webdriver.Chrome()
    try:
        login()
    finally:
        # login() closes the window only on success; quit() also ends the
        # driver session when every attempt failed or an error escaped.
        browser.quit()

# --------------------------------------------------------------------------------
# /verify.py:
# --------------------------------------------------------------------------------
import tesserocr
from PIL import Image

# Run OCR on the image file as-is, without any preprocessing.
image = Image.open('code2.jpg')
result = tesserocr.image_to_text(image)
print(result)
# --------------------------------------------------------------------------------
# /verify2.py:
# --------------------------------------------------------------------------------
import tesserocr

# OCR directly from the file path; no PIL image is opened.
text = tesserocr.file_to_text('code.jpg')
print(text)

# --------------------------------------------------------------------------------
# /verify3.py:
# --------------------------------------------------------------------------------
import tesserocr
from PIL import Image

cutoff = 127
# 256-entry lookup table: gray levels below the cutoff map to 0, the rest to 1.
lookup = [0 if level < cutoff else 1 for level in range(256)]

# Grayscale first, then map every pixel through the table into a mode '1' image.
binary = Image.open('code2.jpg').convert('L').point(lookup, '1')
binary.show()

print(tesserocr.image_to_text(binary))

# --------------------------------------------------------------------------------
# /verify4.py:
# --------------------------------------------------------------------------------
import tesserocr
from PIL import Image

# OCR on the captcha image without any preprocessing.
print(tesserocr.image_to_text(Image.open('captcha2.png')))

# --------------------------------------------------------------------------------
# /verify5.py:
# --------------------------------------------------------------------------------
import tesserocr

text = tesserocr.file_to_text('captcha2.png')
print(text)

# --------------------------------------------------------------------------------
# /verify6.py:
# --------------------------------------------------------------------------------
import tesserocr
from PIL import Image
import numpy as np

cutoff = 50
pixels = np.array(Image.open('captcha2.png').convert('L'))
# Pixels brighter than the cutoff become 255, all others 0; the cast to uint8
# happens before the array is turned back into a PIL image.
binary = Image.fromarray(np.where(pixels > cutoff, 255, 0).astype('uint8'))
print(tesserocr.image_to_text(binary))

# --------------------------------------------------------------------------------