├── README.md ├── geetest_crack_gsxt.py ├── geetest_crack_gsxt_v2.py ├── geetest_crack_mobile.py ├── img1.jpg ├── img2.jpg ├── test_track_generator.ipynb └── track_record /README.md: -------------------------------------------------------------------------------- 1 | # Interesting idea: 2 | 3 | Use Deep Reinforcement Learning to solve this problem, just like what they did in [OpenAI Gym](https://gym.openai.com/) ?? 4 | 5 | 6 | # Geetest_GSXT_Captcha_Crack 7 | 8 | geetest_crack_gsxt.py 9 | 10 | Use Chrome as webdriver. 11 | 12 | Crack Gongshang website with Selenium and Chrome. 13 | 14 | TODO: 15 | 16 | [DONE] Modify code to record tracks (pass or fail) 17 | 18 | [TODO] Train a machine learning model to classify tracks, and use the model to generate better tracks. 19 | 20 | Reference: http://www.gsxt.gov.cn/index 21 | 22 | # Geetest_Mobile_Captcha_Crack 23 | 24 | geetest_crack_mobile.py (unmaintained) 25 | 26 | Please use Firefox 45.0. Newest version of Firefox may cause problems. 27 | 28 | Crack Mobile Version of Geetest Captcha with Firefox and PhantomJS 29 | 30 | Reference: http://www.geetest.com/mobile-pc 31 | 32 | 33 | -------------------------------------------------------------------------------- /geetest_crack_gsxt.py: -------------------------------------------------------------------------------- 1 | #-*-coding:utf-8-*- 2 | import requests 3 | import re 4 | import StringIO 5 | from PIL import Image 6 | import random 7 | import math 8 | import time 9 | from selenium.webdriver.common.desired_capabilities import DesiredCapabilities 10 | from selenium.webdriver.support.ui import WebDriverWait 11 | from selenium.webdriver.support import expected_conditions as EC 12 | from selenium.webdriver.common.by import By 13 | from selenium.webdriver.common.action_chains import ActionChains 14 | from selenium import webdriver 15 | from bs4 import BeautifulSoup 16 | from subprocess import call 17 | debug = True 18 | 19 | class crack_picture(object): 20 | def __init__(self, img_url1, img_url2): 21 | self.img1, self.img2 = self.picture_get(img_url1, img_url2) 22 | 23 | 24 | def picture_get(self, img_url1, img_url2): 25 | hd = {"Host": "static.geetest.com", 26 | "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"} 27 | img1 = StringIO.StringIO(self.repeat(img_url1, hd).content) 28 | img2 = StringIO.StringIO(self.repeat(img_url2, hd).content) 29 | return img1, img2 30 | 31 | 32 | def repeat(self, url, hd): 33 | times = 10 34 | while times > 0: 35 | try: 36 | ans = requests.get(url, headers=hd) 37 | return ans 38 | except: 39 | times -= 1 40 | 41 | 42 | def pictures_recover(self): 43 | xpos = self.judge(self.picture_recover(self.img1, 'img1.jpg'), self.picture_recover(self.img2, 'img2.jpg')) - 6 44 | return self.geetest_track_int(xpos) #_int, _float, _test 45 | 46 | 47 | def picture_recover(self, img, name): 48 | a =[39, 38, 48, 49, 41, 40, 46, 47, 35, 34, 50, 51, 33, 32, 28, 29, 27, 26, 36, 37, 31, 30, 44, 45, 43, 42, 12, 13, 23, 22, 14, 15, 21, 20, 8, 9, 25, 24, 6, 7, 3, 2, 0, 1, 11, 10, 4, 5, 19, 18, 16, 17] 49 | im = Image.open(img) 50 | im_new = Image.new("RGB", (260, 116)) 51 | for row in range(2): 52 | for column in range(26): 53 | right = a[row*26+column] % 26 * 12 + 1 54 | down = 58 if a[row*26+column] > 25 else 0 55 | for w in range(10): 56 | for h in range(58): 57 | ht = 58 * row + h 58 | wd = 10 * column + w 59 | im_new.putpixel((wd, ht), im.getpixel((w + right, h + down))) 60 | im_new.save(name) 61 | return im_new 62 | 63 | def geetest_track_float(self, distance): #Fail, cannot move non-int pixel steps 64 | print "generating track..." 65 | come_back = random.uniform(-2,3) 66 | cur_loc = 0 67 | track_list = [] 68 | magic_ratio = 1 69 | if distance < 50: 70 | magic_ratio = 1. 71 | else: 72 | magic_ratio = distance/50. 73 | print magic_ratio 74 | while cur_loc < distance * 1 / 4: 75 | track = random.uniform(2*magic_ratio, 4*magic_ratio) 76 | sleep_time = random.randint(10, 50) / 1000. 77 | track_list.append([track, 0.5, sleep_time]) 78 | cur_loc = cur_loc + track 79 | if len(track_list) > 50: 80 | print "whoops1!" 81 | print track_list 82 | return 83 | 84 | while cur_loc < distance * 2 / 4: 85 | track = random.uniform(4*magic_ratio, 6*magic_ratio) 86 | sleep_time = random.randint(10, 50) / 5000. 87 | track_list.append([track, 0.3, sleep_time]) 88 | cur_loc = cur_loc + track 89 | if len(track_list) > 50: 90 | print "whoops2!" 91 | print track_list 92 | return 93 | 94 | while cur_loc < distance * 3 / 4: 95 | track = random.uniform(3*magic_ratio, 5*magic_ratio) 96 | sleep_time = random.randint(10, 50) / 4000. 97 | track_list.append([track, 0.6, sleep_time]) 98 | cur_loc = cur_loc + track 99 | if len(track_list) > 50: 100 | print "whoops3!" 101 | print track_list 102 | return 103 | 104 | while cur_loc < distance + come_back: 105 | track = random.uniform(2*magic_ratio, 4*magic_ratio) 106 | sleep_time = random.randint(10, 50) / 500. 107 | track_list.append([track, 0.5, sleep_time]) 108 | cur_loc = cur_loc + track 109 | if len(track_list) > 50: 110 | print "whoops4!" 111 | print track_list 112 | return 113 | 114 | #Final Adjustment 115 | dist = 999 116 | while abs(dist) > 1: 117 | dist = cur_loc - distance 118 | if dist > 0: 119 | track = -1 * random.uniform(0.5, 2) 120 | else: 121 | track = random.uniform(0.5, 2) 122 | cur_loc = cur_loc + track 123 | sleep_time = random.randint(10, 30) / 100. 124 | track_list.append([track, 0.5, sleep_time]) 125 | if len(track_list) > 50: 126 | print "whoops5!" 127 | print track_list 128 | return 129 | return track_list 130 | 131 | def geetest_track_int(self, distance): 132 | print "generate track..." 133 | come_back = random.randint(-2,3) 134 | cur_loc = 0 135 | track_list = [] 136 | magic_ratio = 1 137 | if distance < 100: 138 | magic_ratio = 1 139 | #else: 140 | # magic_ratio = distance/100. 141 | time_ratio = 1 142 | while cur_loc < distance * 1 / 4: 143 | track = random.randint(math.floor(2*magic_ratio), math.floor(4*magic_ratio)) 144 | sleep_time = random.randint(10, 50) / 1000.*time_ratio 145 | track_list.append([track, 0.5, sleep_time]) 146 | cur_loc = cur_loc + track 147 | if len(track_list) > 100: 148 | print "whoops1!" 149 | print track_list 150 | return 151 | 152 | while cur_loc < distance * 2 / 4: 153 | track = random.randint(math.floor(4*magic_ratio), math.floor(6*magic_ratio)) 154 | sleep_time = random.randint(10, 50) / 5000.*time_ratio 155 | track_list.append([track, 0.3, sleep_time]) 156 | cur_loc = cur_loc + track 157 | if len(track_list) > 100: 158 | print "whoops2!" 159 | print track_list 160 | return 161 | 162 | while cur_loc < distance * 3 / 4: 163 | track = random.randint(math.floor(3*magic_ratio), math.floor(5*magic_ratio)) 164 | sleep_time = random.randint(10, 50) / 4000.*time_ratio 165 | track_list.append([track, 0.6, sleep_time]) 166 | cur_loc = cur_loc + track 167 | if len(track_list) > 100: 168 | print "whoops3!" 169 | print track_list 170 | return 171 | 172 | while cur_loc < distance + come_back: 173 | track = random.randint(math.floor(2*magic_ratio), math.floor(4*magic_ratio)) 174 | sleep_time = random.randint(10, 50) / 500.*time_ratio 175 | track_list.append([track, 0.5, sleep_time]) 176 | cur_loc = cur_loc + track 177 | if len(track_list) > 100: 178 | print "whoops4!" 179 | print track_list 180 | return 181 | 182 | #Final Adjustment 183 | dist = 999 184 | while abs(dist) > 2: 185 | dist = cur_loc - distance 186 | if dist > 0: 187 | track = -1 * random.randint(0, 1) 188 | else: 189 | track = random.randint(0, 1) 190 | cur_loc = cur_loc + track 191 | sleep_time = random.randint(10, 30) / 100.*time_ratio 192 | track_list.append([track, 0.5, sleep_time]) 193 | if len(track_list) > 100: 194 | print "whoops5!" 195 | print track_list 196 | return 197 | return track_list 198 | 199 | 200 | def geetest_track_test(self, distance): 201 | return [[distance, 0.5, 1]] 202 | #crucial trace code was deleted 203 | #tip-->> 1. to generate the trace array randomly 204 | # 2. to collect trace array manually 205 | 206 | 207 | 208 | def diff(self, img1, img2, wd, ht): 209 | rgb1 = img1.getpixel((wd, ht)) 210 | rgb2 = img2.getpixel((wd, ht)) 211 | tmp = reduce(lambda x,y: x+y, map(lambda x: abs(x[0]-x[1]), zip(rgb1, rgb2))) 212 | return True if tmp >= 200 else False 213 | 214 | 215 | def col(self, img1, img2, cl): 216 | for i in range(img2.size[1]): 217 | if self.diff(img1, img2, cl, i): 218 | return True 219 | return False 220 | 221 | 222 | def judge(self, img1, img2): 223 | for i in range(img2.size[0]): 224 | if self.col(img1, img2, i): 225 | return i 226 | return -1 227 | 228 | 229 | class gsxt(object): 230 | def __init__(self, br_name="phantomjs"): 231 | self.br = self.get_webdriver(br_name) 232 | self.wait = WebDriverWait(self.br, 10, 1.0) 233 | self.br.set_page_load_timeout(8) 234 | self.br.set_script_timeout(8) 235 | 236 | 237 | def input_params(self, name): 238 | self.br.get("http://www.gsxt.gov.cn/index") 239 | element = self.wait_for(By.ID, "keyword") 240 | element.send_keys(name) 241 | time.sleep(1.1) 242 | element = self.wait_for(By.ID, "btn_query") 243 | element.click() 244 | time.sleep(1.1) 245 | 246 | 247 | def drag_pic(self): 248 | return (self.find_img_url(self.wait_for(By.CLASS_NAME, "gt_cut_fullbg_slice")), 249 | self.find_img_url(self.wait_for(By.CLASS_NAME, "gt_cut_bg_slice"))) 250 | 251 | 252 | def wait_for(self, by1, by2): 253 | return self.wait.until(EC.presence_of_element_located((by1, by2))) 254 | 255 | 256 | def find_img_url(self, element): 257 | try: 258 | return re.findall('url\("(.*?)"\)', element.get_attribute('style'))[0].replace("webp", "jpg") 259 | except: 260 | return re.findall('url\((.*?)\)', element.get_attribute('style'))[0].replace("webp", "jpg") 261 | 262 | 263 | def emulate_track(self, tracks): 264 | element = self.br.find_element_by_class_name("gt_slider_knob") 265 | ActionChains(self.br).click_and_hold(on_element=element).perform() 266 | for x, y, t in tracks: 267 | print x, y ,t 268 | ActionChains(self.br).move_to_element_with_offset( 269 | to_element=element, 270 | xoffset=x+22., 271 | yoffset=y+22.).perform() 272 | ActionChains(self.br).click_and_hold().perform() 273 | time.sleep(t) 274 | time.sleep(0.24) 275 | ActionChains(self.br).release(on_element=element).perform() 276 | time.sleep(0.8) 277 | element = self.wait_for(By.CLASS_NAME, "gt_info_text") 278 | ans = element.text.encode("utf-8") 279 | print ans 280 | return ans 281 | 282 | 283 | def run(self): 284 | while True: 285 | for i in [u'招商银行', u'交通银行', u'中国银行']: 286 | self.hack_geetest(i) 287 | time.sleep(1) 288 | self.quit_webdriver() 289 | 290 | 291 | def hack_geetest(self, company=u"招商银行"): 292 | flag = True 293 | self.input_params(company) 294 | fail_count = 0 295 | outfile = open('track_record', 'a') 296 | while flag: 297 | img_url1, img_url2 = self.drag_pic() 298 | tracks = crack_picture(img_url1, img_url2).pictures_recover() 299 | tsb = self.emulate_track(tracks) 300 | #print "hahaha" 301 | #print tsb 302 | 303 | if '通过' in tsb: 304 | time.sleep(1) 305 | print >> outfile, 'True:' + str(tracks) 306 | soup = BeautifulSoup(self.br.page_source, 'html.parser') 307 | for sp in soup.find_all("a", attrs={"class":"search_list_item"}): 308 | print re.sub("\s+", "", sp.get_text().encode("utf-8")) 309 | #print sp.get_text() 310 | break 311 | elif '吃' or '失败' in tsb: 312 | print >> outfile, 'False:' + str(tracks) 313 | fail_count += 1 314 | if fail_count > 4: 315 | flag = False 316 | time.sleep(5) 317 | else: 318 | self.input_params(company) 319 | 320 | 321 | def quit_webdriver(self): 322 | self.br.quit() 323 | 324 | 325 | def get_webdriver(self, name): 326 | if name.lower() == "phantomjs": 327 | exe_path = '/home/guan/Software/phantomjs-2.1.1-linux-x86_64/bin/phantomjs' 328 | dcap = dict(DesiredCapabilities.PHANTOMJS) 329 | dcap["phantomjs.page.settings.userAgent"] = ( 330 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36") 331 | return webdriver.PhantomJS(desired_capabilities=dcap, executable_path=exe_path) 332 | 333 | elif name.lower() == "chrome": 334 | return webdriver.Chrome("/usr/lib/chromium-browser/chromedriver") 335 | 336 | 337 | if __name__ == "__main__": 338 | #print crack_picture("http://static.geetest.com/pictures/gt/fc064fc73/fc064fc73.jpg", "http://static.geetest.com/pictures/gt/fc064fc73/bg/7ca363b09.jpg").pictures_recover() 339 | while True: 340 | try: 341 | gsxt("chrome").run() 342 | except Exception as e: 343 | print e 344 | call("kill $(ps ax | grep chromedriver | awk '{print $1}')", shell=True) 345 | call("kill $(ps ax | grep chromium | awk '{print $1}')", shell=True) 346 | time.sleep(30) 347 | continue 348 | 349 | 350 | #gsxt("phantomjs").run() 351 | 352 | 353 | 354 | -------------------------------------------------------------------------------- /geetest_crack_gsxt_v2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import re 4 | import time 5 | from selenium import webdriver 6 | from io import BytesIO 7 | 8 | from PIL import Image 9 | from selenium.webdriver.common.action_chains import ActionChains 10 | from selenium.webdriver.common.by import By 11 | from selenium.webdriver.support.ui import WebDriverWait 12 | from selenium.webdriver.support import expected_conditions as EC 13 | import random 14 | from subprocess import call 15 | from bs4 import BeautifulSoup 16 | 17 | os.chdir('.') 18 | 19 | PROXY = "120.199.224.78:80" 20 | chrome_options = webdriver.ChromeOptions() 21 | chrome_options.add_argument('--proxy-server=%s' % PROXY) 22 | 23 | driver = webdriver.Chrome("/usr/lib/chromium-browser/chromedriver", chrome_options=chrome_options) 24 | #driver = webdriver.Chrome("/usr/lib/chromium-browser/chromedriver") 25 | 26 | global try1 27 | try1 = 0 28 | global success 29 | success = 0 30 | 31 | 32 | # driver.get("http://www.gsxt.gov.cn/index") 33 | # wait = WebDriverWait(driver, 10, 1.0) 34 | # element = wait.until(EC.presence_of_element_located((By.ID, "keyword"))) 35 | # element.send_keys(u"小桔科技") 36 | # time.sleep(1.1) 37 | # element = wait.until(EC.presence_of_element_located((By.ID, "btn_query"))) 38 | # element.click() 39 | # time.sleep(1.1) 40 | 41 | 42 | def get_captcha_image(filename): 43 | screenshot = driver.get_screenshot_as_png() 44 | screenshot = Image.open(BytesIO(screenshot)) 45 | # screenshot.show() 46 | 47 | captcha_el = driver.find_element_by_class_name("gt_box") 48 | location = captcha_el.location 49 | size = captcha_el.size 50 | left = location['x'] 51 | top = location['y'] 52 | right = location['x'] + size['width'] 53 | bottom = location['y'] + size['height'] 54 | box = (left, top, right, bottom) 55 | print(box) 56 | # if box[0] == 0: 57 | # raise(Exception('=======')) 58 | captcha_image = screenshot.crop(box) 59 | captcha_image.save(filename) # "%s.png" % uuid.uuid4().hex 60 | # print(u'截图成功') 61 | 62 | 63 | def find_offset(diff_image, offset_=62): 64 | d = diff_image.convert("L").point(lambda i: i > 52, mode='1') 65 | d.save('x-%s.png' % time.strftime("%Y%m%d-%H%M%S")) 66 | b1 = d.getbbox() # left, upper, right, and lower pixel coordinate 67 | # offset_ = 65 68 | b2 = d.crop((offset_, 0, d.width, d.height)).getbbox() 69 | global first_left 70 | first_left = b1[0] 71 | offset = b2[0] + offset_ - b1[0] - 2 72 | if b2[0] <= 4: 73 | offset = -1 74 | return offset 75 | # diff = diff_image.load() 76 | # http://stackoverflow.com/questions/9038160/break-two-for-loops 77 | # for x in range(61, width): 78 | # for y in range(height): 79 | # if all(i > 40 for i in diff[x, y]): 80 | # return x - 6 81 | 82 | 83 | def get_offsets(offset, setpointX): 84 | ''' 85 | 切记不能移动小数个像素位置 86 | ''' 87 | kp = 3.0 88 | ki = 0.0001 89 | kd = 80.0 90 | 91 | x = 0 92 | vx = 0 93 | prevErrorX = 0 94 | integralX = 0 95 | derivativeX = 0 96 | 97 | while 1: 98 | if x >= setpointX: 99 | break 100 | 101 | errorX = setpointX - x 102 | # print('xxxxx - ', x) 103 | integralX += errorX 104 | derivativeX = errorX - prevErrorX 105 | prevErrorX = errorX 106 | if offset < 100: 107 | K = 0.007 108 | elif offset < 180: 109 | K = 0.006 110 | else: 111 | K = 0.005 112 | ax = K * (kp * errorX + ki * integralX + kd * derivativeX) 113 | vx += ax 114 | 115 | if x + vx > setpointX: 116 | vx = setpointX - x 117 | vx = int(vx) 118 | if vx < 1: 119 | vx = random.randint(1, 3) 120 | yield vx 121 | print('vvvvv - ', vx) 122 | x += vx 123 | 124 | 125 | def get_offsets_back(goal): 126 | x = 0 127 | while 1: 128 | if x >= goal: 129 | break 130 | dx = random.randint(10, 50) 131 | if x + dx > goal: 132 | dx = goal - x 133 | yield dx 134 | x += dx 135 | 136 | 137 | def go(driver, name): 138 | driver.get("http://www.gsxt.gov.cn/index") 139 | wait = WebDriverWait(driver, 10, 1.0) 140 | element = wait.until(EC.presence_of_element_located((By.ID, "keyword"))) 141 | element.send_keys(name) 142 | time.sleep(1.1) 143 | element = wait.until(EC.presence_of_element_located((By.ID, "btn_query"))) 144 | element.click() 145 | time.sleep(1.1) 146 | go = True 147 | while go: 148 | 149 | time.sleep(0.2) 150 | # 设定窗口大小 151 | width = 1280 152 | height = 800 153 | driver.set_window_size(width, height) 154 | 155 | time.sleep(1) 156 | WebDriverWait(driver, 8).until( 157 | EC.presence_of_element_located((By.CLASS_NAME, "gt_box"))) 158 | knob = driver.find_element_by_class_name("gt_slider_knob") 159 | action = ActionChains(driver) 160 | action.move_to_element_with_offset(knob, 21, 21).perform() # # knob(21,21)的点,可移动到write上 161 | time.sleep(1) 162 | f_file = 'f-%s.png' % time.strftime("%Y%m%d-%H%M%S") 163 | get_captcha_image(f_file) 164 | ActionChains(driver).click_and_hold().perform() 165 | time.sleep(0.5) 166 | # action.drag_and_drop_by_offset(knob, x_offset, y_offset).perform() 167 | s_file = 's-%s.png' % time.strftime("%Y%m%d-%H%M%S") 168 | get_captcha_image(s_file) 169 | # action.move_by_offset(50, 0).release().perform() 170 | # action.reset_actions() 171 | 172 | # -------------------------------------------------------------- 173 | 174 | from PIL import Image, ImageFilter 175 | from PIL import ImageChops 176 | 177 | # 直观感受图片差异 178 | image_f = Image.open(f_file) 179 | image_s = Image.open(s_file) 180 | diff = ImageChops.difference(image_f, image_s) 181 | 182 | # ----------------------显示图片debug---------------------------- 183 | 184 | # -------------------------debug-------------------------------- 185 | global first_left 186 | first_left = 0 187 | 188 | offset = find_offset(diff) 189 | if offset < 0: 190 | # 拖动滑块到右方160像素处保持并截图 191 | ActionChains(driver).move_by_offset(160, 0).perform() 192 | time.sleep(0.5) 193 | # action.drag_and_drop_by_offset(knob, x_offset, y_offset).perform() 194 | s_file = 's-%s.png' % time.strftime("%Y%m%d-%H%M%S") 195 | get_captcha_image(s_file) 196 | # 放下 197 | ActionChains(driver).release().perform() 198 | image_s = Image.open(s_file) 199 | diff = ImageChops.difference(image_f, image_s) 200 | d = diff.convert("L").point(lambda i: i > 60, mode='1') 201 | offset = d.getbbox()[0] - first_left 202 | time.sleep(2.5) 203 | ActionChains(driver).move_to_element_with_offset( 204 | knob, 21, 21).click_and_hold().perform() 205 | time.sleep(0.5) 206 | print(offset) 207 | 208 | for o in get_offsets(offset, offset): 209 | y = random.randint(-1, 1) 210 | ActionChains(driver).move_by_offset(o, y).perform() 211 | # time.sleep(0.03) 212 | time.sleep(random.randint(2, 4) / 100) 213 | ActionChains(driver).release().perform() 214 | # action.drag_and_drop_by_offset(knob, offset, 0).perform() 215 | time.sleep(3) 216 | driver.refresh() 217 | #tsb = wait.until(EC.presence_of_element_located((By.CLASS_NAME, "gt_info_text"))) 218 | #ans = element.text.encode("utf-8") 219 | #global try1 220 | #global success 221 | 222 | #if '通过' in tsb: 223 | # try1 = try1 + 1 224 | # success = success + 1 225 | # time.sleep(1) 226 | # soup = BeautifulSoup(wait.page_source, 'html.parser') 227 | # for sp in soup.find_all("a", attrs={"class": "search_list_item"}): 228 | # print re.sub("\s+", "", sp.get_text().encode("utf-8")) 229 | # print sp.get_text() 230 | # print("try:", try1, "success:", success) 231 | # break 232 | #elif '吃' or '失败' in tsb: 233 | # try1 = try1 + 1 234 | # print("try:", try1, "success:", success) 235 | 236 | go = False 237 | 238 | 239 | while True: 240 | try: 241 | for i in [u'招商银行', u'交通银行', u'中国银行']: 242 | go(driver, i) 243 | time.sleep(1) 244 | except Exception as e: 245 | print e 246 | call("kill $(ps ax | grep chromedriver | awk '{print $1}')", shell=True) 247 | call("kill $(ps ax | grep chromium | awk '{print $1}')", shell=True) 248 | time.sleep(30) 249 | continue 250 | -------------------------------------------------------------------------------- /geetest_crack_mobile.py: -------------------------------------------------------------------------------- 1 | #!/usr/local/bin/python 2 | # -*- coding: utf8 -*- 3 | 4 | 5 | 6 | from selenium import webdriver 7 | from selenium.webdriver.support.ui import WebDriverWait 8 | from selenium.webdriver.common.action_chains import ActionChains 9 | from PIL import Image 10 | import PIL.Image as image 11 | import time,re,cStringIO,urllib2,random 12 | 13 | 14 | 15 | def is_similar(image1,image2,x,y): 16 | ''' 17 | 对比RGB值 18 | ''' 19 | pass 20 | 21 | pixel1=image1.getpixel((x,y)) 22 | pixel2=image2.getpixel((x,y)) 23 | 24 | for i in range(0,3): 25 | if abs(pixel1[i]-pixel2[i])>=50: 26 | return False 27 | 28 | return True 29 | 30 | def get_diff_location(image1,image2): 31 | ''' 32 | 计算缺口的位置 33 | ''' 34 | 35 | i=0 36 | 37 | for i in range(0,260): 38 | #One Vertical Line Must be Different to decide the location 39 | count = 0 40 | for j in range(0,116): 41 | if is_similar(image1,image2,260-i-1,j)==False: 42 | count = count + 1 43 | if count > 10: 44 | return 260-i-1-44 45 | 46 | 47 | 48 | 49 | 50 | def geettest_crack(driver, screenshot_path = ''): 51 | 52 | # 这里的文件路径是webdriver的文件路径 53 | #driver = webdriver.PhantomJS(executable_path='/home/guan/Software/phantomjs-2.1.1-linux-x86_64/bin/phantomjs') 54 | #driver = webdriver.Chrome(executable_path=r"C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe") 55 | #driver = webdriver.Firefox() 56 | #driver.maximize_window() 57 | # 打开网页 58 | driver.get("http://www.geetest.com/mobile-pc") 59 | time.sleep(3) 60 | login_button = driver.find_element_by_id("login") 61 | login_button.click() 62 | 63 | 64 | # 等待页面的上元素刷新出来 65 | #WebDriverWait(driver, 30).until(lambda the_driver: the_driver.find_element_by_xpath("//div[@class='gt_slider_knob gt_show']").is_displayed()) 66 | #WebDriverWait(driver, 30).until(lambda the_driver: the_driver.find_element_by_xpath("//div[@class='gt_cut_bg gt_show']").is_displayed()) 67 | #WebDriverWait(driver, 30).until(lambda the_driver: the_driver.find_element_by_xpath("//div[@class='gt_cut_fullbg gt_show']").is_displayed()) 68 | 69 | #To be replaced by wait until 70 | time.sleep(10) 71 | driver.save_screenshot(screenshot_path + 'screenshot_start.png') 72 | #outfile_webpage = open('test.html', 'w') 73 | #outfile_webpage.write(driver.page_source.encode('utf8') + '\n') 74 | #outfile_webpage.close() 75 | 76 | # 下载图片 77 | #image1=get_image(driver, "//div[@class='gt_cut_bg gt_show']/div") 78 | #image2=get_image(driver, "//div[@class='gt_cut_fullbg gt_show']/div") 79 | #captcha_el = driver.find_element_by_id("embed-captcha") 80 | captcha_el = driver.find_element_by_xpath('//*[local-name() = "svg"]') 81 | location = captcha_el.location 82 | size = captcha_el.size 83 | left = int(location['x']) 84 | top = int(location['y']) 85 | right = int(location['x'] + size['width']) 86 | bottom = int(location['y'] + size['height']) 87 | 88 | #dragger = driver.find_element_by_class_name("gt_slider_knob") 89 | 90 | 91 | 92 | trial_count = 0 93 | while trial_count < 5: 94 | # FireFox: 95 | # element = driver.find_element_by_xpath('//*[local-name() = "circle"][@style="fill: rgb(255, 255, 255); stroke-width: 1.5;"]') 96 | # PhantomJS: 97 | element = driver.find_element_by_xpath('//*[local-name() = "circle"][@style="fill: #ffffff; stroke-width: 1.5px;"]') 98 | time.sleep(3) 99 | driver.save_screenshot(screenshot_path + 'screenshot1.png') 100 | img1 = Image.open(screenshot_path + 'screenshot1.png') 101 | img1 = img1.crop((left, top, right, bottom)) 102 | img1.save(screenshot_path + 'screenshot1.png') 103 | action = ActionChains(driver) 104 | action.drag_and_drop_by_offset(element, 5, 0).perform() 105 | time.sleep(5) 106 | 107 | driver.save_screenshot(screenshot_path + 'screenshot2.png') 108 | img2 = Image.open(screenshot_path + 'screenshot2.png') 109 | img2 = img2.crop((left, top, right, bottom)) 110 | img2.save(screenshot_path + 'screenshot2.png') 111 | # 计算缺口位置 112 | loc=get_diff_location(img1, img2) 113 | print 'Location is: ' + str(loc) 114 | 115 | # 生成x的移动轨迹点 116 | track_list=get_track(loc) 117 | 118 | # 找到滑动的圆球 119 | #element=driver.find_element_by_xpath("//div[@class='gt_slider_knob gt_show']") 120 | #element = driver.find_element_by_xpath('//*[local-name() = "circle"][@style="fill: rgb(255, 255, 255); stroke-width: 1.5;"]') 121 | location=element.location 122 | # 获得滑动圆球的高度 123 | y=location['y'] 124 | init_x = location['x'] 125 | #ActionChains(driver).drag_and_drop_by_offset(source=element, xoffset = loc, yoffset= 0).perform() 126 | # 鼠标点击元素并按住不放 127 | #print "第一步,点击元素" 128 | 129 | 130 | ActionChains(driver).click_and_hold(on_element=element).perform() 131 | time.sleep(0.15) 132 | #print loc - 5 133 | 134 | come_back = random.randint(-2,5) 135 | 136 | while element.location['x'] - init_x < loc * 1 / 4 : 137 | track = random.randint(2, 4) 138 | ActionChains(driver).move_to_element_with_offset(to_element=element, xoffset=track + 22, yoffset=22).perform() 139 | time.sleep(random.randint(10, 50) / 1000.) 140 | 141 | while element.location['x'] - init_x < loc * 2 / 4 : 142 | track = random.randint(4, 6) 143 | ActionChains(driver).move_to_element_with_offset(to_element=element, xoffset=track + 22, yoffset=22).perform() 144 | # time.sleep(0.5) 145 | # print element.location['x'] - init_x 146 | time.sleep(random.randint(10, 50) / 5000.) 147 | 148 | while element.location['x'] - init_x < loc * 3 / 4 : 149 | track = random.randint(3,5) 150 | ActionChains(driver).move_to_element_with_offset(to_element=element, xoffset=track + 22, yoffset=22).perform() 151 | time.sleep(random.randint(10, 50)/ 4000.) 152 | 153 | while element.location['x'] - init_x < loc + come_back: 154 | track = random.randint(2,4) 155 | ActionChains(driver).move_to_element_with_offset(to_element=element, xoffset=track + 22, yoffset=22).perform() 156 | time.sleep(random.randint(10, 50)/ 500.) 157 | 158 | 159 | 160 | target_x = init_x + loc - 5 161 | dist = 999 162 | while abs(dist) > 2: 163 | 164 | dist = element.location['x'] - target_x 165 | #track = -1 * random.randint(1, 2) 166 | if dist > 0: 167 | track = -1 * random.randint(2, 4) 168 | else: 169 | track = random.randint(2, 3) 170 | ActionChains(driver).move_to_element_with_offset(to_element=element, xoffset=track + 22, yoffset=22).perform() 171 | time.sleep(random.randint(10, 30) / 100.) 172 | 173 | 174 | #driver.save_screenshot('screenshot_finish1.png') 175 | 176 | 177 | ActionChains(driver).release(on_element=element).perform() 178 | #driver.save_screenshot('screenshot_finish2.png') 179 | time.sleep(1) 180 | driver.save_screenshot(screenshot_path + 'screenshot_finish3.png') 181 | time.sleep(3) 182 | if element.location['x'] == init_x: 183 | print "Retrial..." 184 | trial_count += 1 185 | 186 | #print driver.current_url 187 | else: 188 | submit=driver.find_element_by_id("embed-submit") 189 | ActionChains(driver).click(on_element=submit).perform() 190 | time.sleep(3) 191 | driver.quit() 192 | print "Cracked Geettest." 193 | break 194 | 195 | 196 | #geettest_crack() 197 | if __name__ == '__main__': 198 | 199 | 200 | driver = webdriver.Firefox() 201 | #Can be replaced by PhantomJS driver 202 | try: 203 | geettest_crack(driver) 204 | except Exception as e: 205 | print e 206 | driver.quit() 207 | 208 | -------------------------------------------------------------------------------- /img1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crownpku/Geetest-Captcha-Crack/700f1dac1d106cef3e10ec62f098a1cdccff55bd/img1.jpg -------------------------------------------------------------------------------- /img2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crownpku/Geetest-Captcha-Crack/700f1dac1d106cef3e10ec62f098a1cdccff55bd/img2.jpg -------------------------------------------------------------------------------- /test_track_generator.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 79, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import random\n", 10 | "def geetest_track(distance):\n", 11 | " print \"hahaha\"\n", 12 | " come_back = random.uniform(-2,3)\n", 13 | " cur_loc = 0\n", 14 | " track_list = []\n", 15 | " magic_ratio = 1\n", 16 | " if distance < 50:\n", 17 | " magic_ratio = 1\n", 18 | " else:\n", 19 | " magic_ratio = distance/50.\n", 20 | " print magic_ratio\n", 21 | " while cur_loc < distance * 1 / 4:\n", 22 | " track = random.uniform(2*magic_ratio, 4*magic_ratio)\n", 23 | " sleep_time = random.randint(10, 50) / 1000.\n", 24 | " track_list.append([track, 0.5, sleep_time])\n", 25 | " cur_loc = cur_loc + track\n", 26 | " if len(track_list) > 50:\n", 27 | " print \"whoops1!\"\n", 28 | " print track_list\n", 29 | " return\n", 30 | " \n", 31 | " while cur_loc < distance * 2 / 4:\n", 32 | " track = random.uniform(4*magic_ratio, 6*magic_ratio)\n", 33 | " sleep_time = random.randint(10, 50) / 5000.\n", 34 | " track_list.append([track, 0.3, sleep_time])\n", 35 | " cur_loc = cur_loc + track\n", 36 | " if len(track_list) > 50:\n", 37 | " print \"whoops2!\"\n", 38 | " print track_list\n", 39 | " return\n", 40 | " \n", 41 | " while cur_loc < distance * 3 / 4:\n", 42 | " track = random.uniform(3*magic_ratio, 5*magic_ratio)\n", 43 | " sleep_time = random.randint(10, 50) / 4000.\n", 44 | " track_list.append([track, 0.6, sleep_time])\n", 45 | " cur_loc = cur_loc + track\n", 46 | " if len(track_list) > 50:\n", 47 | " print \"whoops3!\"\n", 48 | " print track_list\n", 49 | " return\n", 50 | " \n", 51 | " while cur_loc < distance + come_back:\n", 52 | " track = random.uniform(2*magic_ratio, 4*magic_ratio)\n", 53 | " sleep_time = random.randint(10, 50) / 500.\n", 54 | " track_list.append([track, 0.5, sleep_time])\n", 55 | " cur_loc = cur_loc + track\n", 56 | " if len(track_list) > 50:\n", 57 | " print \"whoops4!\"\n", 58 | " print track_list\n", 59 | " return\n", 60 | " \n", 61 | " #Final Adjustment\n", 62 | " dist = 999\n", 63 | " while abs(dist) > 1:\n", 64 | " dist = cur_loc - distance\n", 65 | " if dist > 0:\n", 66 | " track = -1 * random.uniform(0.5, 2)\n", 67 | " else:\n", 68 | " track = random.uniform(0.5, 2)\n", 69 | " cur_loc = cur_loc + track\n", 70 | " sleep_time = random.randint(10, 30) / 100.\n", 71 | " track_list.append([track, 0.5, sleep_time])\n", 72 | " if len(track_list) > 50:\n", 73 | " print \"whoops5!\"\n", 74 | " print track_list\n", 75 | " return\n", 76 | " return track_list" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 81, 82 | "metadata": {}, 83 | "outputs": [ 84 | { 85 | "name": "stdout", 86 | "output_type": "stream", 87 | "text": [ 88 | "hahaha\n", 89 | "4.0\n" 90 | ] 91 | }, 92 | { 93 | "data": { 94 | "text/plain": [ 95 | "[[14.464269566584592, 0.5, 0.036],\n", 96 | " [9.574152040322744, 0.5, 0.027],\n", 97 | " [10.869245645226684, 0.5, 0.019],\n", 98 | " [10.351659127474901, 0.5, 0.041],\n", 99 | " [9.514443419455105, 0.5, 0.017],\n", 100 | " [23.854056902890534, 0.3, 0.0074],\n", 101 | " [17.653160147803792, 0.3, 0.0062],\n", 102 | " [19.35223560446206, 0.3, 0.0038],\n", 103 | " [12.92574042208101, 0.6, 0.008],\n", 104 | " [13.133554170738439, 0.6, 0.00375],\n", 105 | " [12.053751597765043, 0.6, 0.00575],\n", 106 | " [12.588406493220118, 0.5, 0.078],\n", 107 | " [11.292590379205668, 0.5, 0.09],\n", 108 | " [8.146228992828629, 0.5, 0.072],\n", 109 | " [13.194101465615203, 0.5, 0.088],\n", 110 | " [15.43856022560939, 0.5, 0.022],\n", 111 | " [-1.7111706929162458, 0.5, 0.17],\n", 112 | " [-1.7823567357507895, 0.5, 0.22],\n", 113 | " [-0.6894989340022635, 0.5, 0.26],\n", 114 | " [-1.7489889588834224, 0.5, 0.23],\n", 115 | " [-1.8151736604517428, 0.5, 0.29],\n", 116 | " [-1.2160967796100048, 0.5, 0.21],\n", 117 | " [-0.6484530646066641, 0.5, 0.12],\n", 118 | " [-1.5961695468431882, 0.5, 0.25],\n", 119 | " [-0.7510153771563826, 0.5, 0.29],\n", 120 | " [-1.1838216453918322, 0.5, 0.24],\n", 121 | " [-1.9176373778973457, 0.5, 0.13],\n", 122 | " [1.0587423973736225, 0.5, 0.11]]" 123 | ] 124 | }, 125 | "execution_count": 81, 126 | "metadata": {}, 127 | "output_type": "execute_result" 128 | } 129 | ], 130 | "source": [ 131 | "geetest_track(200)" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [] 147 | } 148 | ], 149 | "metadata": { 150 | "kernelspec": { 151 | "display_name": "Python 2", 152 | "language": "python", 153 | "name": "python2" 154 | }, 155 | "language_info": { 156 | "codemirror_mode": { 157 | "name": "ipython", 158 | "version": 2 159 | }, 160 | "file_extension": ".py", 161 | "mimetype": "text/x-python", 162 | "name": "python", 163 | "nbconvert_exporter": "python", 164 | "pygments_lexer": "ipython2", 165 | "version": "2.7.12" 166 | } 167 | }, 168 | "nbformat": 4, 169 | "nbformat_minor": 0 170 | } 171 | --------------------------------------------------------------------------------