├── README.md
├── geetest_crack_gsxt.py
├── geetest_crack_gsxt_v2.py
├── geetest_crack_mobile.py
├── img1.jpg
├── img2.jpg
├── test_track_generator.ipynb
└── track_record


/README.md:
--------------------------------------------------------------------------------
 1 | # Interesting idea:
 2 | 
Use deep reinforcement learning to solve this problem, similar to the approach taken in [OpenAI Gym](https://gym.openai.com/)?
 4 | 
 5 | 
 6 | # Geetest_GSXT_Captcha_Crack
 7 | 
 8 | geetest_crack_gsxt.py
 9 | 
10 | Use Chrome as webdriver.
11 | 
12 | Crack Gongshang website with Selenium and Chrome.
13 | 
14 | TODO:
15 | 
16 | [DONE] Modify code to record tracks (pass or fail)
17 | 
18 | [TODO] Train a machine learning model to classify tracks, and use the model to generate better tracks.
19 | 
20 | Reference: http://www.gsxt.gov.cn/index
21 | 
22 | # Geetest_Mobile_Captcha_Crack
23 | 
24 | geetest_crack_mobile.py (unmaintained)
25 | 
Please use Firefox 45.0. Newer versions of Firefox may cause problems.
27 | 
28 | Crack Mobile Version of Geetest Captcha with Firefox and PhantomJS
29 | 
30 | Reference: http://www.geetest.com/mobile-pc
31 | 
32 | 
33 | 


--------------------------------------------------------------------------------
/geetest_crack_gsxt.py:
--------------------------------------------------------------------------------
  1 | #-*-coding:utf-8-*-
  2 | import requests
  3 | import re
  4 | import StringIO
  5 | from PIL import Image
  6 | import random
  7 | import math
  8 | import time
  9 | from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
 10 | from selenium.webdriver.support.ui import WebDriverWait
 11 | from selenium.webdriver.support import expected_conditions as EC
 12 | from selenium.webdriver.common.by import By
 13 | from selenium.webdriver.common.action_chains import ActionChains
 14 | from selenium import webdriver
 15 | from bs4 import BeautifulSoup
 16 | from subprocess import call
 17 | debug = True
 18 | 
 19 | class crack_picture(object):
 20 |     def __init__(self, img_url1, img_url2):
 21 |         self.img1, self.img2 = self.picture_get(img_url1, img_url2)
 22 | 
 23 | 
 24 |     def picture_get(self, img_url1, img_url2):
 25 |         hd = {"Host": "static.geetest.com",
 26 |               "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"}
 27 |         img1 = StringIO.StringIO(self.repeat(img_url1, hd).content)
 28 |         img2 = StringIO.StringIO(self.repeat(img_url2, hd).content)
 29 |         return img1, img2
 30 | 
 31 | 
 32 |     def repeat(self, url, hd):
 33 |         times = 10
 34 |         while times > 0:
 35 |             try:
 36 |                 ans = requests.get(url, headers=hd)
 37 |                 return ans
 38 |             except:
 39 |                 times -= 1   
 40 | 
 41 | 
 42 |     def pictures_recover(self):
 43 |         xpos = self.judge(self.picture_recover(self.img1, 'img1.jpg'), self.picture_recover(self.img2, 'img2.jpg')) - 6
 44 |         return self.geetest_track_int(xpos) #_int, _float, _test
 45 | 
 46 | 
 47 |     def picture_recover(self, img, name):
 48 |         a =[39, 38, 48, 49, 41, 40, 46, 47, 35, 34, 50, 51, 33, 32, 28, 29, 27, 26, 36, 37, 31, 30, 44, 45, 43, 42, 12, 13, 23, 22, 14, 15, 21, 20, 8, 9, 25, 24, 6, 7, 3, 2, 0, 1, 11, 10, 4, 5, 19, 18, 16, 17]
 49 |         im = Image.open(img)
 50 |         im_new = Image.new("RGB", (260, 116))
 51 |         for row in range(2):
 52 |             for column in range(26):
 53 |                 right = a[row*26+column] % 26 * 12 + 1
 54 |                 down = 58 if a[row*26+column] > 25 else 0
 55 |                 for w in range(10):
 56 |                     for h in range(58):
 57 |                         ht = 58 * row + h
 58 |                         wd = 10 * column + w
 59 |                         im_new.putpixel((wd, ht), im.getpixel((w + right, h + down)))
 60 |         im_new.save(name)
 61 |         return im_new
 62 | 
 63 |     def geetest_track_float(self, distance): #Fail, cannot move non-int pixel steps
 64 |         print "generating track..."
 65 |         come_back = random.uniform(-2,3)
 66 |         cur_loc = 0
 67 |         track_list = []
 68 |         magic_ratio = 1
 69 |         if distance < 50:
 70 |             magic_ratio = 1.
 71 |         else:
 72 |             magic_ratio = distance/50.
 73 |         print magic_ratio
 74 |         while cur_loc < distance * 1 / 4:
 75 |             track = random.uniform(2*magic_ratio, 4*magic_ratio)
 76 |             sleep_time = random.randint(10, 50) / 1000.
 77 |             track_list.append([track, 0.5, sleep_time])
 78 |             cur_loc = cur_loc + track
 79 |             if len(track_list) > 50:
 80 |                 print "whoops1!"
 81 |                 print track_list
 82 |                 return
 83 |         
 84 |         while cur_loc < distance * 2 / 4:
 85 |             track = random.uniform(4*magic_ratio, 6*magic_ratio)
 86 |             sleep_time = random.randint(10, 50) / 5000.
 87 |             track_list.append([track, 0.3, sleep_time])
 88 |             cur_loc = cur_loc + track
 89 |             if len(track_list) > 50:
 90 |                 print "whoops2!"
 91 |                 print track_list
 92 |                 return
 93 |             
 94 |         while cur_loc < distance * 3 / 4:
 95 |             track = random.uniform(3*magic_ratio, 5*magic_ratio)
 96 |             sleep_time = random.randint(10, 50) / 4000.
 97 |             track_list.append([track, 0.6, sleep_time])
 98 |             cur_loc = cur_loc + track
 99 |             if len(track_list) > 50:
100 |                 print "whoops3!"
101 |                 print track_list
102 |                 return
103 |         
104 |         while cur_loc < distance + come_back:
105 |             track = random.uniform(2*magic_ratio, 4*magic_ratio)
106 |             sleep_time = random.randint(10, 50) / 500.
107 |             track_list.append([track, 0.5, sleep_time])
108 |             cur_loc = cur_loc + track
109 |             if len(track_list) > 50:
110 |                 print "whoops4!"
111 |                 print track_list
112 |                 return
113 |         
114 |         #Final Adjustment
115 |         dist = 999
116 |         while abs(dist) > 1:
117 |             dist  = cur_loc - distance
118 |             if dist > 0:
119 |                 track = -1 * random.uniform(0.5, 2)
120 |             else:
121 |                 track = random.uniform(0.5, 2)
122 |             cur_loc = cur_loc + track
123 |             sleep_time = random.randint(10, 30) / 100.
124 |             track_list.append([track, 0.5, sleep_time])
125 |             if len(track_list) > 50:
126 |                 print "whoops5!"
127 |                 print track_list
128 |                 return
129 |         return track_list
130 |     
131 |     def geetest_track_int(self, distance):
132 |         print "generate track..."
133 |         come_back = random.randint(-2,3)
134 |         cur_loc = 0
135 |         track_list = []
136 |         magic_ratio = 1
137 |         if distance < 100:
138 |             magic_ratio = 1
139 |         #else:
140 |         #    magic_ratio = distance/100.
141 |         time_ratio = 1
142 |         while cur_loc < distance * 1 / 4:
143 |             track = random.randint(math.floor(2*magic_ratio), math.floor(4*magic_ratio))
144 |             sleep_time = random.randint(10, 50) / 1000.*time_ratio
145 |             track_list.append([track, 0.5, sleep_time])
146 |             cur_loc = cur_loc + track
147 |             if len(track_list) > 100:
148 |                 print "whoops1!"
149 |                 print track_list
150 |                 return
151 |         
152 |         while cur_loc < distance * 2 / 4:
153 |             track = random.randint(math.floor(4*magic_ratio), math.floor(6*magic_ratio))
154 |             sleep_time = random.randint(10, 50) / 5000.*time_ratio
155 |             track_list.append([track, 0.3, sleep_time])
156 |             cur_loc = cur_loc + track
157 |             if len(track_list) > 100:
158 |                 print "whoops2!"
159 |                 print track_list
160 |                 return
161 |             
162 |         while cur_loc < distance * 3 / 4:
163 |             track = random.randint(math.floor(3*magic_ratio), math.floor(5*magic_ratio))
164 |             sleep_time = random.randint(10, 50) / 4000.*time_ratio
165 |             track_list.append([track, 0.6, sleep_time])
166 |             cur_loc = cur_loc + track
167 |             if len(track_list) > 100:
168 |                 print "whoops3!"
169 |                 print track_list
170 |                 return
171 |         
172 |         while cur_loc < distance + come_back:
173 |             track = random.randint(math.floor(2*magic_ratio), math.floor(4*magic_ratio))
174 |             sleep_time = random.randint(10, 50) / 500.*time_ratio
175 |             track_list.append([track, 0.5, sleep_time])
176 |             cur_loc = cur_loc + track
177 |             if len(track_list) > 100:
178 |                 print "whoops4!"
179 |                 print track_list
180 |                 return
181 |         
182 |         #Final Adjustment
183 |         dist = 999
184 |         while abs(dist) > 2:
185 |             dist  = cur_loc - distance
186 |             if dist > 0:
187 |                 track = -1 * random.randint(0, 1)
188 |             else:
189 |                 track = random.randint(0, 1)
190 |             cur_loc = cur_loc + track
191 |             sleep_time = random.randint(10, 30) / 100.*time_ratio
192 |             track_list.append([track, 0.5, sleep_time])
193 |             if len(track_list) > 100:
194 |                 print "whoops5!"
195 |                 print track_list
196 |                 return
197 |         return track_list
198 | 
199 | 
200 |     def geetest_track_test(self, distance):
201 |         return [[distance, 0.5, 1]]
202 |         #crucial trace code was deleted
203 |         #tip-->> 1. to generate the trace array randomly
204 |         #        2. to collect trace array manually
205 |         
206 | 
207 | 
208 |     def diff(self, img1, img2, wd, ht):
209 |         rgb1 = img1.getpixel((wd, ht))
210 |         rgb2 = img2.getpixel((wd, ht))
211 |         tmp = reduce(lambda x,y: x+y, map(lambda x: abs(x[0]-x[1]), zip(rgb1, rgb2)))
212 |         return True if tmp >= 200 else False
213 | 
214 |             
215 |     def col(self, img1, img2, cl):
216 |         for i in range(img2.size[1]):
217 |             if self.diff(img1, img2, cl, i):
218 |                 return True
219 |         return False
220 | 
221 | 
222 |     def judge(self, img1, img2):
223 |         for i in range(img2.size[0]):
224 |             if self.col(img1, img2, i):
225 |                 return i
226 |         return -1
227 | 
228 | 
class gsxt(object):
    """Drive a browser through the gsxt.gov.cn search and its slider captcha."""

    # Driver binaries; override on a subclass or instance for other machines.
    PHANTOMJS_PATH = '/home/guan/Software/phantomjs-2.1.1-linux-x86_64/bin/phantomjs'
    CHROMEDRIVER_PATH = "/usr/lib/chromium-browser/chromedriver"

    # Tried in order: the quoted form first, because the bare pattern would
    # also match a quoted URL and keep the quotes in the result.
    _IMG_URL_PATTERNS = (r'url\("(.*?)"\)', r'url\((.*?)\)')

    def __init__(self, br_name="phantomjs"):
        self.br = self.get_webdriver(br_name)
        self.wait = WebDriverWait(self.br, 10, 1.0)
        self.br.set_page_load_timeout(8)
        self.br.set_script_timeout(8)

    def input_params(self, name):
        """Open the index page, type ``name`` into the search box and submit."""
        self.br.get("http://www.gsxt.gov.cn/index")
        element = self.wait_for(By.ID, "keyword")
        element.send_keys(name)
        time.sleep(1.1)
        element = self.wait_for(By.ID, "btn_query")
        element.click()
        time.sleep(1.1)

    def drag_pic(self):
        """Return the image URLs of the full background and the cut background."""
        return (self.find_img_url(self.wait_for(By.CLASS_NAME, "gt_cut_fullbg_slice")),
                self.find_img_url(self.wait_for(By.CLASS_NAME, "gt_cut_bg_slice")))

    def wait_for(self, by1, by2):
        """Wait until the element located by ``(by1, by2)`` is present and return it."""
        return self.wait.until(EC.presence_of_element_located((by1, by2)))

    def find_img_url(self, element):
        """Extract the ``url(...)`` value from the element's style attribute.

        Every "webp" in the URL is replaced by "jpg".  Raises IndexError when
        the style contains no ``url(...)`` at all.
        """
        style = element.get_attribute('style')
        for pattern in self._IMG_URL_PATTERNS:
            match = re.search(pattern, style)
            if match:
                return match.group(1).replace("webp", "jpg")
        raise IndexError("no url(...) found in style attribute: %r" % (style,))

    def emulate_track(self, tracks):
        """Drag the slider knob along ``tracks`` and return the result text.

        ``tracks`` is an iterable of ``(x, y, t)``: the pointer is moved to
        offset (x + 22, y + 22) of the knob element, then the method sleeps
        ``t`` seconds.  Returns the UTF-8 encoded text of the ``gt_info_text``
        element.
        """
        element = self.br.find_element_by_class_name("gt_slider_knob")
        ActionChains(self.br).click_and_hold(on_element=element).perform()
        for x, y, t in tracks:
            print("%s %s %s" % (x, y, t))
            ActionChains(self.br).move_to_element_with_offset(
                        to_element=element,
                        xoffset=x+22.,
                        yoffset=y+22.).perform()
            ActionChains(self.br).click_and_hold().perform()
            time.sleep(t)
        time.sleep(0.24)
        ActionChains(self.br).release(on_element=element).perform()
        time.sleep(0.8)
        element = self.wait_for(By.CLASS_NAME, "gt_info_text")
        ans = element.text.encode("utf-8")
        print(ans)
        return ans

    def run(self):
        """Query the three sample companies in an endless loop.

        The loop only ends through an exception; the browser is then closed
        before the exception propagates (the original quit call sat after the
        infinite loop and could never run).
        """
        try:
            while True:
                for company in [u'招商银行', u'交通银行', u'中国银行']:
                    self.hack_geetest(company)
                    time.sleep(1)
        finally:
            try:
                self.quit_webdriver()
            except Exception as quit_error:
                # Best-effort cleanup: do not hide the error that ended the loop.
                print(quit_error)

    def hack_geetest(self, company=u"招商银行"):
        """Search for ``company`` and try to pass the captcha.

        Every attempted track is appended to the file ``track_record``
        prefixed with ``True:`` or ``False:``.  On success the text of each
        ``search_list_item`` link is printed.  The method gives up after five
        failed attempts.
        """
        self.input_params(company)
        fail_count = 0
        # "with" closes the record file even when a selenium call raises.
        with open('track_record', 'a') as outfile:
            while fail_count < 5:
                img_url1, img_url2 = self.drag_pic()
                tracks = crack_picture(img_url1, img_url2).pictures_recover()
                if tracks is None:
                    # The generator gave up; count it and start over instead
                    # of crashing while iterating None.
                    fail_count += 1
                    self.input_params(company)
                    continue
                tsb = self.emulate_track(tracks)

                if '通过' in tsb:
                    time.sleep(1)
                    outfile.write('True:' + str(tracks) + '\n')
                    soup = BeautifulSoup(self.br.page_source, 'html.parser')
                    for sp in soup.find_all("a", attrs={"class": "search_list_item"}):
                        print(re.sub(r"\s+", "", sp.get_text().encode("utf-8")))
                    break
                # Each marker needs its own membership test: the original
                # "'吃' or '失败' in tsb" was always true.
                elif '吃' in tsb or '失败' in tsb:
                    outfile.write('False:' + str(tracks) + '\n')
                    fail_count += 1
                    time.sleep(5)
                else:
                    self.input_params(company)

    def quit_webdriver(self):
        """Close the browser."""
        self.br.quit()

    def get_webdriver(self, name):
        """Create the webdriver for ``name`` ("phantomjs" or "chrome").

        The name is matched case-insensitively.  Raises ValueError for any
        other name instead of silently returning None.
        """
        browser = name.lower()
        if browser == "phantomjs":
            dcap = dict(DesiredCapabilities.PHANTOMJS)
            dcap["phantomjs.page.settings.userAgent"] = (
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36")
            return webdriver.PhantomJS(desired_capabilities=dcap,
                                       executable_path=self.PHANTOMJS_PATH)
        if browser == "chrome":
            return webdriver.Chrome(self.CHROMEDRIVER_PATH)
        raise ValueError("unsupported browser: %r" % (name,))
335 | 
336 | 
if __name__ == "__main__":
    # Restart the whole crawl whenever it dies: report the error, kill the
    # leftover browser processes, wait half a minute and start again.
    while True:
        try:
            gsxt("chrome").run()
        except Exception as e:
            print(e)
            for kill_command in (
                    "kill $(ps ax | grep chromedriver | awk '{print $1}')",
                    "kill $(ps ax | grep chromium | awk '{print $1}')"):
                call(kill_command, shell=True)
            time.sleep(30)
348 |             
349 |     
350 |     #gsxt("phantomjs").run()
351 | 
352 | 
353 | 
354 | 


--------------------------------------------------------------------------------
/geetest_crack_gsxt_v2.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | import os
  3 | import re
  4 | import time
  5 | from selenium import webdriver
  6 | from io import BytesIO
  7 | 
  8 | from PIL import Image
  9 | from selenium.webdriver.common.action_chains import ActionChains
 10 | from selenium.webdriver.common.by import By
 11 | from selenium.webdriver.support.ui import WebDriverWait
 12 | from selenium.webdriver.support import expected_conditions as EC
 13 | import random
 14 | from subprocess import call
 15 | from bs4 import BeautifulSoup
 16 | 
 17 | os.chdir('.')
 18 | 
 19 | PROXY = "120.199.224.78:80"
 20 | chrome_options = webdriver.ChromeOptions()
 21 | chrome_options.add_argument('--proxy-server=%s' % PROXY)
 22 | 
 23 | driver = webdriver.Chrome("/usr/lib/chromium-browser/chromedriver", chrome_options=chrome_options)
 24 | #driver = webdriver.Chrome("/usr/lib/chromium-browser/chromedriver")
 25 | 
 26 | global try1
 27 | try1 = 0
 28 | global success
 29 | success = 0
 30 | 
 31 | 
 32 | # driver.get("http://www.gsxt.gov.cn/index")
 33 | # wait = WebDriverWait(driver, 10, 1.0)
 34 | # element = wait.until(EC.presence_of_element_located((By.ID, "keyword")))
 35 | # element.send_keys(u"小桔科技")
 36 | # time.sleep(1.1)
 37 | # element = wait.until(EC.presence_of_element_located((By.ID, "btn_query")))
 38 | # element.click()
 39 | # time.sleep(1.1)
 40 | 
 41 | 
 42 | def get_captcha_image(filename):
 43 |     screenshot = driver.get_screenshot_as_png()
 44 |     screenshot = Image.open(BytesIO(screenshot))
 45 |     # screenshot.show()
 46 | 
 47 |     captcha_el = driver.find_element_by_class_name("gt_box")
 48 |     location = captcha_el.location
 49 |     size = captcha_el.size
 50 |     left = location['x']
 51 |     top = location['y']
 52 |     right = location['x'] + size['width']
 53 |     bottom = location['y'] + size['height']
 54 |     box = (left, top, right, bottom)
 55 |     print(box)
 56 |     # if box[0] == 0:
 57 |     #     raise(Exception('======='))
 58 |     captcha_image = screenshot.crop(box)
 59 |     captcha_image.save(filename)  # "%s.png" % uuid.uuid4().hex
 60 |     # print(u'截图成功')
 61 | 
 62 | 
 63 | def find_offset(diff_image, offset_=62):
 64 |     d = diff_image.convert("L").point(lambda i: i > 52, mode='1')
 65 |     d.save('x-%s.png' % time.strftime("%Y%m%d-%H%M%S"))
 66 |     b1 = d.getbbox()  # left, upper, right, and lower pixel coordinate
 67 |     # offset_ = 65
 68 |     b2 = d.crop((offset_, 0, d.width, d.height)).getbbox()
 69 |     global first_left
 70 |     first_left = b1[0]
 71 |     offset = b2[0] + offset_ - b1[0] - 2
 72 |     if b2[0] <= 4:
 73 |         offset = -1
 74 |     return offset
 75 |     # diff = diff_image.load()
 76 |     # http://stackoverflow.com/questions/9038160/break-two-for-loops
 77 |     # for x in range(61, width):
 78 |     #     for y in range(height):
 79 |     #         if all(i > 40 for i in diff[x, y]):
 80 |     #             return x - 6
 81 | 
 82 | 
 83 | def get_offsets(offset, setpointX):
 84 |     '''
 85 |     切记不能移动小数个像素位置
 86 |     '''
 87 |     kp = 3.0
 88 |     ki = 0.0001
 89 |     kd = 80.0
 90 | 
 91 |     x = 0
 92 |     vx = 0
 93 |     prevErrorX = 0
 94 |     integralX = 0
 95 |     derivativeX = 0
 96 | 
 97 |     while 1:
 98 |         if x >= setpointX:
 99 |             break
100 | 
101 |         errorX = setpointX - x
102 |         # print('xxxxx - ', x)
103 |         integralX += errorX
104 |         derivativeX = errorX - prevErrorX
105 |         prevErrorX = errorX
106 |         if offset < 100:
107 |             K = 0.007
108 |         elif offset < 180:
109 |             K = 0.006
110 |         else:
111 |             K = 0.005
112 |         ax = K * (kp * errorX + ki * integralX + kd * derivativeX)
113 |         vx += ax
114 | 
115 |         if x + vx > setpointX:
116 |             vx = setpointX - x
117 |         vx = int(vx)
118 |         if vx < 1:
119 |             vx = random.randint(1, 3)
120 |         yield vx
121 |         print('vvvvv - ', vx)
122 |         x += vx
123 | 
124 | 
def get_offsets_back(goal):
    """Yield random steps of 10-50 that add up to ``goal``.

    The last step is shortened so the total never exceeds ``goal``; nothing
    is yielded when ``goal`` is 0 or negative.
    """
    travelled = 0
    while travelled < goal:
        step = random.randint(10, 50)
        # Shorten the step when it would pass the goal.
        step = goal - travelled if travelled + step > goal else step
        yield step
        travelled += step
135 | 
136 | 
def go(driver, name):
    """Search ``name`` on gsxt.gov.cn and make one attempt at the slider captcha.

    Steps:
      1. Open the index page, enter ``name`` and submit the query.
      2. Screenshot the captcha before and after pressing the knob
         (files ``f-<timestamp>.png`` and ``s-<timestamp>.png``).
      3. Derive the drag distance from the difference of the two screenshots
         (``find_offset``).  When that fails, drag the knob 160 px, take a new
         screenshot and measure from it.
      4. Drag the knob by the steps from ``get_offsets`` and release.

    The outcome of the attempt is not inspected; the page is refreshed at the
    end.
    """
    # ImageChops is not imported at module level.
    from PIL import ImageChops

    global first_left

    driver.get("http://www.gsxt.gov.cn/index")
    wait = WebDriverWait(driver, 10, 1.0)
    element = wait.until(EC.presence_of_element_located((By.ID, "keyword")))
    element.send_keys(name)
    time.sleep(1.1)
    element = wait.until(EC.presence_of_element_located((By.ID, "btn_query")))
    element.click()
    time.sleep(1.1)

    time.sleep(0.2)
    # Fix the window size.
    width = 1280
    height = 800
    driver.set_window_size(width, height)

    time.sleep(1)
    WebDriverWait(driver, 8).until(
        EC.presence_of_element_located((By.CLASS_NAME, "gt_box")))
    knob = driver.find_element_by_class_name("gt_slider_knob")
    # Hover over the point (21, 21) of the knob.
    ActionChains(driver).move_to_element_with_offset(knob, 21, 21).perform()
    time.sleep(1)
    f_file = 'f-%s.png' % time.strftime("%Y%m%d-%H%M%S")
    get_captcha_image(f_file)
    ActionChains(driver).click_and_hold().perform()
    time.sleep(0.5)
    s_file = 's-%s.png' % time.strftime("%Y%m%d-%H%M%S")
    get_captcha_image(s_file)

    # Difference between the "before" and "after" screenshots.
    image_f = Image.open(f_file)
    image_s = Image.open(s_file)
    diff = ImageChops.difference(image_f, image_s)

    # find_offset overwrites this with the left edge it detects.
    first_left = 0

    offset = find_offset(diff)
    if offset < 0:
        # Drag the knob 160 px to the right, hold it there and take a screenshot.
        ActionChains(driver).move_by_offset(160, 0).perform()
        time.sleep(0.5)
        s_file = 's-%s.png' % time.strftime("%Y%m%d-%H%M%S")
        get_captcha_image(s_file)
        # Let go of the knob.
        ActionChains(driver).release().perform()
        image_s = Image.open(s_file)
        diff = ImageChops.difference(image_f, image_s)
        d = diff.convert("L").point(lambda i: i > 60, mode='1')
        offset = d.getbbox()[0] - first_left
        time.sleep(2.5)
        ActionChains(driver).move_to_element_with_offset(
            knob, 21, 21).click_and_hold().perform()
        time.sleep(0.5)
    print(offset)

    for o in get_offsets(offset, offset):
        y = random.randint(-1, 1)
        ActionChains(driver).move_by_offset(o, y).perform()
        # 100.0, not 100: integer division made this pause 0 on Python 2.
        time.sleep(random.randint(2, 4) / 100.0)
    ActionChains(driver).release().perform()
    time.sleep(3)
    driver.refresh()
237 | 
238 | 
# Endless crawl over the three sample companies.  On any error: report it,
# kill the browser processes, wait 30 seconds and start the next round.
# NOTE(review): ``driver`` is created once at module level and is not
# re-created here after chromedriver has been killed -- confirm whether later
# rounds can still work.
while True:
    try:
        for i in (u'招商银行', u'交通银行', u'中国银行'):
            go(driver, i)
            time.sleep(1)
    except Exception as e:
        print(e)
        for kill_command in (
                "kill $(ps ax | grep chromedriver | awk '{print $1}')",
                "kill $(ps ax | grep chromium | awk '{print $1}')"):
            call(kill_command, shell=True)
        time.sleep(30)
250 | 


--------------------------------------------------------------------------------
/geetest_crack_mobile.py:
--------------------------------------------------------------------------------
  1 | #!/usr/local/bin/python
  2 | # -*- coding: utf8 -*-
  3 | 
  4 | 
  5 | 
  6 | from selenium import webdriver
  7 | from selenium.webdriver.support.ui import WebDriverWait
  8 | from selenium.webdriver.common.action_chains import ActionChains
  9 | from PIL import Image
 10 | import PIL.Image as image
 11 | import time,re,cStringIO,urllib2,random
 12 | 
 13 | 
 14 | 
 15 | def is_similar(image1,image2,x,y):
 16 |     '''
 17 |     对比RGB值
 18 |     '''
 19 |     pass
 20 | 
 21 |     pixel1=image1.getpixel((x,y))
 22 |     pixel2=image2.getpixel((x,y))
 23 | 
 24 |     for i in range(0,3):
 25 |         if abs(pixel1[i]-pixel2[i])>=50:
 26 |             return False
 27 | 
 28 |     return True
 29 | 
 30 | def get_diff_location(image1,image2):
 31 |     '''
 32 |     计算缺口的位置
 33 |     '''
 34 | 
 35 |     i=0
 36 | 
 37 |     for i in range(0,260):
 38 |         #One Vertical Line Must be Different to decide the location
 39 |         count = 0
 40 |         for j in range(0,116):
 41 |             if is_similar(image1,image2,260-i-1,j)==False:
 42 |                 count = count + 1
 43 |         if count > 10:
 44 |             return  260-i-1-44
 45 | 
 46 | 
 47 | 
 48 | 
 49 | 
def geettest_crack(driver, screenshot_path = ''):
    """Try to solve the slider captcha on geetest.com/mobile-pc with ``driver``.

    ``screenshot_path`` is prepended to the name of every screenshot file
    written (screenshot_start.png, screenshot1.png, screenshot2.png,
    screenshot_finish3.png).

    Up to five retries are made.  Each trial screenshots the captcha before
    and after a 5 px drag, derives the gap location from the two images and
    then drags the circle there in small random steps.  When the circle has
    moved away from its start position after release, the form is submitted
    and ``driver.quit()`` is called.
    """

#     The file path here is the webdriver executable path
    #driver = webdriver.PhantomJS(executable_path='/home/guan/Software/phantomjs-2.1.1-linux-x86_64/bin/phantomjs')
    #driver = webdriver.Chrome(executable_path=r"C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe")
    #driver = webdriver.Firefox()
    #driver.maximize_window()
#     Open the web page
    driver.get("http://www.geetest.com/mobile-pc")
    time.sleep(3)
    login_button = driver.find_element_by_id("login")
    login_button.click()

    
#     Wait for the page elements to be rendered
    #WebDriverWait(driver, 30).until(lambda the_driver: the_driver.find_element_by_xpath("//div[@class='gt_slider_knob gt_show']").is_displayed())
    #WebDriverWait(driver, 30).until(lambda the_driver: the_driver.find_element_by_xpath("//div[@class='gt_cut_bg gt_show']").is_displayed())
    #WebDriverWait(driver, 30).until(lambda the_driver: the_driver.find_element_by_xpath("//div[@class='gt_cut_fullbg gt_show']").is_displayed())

    #To be replaced by wait until
    time.sleep(10)
    driver.save_screenshot(screenshot_path + 'screenshot_start.png')
    #outfile_webpage = open('test.html', 'w')
    #outfile_webpage.write(driver.page_source.encode('utf8') + '\n')
    #outfile_webpage.close()

#     Download the images
    #image1=get_image(driver, "//div[@class='gt_cut_bg gt_show']/div")
    #image2=get_image(driver, "//div[@class='gt_cut_fullbg gt_show']/div")
    #captcha_el = driver.find_element_by_id("embed-captcha")
    # The captcha is located through the first <svg> element; its bounding
    # box is used to crop every screenshot below.
    captcha_el = driver.find_element_by_xpath('//*[local-name() = "svg"]')
    location = captcha_el.location
    size = captcha_el.size
    left = int(location['x'])
    top = int(location['y'])
    right = int(location['x'] + size['width'])
    bottom = int(location['y'] + size['height'])

    #dragger = driver.find_element_by_class_name("gt_slider_knob")



    trial_count = 0
    while trial_count < 5:
        # FireFox:
        # element = driver.find_element_by_xpath('//*[local-name() = "circle"][@style="fill: rgb(255, 255, 255); stroke-width: 1.5;"]')
        # PhantomJS:
        element = driver.find_element_by_xpath('//*[local-name() = "circle"][@style="fill: #ffffff; stroke-width: 1.5px;"]')
        time.sleep(3)
        # Screenshot before touching the slider, cropped to the captcha box.
        driver.save_screenshot(screenshot_path + 'screenshot1.png')
        img1 = Image.open(screenshot_path + 'screenshot1.png')
        img1 = img1.crop((left, top, right, bottom))
        img1.save(screenshot_path + 'screenshot1.png')
        # Drag the circle 5 px to the right and release it.
        action = ActionChains(driver)
        action.drag_and_drop_by_offset(element, 5, 0).perform()
        time.sleep(5)

        # Second screenshot, taken after the 5 px drag.
        driver.save_screenshot(screenshot_path + 'screenshot2.png')
        img2 = Image.open(screenshot_path + 'screenshot2.png')
        img2 = img2.crop((left, top, right, bottom))
        img2.save(screenshot_path + 'screenshot2.png')
    #     Compute the gap position
        # get_diff_location returns None when no differing column is found;
        # the arithmetic on loc below does not guard against that.
        loc=get_diff_location(img1, img2)
        print 'Location is: ' + str(loc)

    #     Generate the x movement track points
        # NOTE(review): get_track is neither defined nor imported in the
        # visible part of this file, and track_list is not used below --
        # confirm where it is supposed to come from.
        track_list=get_track(loc)

    #     Find the sliding ball
        #element=driver.find_element_by_xpath("//div[@class='gt_slider_knob gt_show']")
        #element = driver.find_element_by_xpath('//*[local-name() = "circle"][@style="fill: rgb(255, 255, 255); stroke-width: 1.5;"]')
        location=element.location
    #     Get the height (y) of the sliding ball
        # y is not used afterwards; init_x is the reference for all distance
        # checks below.
        y=location['y']
        init_x = location['x']
        #ActionChains(driver).drag_and_drop_by_offset(source=element, xoffset = loc, yoffset= 0).perform()
    #     Click the element with the mouse and hold the button down
        #print "Step one: click the element"


        ActionChains(driver).click_and_hold(on_element=element).perform()
        time.sleep(0.15)
        #print loc - 5

        # Random amount added to loc as the bound of the fourth phase.
        come_back = random.randint(-2,5)

        # Four movement phases.  Each one moves the pointer relative to the
        # element's current position until the element has travelled past
        # the phase bound; step size and pause differ per phase.
        while element.location['x'] - init_x < loc * 1 / 4 :
            track = random.randint(2, 4)
            ActionChains(driver).move_to_element_with_offset(to_element=element, xoffset=track + 22, yoffset=22).perform()
            time.sleep(random.randint(10, 50) / 1000.)

        while element.location['x'] - init_x < loc * 2 / 4 :
            track = random.randint(4, 6)
            ActionChains(driver).move_to_element_with_offset(to_element=element, xoffset=track + 22, yoffset=22).perform()
            # time.sleep(0.5)
            # print element.location['x'] - init_x
            time.sleep(random.randint(10, 50) / 5000.)

        while element.location['x'] - init_x < loc * 3 / 4 :
            track = random.randint(3,5)
            ActionChains(driver).move_to_element_with_offset(to_element=element, xoffset=track + 22, yoffset=22).perform()
            time.sleep(random.randint(10, 50)/ 4000.)

        while element.location['x'] - init_x < loc + come_back:
            track = random.randint(2,4)
            ActionChains(driver).move_to_element_with_offset(to_element=element, xoffset=track + 22, yoffset=22).perform()
            time.sleep(random.randint(10, 50)/ 500.)



        # Final adjustment towards init_x + loc - 5.  dist is measured before
        # each step, so one more step is made after the element is already
        # within 2 px of the target.
        target_x = init_x + loc - 5
        dist = 999
        while abs(dist) > 2:

            dist  = element.location['x'] - target_x
            #track = -1 * random.randint(1, 2)
            if dist > 0:
                track = -1 * random.randint(2, 4)
            else:
                track = random.randint(2, 3)
            ActionChains(driver).move_to_element_with_offset(to_element=element, xoffset=track + 22, yoffset=22).perform()
            time.sleep(random.randint(10, 30) / 100.)


        #driver.save_screenshot('screenshot_finish1.png')


        ActionChains(driver).release(on_element=element).perform()
        #driver.save_screenshot('screenshot_finish2.png')
        time.sleep(1)
        driver.save_screenshot(screenshot_path + 'screenshot_finish3.png')
        time.sleep(3)
        # The circle being back at its start position is treated as a failed
        # attempt and triggers a retry.
        if element.location['x'] == init_x:
            print "Retrial..."
            trial_count += 1

        #print driver.current_url
        else:
            submit=driver.find_element_by_id("embed-submit")
            ActionChains(driver).click(on_element=submit).perform()
            time.sleep(3)
            driver.quit()
            print "Cracked Geettest."
            break
194 | 
195 | 
196 | #geettest_crack()
197 | if __name__ == '__main__':
198 | 
199 | 
200 |     driver = webdriver.Firefox()
201 |     #Can be replaced by PhantomJS driver
202 |     try:
203 |         geettest_crack(driver)
204 |     except Exception as e:
205 |         print e
206 |         driver.quit()
207 | 
208 | 


--------------------------------------------------------------------------------
/img1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crownpku/Geetest-Captcha-Crack/700f1dac1d106cef3e10ec62f098a1cdccff55bd/img1.jpg


--------------------------------------------------------------------------------
/img2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/crownpku/Geetest-Captcha-Crack/700f1dac1d106cef3e10ec62f098a1cdccff55bd/img2.jpg


--------------------------------------------------------------------------------
/test_track_generator.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 79,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "import random\n",
 10 |     "def geetest_track(distance):\n",
 11 |     "        print \"hahaha\"\n",
 12 |     "        come_back = random.uniform(-2,3)\n",
 13 |     "        cur_loc = 0\n",
 14 |     "        track_list = []\n",
 15 |     "        magic_ratio = 1\n",
 16 |     "        if distance < 50:\n",
 17 |     "            magic_ratio = 1\n",
 18 |     "        else:\n",
 19 |     "            magic_ratio = distance/50.\n",
 20 |     "        print magic_ratio\n",
 21 |     "        while cur_loc < distance * 1 / 4:\n",
 22 |     "            track = random.uniform(2*magic_ratio, 4*magic_ratio)\n",
 23 |     "            sleep_time = random.randint(10, 50) / 1000.\n",
 24 |     "            track_list.append([track, 0.5, sleep_time])\n",
 25 |     "            cur_loc = cur_loc + track\n",
 26 |     "            if len(track_list) > 50:\n",
 27 |     "                print \"whoops1!\"\n",
 28 |     "                print track_list\n",
 29 |     "                return\n",
 30 |     "        \n",
 31 |     "        while cur_loc < distance * 2 / 4:\n",
 32 |     "            track = random.uniform(4*magic_ratio, 6*magic_ratio)\n",
 33 |     "            sleep_time = random.randint(10, 50) / 5000.\n",
 34 |     "            track_list.append([track, 0.3, sleep_time])\n",
 35 |     "            cur_loc = cur_loc + track\n",
 36 |     "            if len(track_list) > 50:\n",
 37 |     "                print \"whoops2!\"\n",
 38 |     "                print track_list\n",
 39 |     "                return\n",
 40 |     "            \n",
 41 |     "        while cur_loc < distance * 3 / 4:\n",
 42 |     "            track = random.uniform(3*magic_ratio, 5*magic_ratio)\n",
 43 |     "            sleep_time = random.randint(10, 50) / 4000.\n",
 44 |     "            track_list.append([track, 0.6, sleep_time])\n",
 45 |     "            cur_loc = cur_loc + track\n",
 46 |     "            if len(track_list) > 50:\n",
 47 |     "                print \"whoops3!\"\n",
 48 |     "                print track_list\n",
 49 |     "                return\n",
 50 |     "        \n",
 51 |     "        while cur_loc < distance + come_back:\n",
 52 |     "            track = random.uniform(2*magic_ratio, 4*magic_ratio)\n",
 53 |     "            sleep_time = random.randint(10, 50) / 500.\n",
 54 |     "            track_list.append([track, 0.5, sleep_time])\n",
 55 |     "            cur_loc = cur_loc + track\n",
 56 |     "            if len(track_list) > 50:\n",
 57 |     "                print \"whoops4!\"\n",
 58 |     "                print track_list\n",
 59 |     "                return\n",
 60 |     "        \n",
 61 |     "        #Final Adjustment\n",
 62 |     "        dist = 999\n",
 63 |     "        while abs(dist) > 1:\n",
 64 |     "            dist  = cur_loc - distance\n",
 65 |     "            if dist > 0:\n",
 66 |     "                track = -1 * random.uniform(0.5, 2)\n",
 67 |     "            else:\n",
 68 |     "                track = random.uniform(0.5, 2)\n",
 69 |     "            cur_loc = cur_loc + track\n",
 70 |     "            sleep_time = random.randint(10, 30) / 100.\n",
 71 |     "            track_list.append([track, 0.5, sleep_time])\n",
 72 |     "            if len(track_list) > 50:\n",
 73 |     "                print \"whoops5!\"\n",
 74 |     "                print track_list\n",
 75 |     "                return\n",
 76 |     "        return track_list"
 77 |    ]
 78 |   },
 79 |   {
 80 |    "cell_type": "code",
 81 |    "execution_count": 81,
 82 |    "metadata": {},
 83 |    "outputs": [
 84 |     {
 85 |      "name": "stdout",
 86 |      "output_type": "stream",
 87 |      "text": [
 88 |       "hahaha\n",
 89 |       "4.0\n"
 90 |      ]
 91 |     },
 92 |     {
 93 |      "data": {
 94 |       "text/plain": [
 95 |        "[[14.464269566584592, 0.5, 0.036],\n",
 96 |        " [9.574152040322744, 0.5, 0.027],\n",
 97 |        " [10.869245645226684, 0.5, 0.019],\n",
 98 |        " [10.351659127474901, 0.5, 0.041],\n",
 99 |        " [9.514443419455105, 0.5, 0.017],\n",
100 |        " [23.854056902890534, 0.3, 0.0074],\n",
101 |        " [17.653160147803792, 0.3, 0.0062],\n",
102 |        " [19.35223560446206, 0.3, 0.0038],\n",
103 |        " [12.92574042208101, 0.6, 0.008],\n",
104 |        " [13.133554170738439, 0.6, 0.00375],\n",
105 |        " [12.053751597765043, 0.6, 0.00575],\n",
106 |        " [12.588406493220118, 0.5, 0.078],\n",
107 |        " [11.292590379205668, 0.5, 0.09],\n",
108 |        " [8.146228992828629, 0.5, 0.072],\n",
109 |        " [13.194101465615203, 0.5, 0.088],\n",
110 |        " [15.43856022560939, 0.5, 0.022],\n",
111 |        " [-1.7111706929162458, 0.5, 0.17],\n",
112 |        " [-1.7823567357507895, 0.5, 0.22],\n",
113 |        " [-0.6894989340022635, 0.5, 0.26],\n",
114 |        " [-1.7489889588834224, 0.5, 0.23],\n",
115 |        " [-1.8151736604517428, 0.5, 0.29],\n",
116 |        " [-1.2160967796100048, 0.5, 0.21],\n",
117 |        " [-0.6484530646066641, 0.5, 0.12],\n",
118 |        " [-1.5961695468431882, 0.5, 0.25],\n",
119 |        " [-0.7510153771563826, 0.5, 0.29],\n",
120 |        " [-1.1838216453918322, 0.5, 0.24],\n",
121 |        " [-1.9176373778973457, 0.5, 0.13],\n",
122 |        " [1.0587423973736225, 0.5, 0.11]]"
123 |       ]
124 |      },
125 |      "execution_count": 81,
126 |      "metadata": {},
127 |      "output_type": "execute_result"
128 |     }
129 |    ],
130 |    "source": [
131 |     "geetest_track(200)"
132 |    ]
133 |   },
134 |   {
135 |    "cell_type": "code",
136 |    "execution_count": null,
137 |    "metadata": {},
138 |    "outputs": [],
139 |    "source": []
140 |   },
141 |   {
142 |    "cell_type": "code",
143 |    "execution_count": null,
144 |    "metadata": {},
145 |    "outputs": [],
146 |    "source": []
147 |   }
148 |  ],
149 |  "metadata": {
150 |   "kernelspec": {
151 |    "display_name": "Python 2",
152 |    "language": "python",
153 |    "name": "python2"
154 |   },
155 |   "language_info": {
156 |    "codemirror_mode": {
157 |     "name": "ipython",
158 |     "version": 2
159 |    },
160 |    "file_extension": ".py",
161 |    "mimetype": "text/x-python",
162 |    "name": "python",
163 |    "nbconvert_exporter": "python",
164 |    "pygments_lexer": "ipython2",
165 |    "version": "2.7.12"
166 |   }
167 |  },
168 |  "nbformat": 4,
169 |  "nbformat_minor": 0
170 | }
171 | 


--------------------------------------------------------------------------------