"""gwhatweb: identify the CMS behind a website.

Fingerprints are loaded from ``data.json`` (the project README describes it as
a JSON library of 1400+ fingerprints matched by MD5 or by keyword) and probed
concurrently with gevent coroutines.

Requirements (from the README):
    * Python >= 2.7
    * ``pip install requests gevent`` (alternatively ``pip install grequests``)

Usage:
    python gwhatweb.py http://wwww.xxxx.com

The README points to two successor projects for further updates:
    * w11scan (distributed fingerprinting platform):
      https://github.com/boy-hack/w11scan
    * goWhatweb: https://github.com/boy-hack/goWhatweb
"""
import hashlib
import io
import json
import sys
import time


class gwhatweb(object):
    """Probe a site with every fingerprint in ``data.json`` until one matches.

    Each fingerprint entry is a dict; this class reads its ``url`` (path
    appended to the target), ``name`` (CMS name that is reported), ``re``
    (keyword searched in the response text) and ``md5`` (digest compared with
    the response body when ``re`` is empty).
    """

    def __init__(self, url):
        """Load the fingerprint list and queue one task per fingerprint.

        Args:
            url: Base URL of the target site; trailing slashes are stripped.

        Raises:
            IOError/OSError: ``data.json`` cannot be opened in the current
                working directory.
            ValueError: ``data.json`` is not valid JSON.
        """
        # gevent is imported lazily so the module itself can be imported
        # before whatweb() has had the chance to monkey-patch the stdlib.
        from gevent.queue import Queue

        self.tasks = Queue()
        self.url = url.rstrip("/")
        # io.open takes ``encoding`` on both Python 2.7 and 3.x; the
        # ``encoding`` keyword of json.load was removed in Python 3.9.
        with io.open('data.json', encoding="utf-8") as fp:
            webdata = json.load(fp)
        for item in webdata:
            self.tasks.put(item)
        print("webdata total:%d" % len(webdata))

    def _GetMd5(self, body):
        """Return the hexadecimal MD5 digest of ``body``.

        Args:
            body: Raw bytes, or text (which is encoded as UTF-8 first).
        """
        if not isinstance(body, bytes):
            body = body.encode("utf8")
        return hashlib.md5(body).hexdigest()

    def _clearQueue(self):
        """Discard every pending task so idle workers stop picking up work."""
        while not self.tasks.empty():
            self.tasks.get()

    def _match(self, data, text, raw):
        """Decide whether a response matches one fingerprint entry.

        Args:
            data: Fingerprint dict; only ``re`` and ``md5`` are read here.
            text: Decoded response body, used for the keyword rule.
            raw: Undecoded response bytes, used for the MD5 rule.

        Returns:
            ``"re"`` when the keyword rule matched, ``"md5"`` when the digest
            rule matched, ``None`` otherwise. A non-empty ``re`` always takes
            precedence: the MD5 is only consulted when ``re`` is empty.
        """
        keyword = data.get("re")
        if keyword:
            if text is not None and keyword in text:
                return "re"
            return None
        # Hash the raw bytes: round-tripping binary resources (favicons,
        # images) through text decoding would alter them and change the digest.
        if self._GetMd5(raw) == data.get("md5"):
            return "md5"
        return None

    def _worker(self):
        """Take one fingerprint from the queue and test it against the target.

        Returns:
            True when the fingerprint matched (the result is printed and the
            remaining queue is cleared); None otherwise.
        """
        # Imported here rather than at module level so that requests (and the
        # ssl module it pulls in) is loaded after whatweb() has monkey-patched.
        import requests

        data = self.tasks.get()
        test_url = self.url + data["url"]
        try:
            r = requests.get(test_url, timeout=10)
            if r.status_code != 200:
                return None
            text, raw = r.text, r.content
        except Exception:
            # Best-effort scan: a failed probe just means "no match" for this
            # entry. Unlike a bare ``except``, this still lets
            # KeyboardInterrupt and GreenletExit propagate.
            return None

        kind = self._match(data, text, raw)
        if kind is None:
            return None
        print("CMS:%s Judge:%s %s:%s" % (data["name"], test_url, kind, data[kind]))
        self._clearQueue()
        return True

    def _boss(self):
        """Run workers back to back until the task queue is empty."""
        while not self.tasks.empty():
            self._worker()

    def whatweb(self, maxsize=100):
        """Scan the target with up to ``maxsize`` concurrent greenlets.

        Args:
            maxsize: Number of greenlets to spawn.

        Prints the elapsed wall-clock time when every greenlet has finished.
        """
        import gevent
        from gevent import monkey

        # Patch before the first worker imports requests so that sockets and
        # ssl are cooperative; repeated calls to patch_all() are harmless.
        monkey.patch_all()

        # time.clock() was removed in Python 3.8; time.time() exists on
        # Python 2.7 and 3.x and measures wall time, which is what matters
        # for an I/O-bound scan.
        start = time.time()
        greenlets = [gevent.spawn(self._boss) for _ in range(maxsize)]
        gevent.joinall(greenlets)
        end = time.time()
        print("cost: %f s" % (end - start))


if __name__ == '__main__':
    if len(sys.argv) < 2:
        print("usag:python gwhatweb.py http://www.xxx.com")
    else:
        url = sys.argv[1]
        g = gwhatweb(url)
        g.whatweb(1000)