├── README.md ├── pic0.png ├── pic1.png ├── pic2.png ├── spider_v3.py └── www.wuhubtv.com.txt /README.md: -------------------------------------------------------------------------------- 1 | # Common_Spider 2 | 3 | 一个还算通用的爬虫脚本,可自己设定爬取的深度,可以把网站的动态链接地址和外链单独分出来,做安全测试时可以提前爬一下页面,避免测试时会有遗漏。 4 | 5 | 6 | # Change Log 7 | 8 | - [2018-07-05] 对爬取时遇到的一些异常进行了优化 9 | - [2018-04-03] 对域名的处理进行了完善 10 | 11 | # Usage 12 | 13 | 使用比较简单: 14 | 15 | 安装requests依赖 16 | 17 | ``` 18 | pip install requests 19 | ``` 20 | 21 | 运行爬虫 22 | 23 | ``` 24 | python2 spider_v3.py url 5 --> url为待爬取的网站地址,5为爬取深度,可以不设,默认为5。 25 | ``` 26 | 27 | 28 | # Screenshot 29 | 30 | - 扫描过程 31 | 32 | ![screenshot](pic0.png) 33 | 34 | - 扫描结果 35 | 36 | ![screenshot](pic1.png) 37 | 38 | - 外链和动态链接 39 | 40 | ![screenshot](pic2.png) -------------------------------------------------------------------------------- /pic0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideSec/Common_Spider/cb0d66e6a261997da2b15910a4b7f50e5fbdd678/pic0.png -------------------------------------------------------------------------------- /pic1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideSec/Common_Spider/cb0d66e6a261997da2b15910a4b7f50e5fbdd678/pic1.png -------------------------------------------------------------------------------- /pic2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TideSec/Common_Spider/cb0d66e6a261997da2b15910a4b7f50e5fbdd678/pic2.png -------------------------------------------------------------------------------- /spider_v3.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 18/4/14 上午2:07 4 | # @Author : SecPlus 5 | # @Site : www.SecPlus.org 6 | # @Email : TideSecPlus@gmail.com 7 | 8 | # 2018.04.14 
# A fairly complete spider that combines wdscan with other crawlers.
#
# The code below runs on both Python 2 and Python 3: every print() call takes
# a single argument, and text is kept as the interpreter's native ``str``.

import random
import re
import sys
import time

try:  # Python 3
    from urllib.parse import urljoin, urlparse
except ImportError:  # Python 2
    from urlparse import urljoin, urlparse

try:  # Python 2 only; unused below, kept because the original file imported it
    import urllib2  # noqa: F401
except ImportError:
    urllib2 = None

try:
    import requests
except ImportError:  # reported with a clear message when a page is fetched
    requests = None


# Crawl depth used by the command line when none is given.
DEFAULT_CLI_DEPTH = 5

USAGE = '''
    python spider_v3.py url 5 --> url为待爬取的网站地址,5为爬取深度,可以不设,默认为5。
    '''

# Substrings that make processUrl() skip a link (binary assets, pseudo-links).
EXCLUDED_MARKERS = (
    '.zip', '.rar', '.pdf', '.doc', '.xls', '.jpg', '.mp3', '.mp4', '.png',
    '.ico', '.gif', '.svg', '.jpeg', '.mpg', '.wmv', '.wma', 'mailto',
    'javascript', 'data:image',
)

# Substrings that make crawler() list a URL under "Active_link".
ACTIVE_MARKERS = ('?', '.asp', '.jsp', '.php', '.aspx', '.do', '.action')

USER_AGENTS = (
    'Mozilla/5.0 (Windows; U; Win98; en-US; rv:1.8.1) Gecko/20061010 Firefox/2.0',
    'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.6 Safari/532.0',
    'Mozilla/5.0 (Windows; U; Windows NT 5.1 ; x64; en-US; rv:1.9.1b2pre) Gecko/20081026 Firefox/3.1b2pre',
    'Opera/10.60 (Windows NT 5.1; U; zh-cn) Presto/2.6.30 Version/10.60',
    'Opera/8.01 (J2ME/MIDP; Opera Mini/2.0.4062; en; U; ssr)',
    'Mozilla/5.0 (Windows; U; Windows NT 5.1; ; rv:1.9.0.14) Gecko/2009082707 Firefox/3.0.14',
    'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.106 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36',
    'Mozilla/5.0 (Windows; U; Windows NT 6.0; fr; rv:1.9.2.4) Gecko/20100523 Firefox/3.6.4 ( .NET CLR 3.5.30729)',
    'Mozilla/5.0 (Windows; U; Windows NT 6.0; fr-FR) AppleWebKit/528.16 (KHTML, like Gecko) Version/4.0 Safari/528.16',
    'Mozilla/5.0 (Windows; U; Windows NT 6.0; fr-FR) AppleWebKit/533.18.1 (KHTML, like Gecko) Version/5.0.2 Safari/533.18.5',
)

# A URL scheme anchored at the start of the string, so a second "://" later
# in the URL (e.g. inside a query string) cannot be picked up.
_SCHEME_RE = re.compile(r'[A-Za-z][A-Za-z0-9+.\-]*(?=://)')

# href/src/action value of the tags the spider follows. "\b" after the tag
# name keeps "<a" from matching "<abbr"/"<audio"; "[^>]*?" keeps the match
# inside one tag; the lookbehind rejects attributes such as "data-src".
_LINK_RE = re.compile(
    r'<(?:a|link|script|area|iframe|form)\b[^>]*?(?<![\w-])'
    r'(?:href|src|action)\s*=\s*(?:"([^"]*)"|\'([^\']*)\')',
    re.IGNORECASE)


def _native_text(text):
    """Return *text* as native ``str`` (UTF-8 encodes Python 2 ``unicode``)."""
    if isinstance(text, str):
        return text
    return text.encode('utf-8')


def _to_bytes(text):
    """Return *text* as UTF-8 bytes on both Python 2 and Python 3."""
    if isinstance(text, bytes):
        return text
    return text.encode('utf-8')


def url_protocol(url):
    """Return the scheme of *url* (the part before ``://``).

    Only a scheme at the very start of the string is considered. When *url*
    has no scheme, ``'http'`` is returned so that callers can still build
    absolute links from the result.
    """
    match = _SCHEME_RE.match(url)
    if match:
        return match.group(0)
    return 'http'


def same_url(urlprotocol, url):
    """Return the host part of *url* and print it.

    :param urlprotocol: scheme of *url* as returned by :func:`url_protocol`.
    :param url: the URL to take the host from.
    :return: everything between the leading ``<urlprotocol>://`` (if present)
        and the first ``/``, ``?`` or ``#``; a ``www.`` prefix and a port are
        kept as written.
    """
    prefix = urlprotocol + '://'
    if url.startswith(prefix):
        url = url[len(prefix):]
    sameurl = re.split(r'[/?#]', url, maxsplit=1)[0]
    print('the domain is:' + sameurl)
    return sameurl


def requests_headers():
    """Return request headers with a User-Agent picked at random."""
    return {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'User-Agent': random.choice(USER_AGENTS),
        'Upgrade-Insecure-Requests': '1',
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Referer': 'http://www.baidu.com/link?url=www.so.com&url=www.soso.com&&url=www.sogou.com',
    }


def extract_links(html):
    """Return the link targets found in *html*, in document order, no repeats.

    Looks at the ``href``, ``src`` and ``action`` attributes (single or double
    quoted) of ``a``, ``link``, ``script``, ``area``, ``iframe`` and ``form``
    tags, case-insensitively. Empty values are skipped.
    """
    links = []
    seen = set()
    for match in _LINK_RE.finditer(html):
        double_quoted, single_quoted = match.groups()
        link = double_quoted if double_quoted is not None else single_quoted
        link = link.strip()
        if link and link not in seen:
            seen.add(link)
            links.append(link)
    return links


def _fetch_text(url, **options):
    """GET *url* with the spoofed headers and return the body as native str.

    Extra keyword arguments are passed through to ``requests.get``.
    Raises RuntimeError when the ``requests`` package is not installed.
    """
    if requests is None:
        raise RuntimeError("the 'requests' package is required: pip install requests")
    response = requests.get(url, timeout=5, headers=requests_headers(), **options)
    return _native_text(response.text)


class linkQuence(object):
    """Bookkeeping for a crawl: visited, unvisited and external URLs."""

    def __init__(self):
        self.visited = []       # URLs already fetched
        self.unvisited = []     # URLs waiting to be fetched (FIFO queue)
        self.external_url = []  # links that point outside the target site

    def getVisitedUrl(self):
        """Return the list of visited URLs."""
        return self.visited

    def getUnvisitedUrl(self):
        """Return the list of URLs still waiting to be visited."""
        return self.unvisited

    def getExternal_link(self):
        """Return the list of external links."""
        return self.external_url

    def addVisitedUrl(self, url):
        """Record *url* as visited."""
        return self.visited.append(url)

    def addUnvisitedUrl(self, url):
        """Queue *url* unless it is empty, already visited or already queued."""
        if url != '' and url not in self.visited and url not in self.unvisited:
            # New entries go to the front; popUnvisitedUrl() takes from the
            # back, so URLs are handed out in the order they were added.
            return self.unvisited.insert(0, url)

    def addExternalUrl(self, url):
        """Record *url* as an external link unless it is empty or known."""
        if url != '' and url not in self.external_url:
            return self.external_url.insert(0, url)

    def removeVisited(self, url):
        """Remove *url* from the visited list (ValueError if it is absent)."""
        return self.visited.remove(url)

    def popUnvisitedUrl(self):
        """Return the oldest queued URL, or None when the queue is empty."""
        try:
            return self.unvisited.pop()
        except IndexError:
            return None

    def unvisitedUrlEmpty(self):
        """Return True when no URL is waiting to be visited."""
        return len(self.unvisited) == 0


class Spider(object):
    """The crawler itself.

    :param url: seed URL; queued as the first URL to visit.
    :param domain_url: host of the target site (see :func:`same_url`).
    :param urlprotocol: scheme of the target site (see :func:`url_protocol`).
    """

    def __init__(self, url, domain_url, urlprotocol):
        self.linkQuence = linkQuence()
        self.linkQuence.addUnvisitedUrl(url)
        self.current_deepth = 1  # reset to 0 at the start of crawler()
        self.domain_url = domain_url
        self.urlprotocol = urlprotocol

    def getPageLinks(self, url):
        """Fetch *url* and return the raw link targets found in the page.

        Returns an empty list (after printing the reason) when the page
        cannot be fetched. Raises RuntimeError when ``requests`` is missing.
        """
        if requests is None:
            raise RuntimeError("the 'requests' package is required: pip install requests")
        try:
            # NOTE(review): verify=False disables TLS certificate validation;
            # kept from the original -- confirm it is intended for the scan
            # targets (e.g. self-signed certificates).
            content = _fetch_text(url, verify=False)
        except Exception as exc:  # per-page boundary: one bad page must not stop the crawl
            print('[!] Get link error: %s (%s)' % (url, exc))
            return []
        return extract_links(content)

    def getPageLinks_bak(self, url):
        """Older link extractor: every ``href`` value of the page at *url*.

        Sleeps half a second before the request. Returns an empty list when
        the page cannot be fetched.
        """
        if requests is None:
            raise RuntimeError("the 'requests' package is required: pip install requests")
        try:
            time.sleep(0.5)
            page_source = _fetch_text(url)
        except Exception as exc:  # same per-page boundary as getPageLinks()
            print('[!] Get link error: %s (%s)' % (url, exc))
            return []
        return re.findall(r'(?<=href=\").*?(?=\")|(?<=href=\').*?(?=\')', page_source)

    def _absolute(self, page_url, link):
        """Resolve *link* found on *page_url* into an absolute URL.

        Relative, root-relative and protocol-relative links are resolved the
        way a browser would. If *page_url* itself carries no scheme, the site
        root built from ``urlprotocol`` and ``domain_url`` is used as base.
        """
        if '://' in page_url:
            base = page_url
        else:
            base = self.urlprotocol + '://' + self.domain_url + '/'
        return urljoin(base, link)

    def _is_same_target(self, suburl):
        """Return True when the host of *suburl* is the target host or one of
        its sub-domains (compared case-insensitively)."""
        host = urlparse(suburl).netloc.rsplit('@', 1)[-1].lower()
        domain = self.domain_url.lower()
        return host == domain or host.endswith('.' + domain)

    def processUrl(self, url):
        """Return the absolute URLs of the crawlable links on page *url*.

        Links containing any of EXCLUDED_MARKERS are reported and skipped.
        """
        true_url = []
        for suburl in self.getPageLinks(url):
            if any(marker in suburl for marker in EXCLUDED_MARKERS):
                print("break:" + suburl)
                continue
            true_url.append(self._absolute(url, suburl))

        for suburl in true_url:
            print('from:' + url + ' get suburl:' + suburl)

        return true_url

    def sameTargetUrl(self, url):
        """Return the links on page *url* that stay on the target site.

        Links to other hosts are recorded as external links instead.
        """
        same_target_url = []
        for suburl in self.processUrl(url):
            if self._is_same_target(suburl):
                same_target_url.append(suburl)
            else:
                self.linkQuence.addExternalUrl(suburl)
        return same_target_url

    def unrepectUrl(self, url):
        """Return the on-site links of page *url* without repeats, in order."""
        unrepect_url = []
        seen = set()
        for suburl in self.sameTargetUrl(url):
            if suburl not in seen:
                seen.add(suburl)
                unrepect_url.append(suburl)
        return unrepect_url

    def _build_report(self):
        """Return the report lines: visited, unvisited, external and active
        URLs, each section introduced by a banner line."""
        banner = "#" * 30
        visited = list(self.linkQuence.getVisitedUrl())
        unvisited = list(self.linkQuence.getUnvisitedUrl())
        external = list(self.linkQuence.getExternal_link())

        urllist = [banner + ' VisitedUrl ' + banner]
        urllist.extend(visited)
        urllist.append('\n' + banner + ' UnVisitedUrl ' + banner)
        urllist.extend(unvisited)
        urllist.append('\n' + banner + ' External_link ' + banner)
        urllist.extend(external)
        urllist.append('\n' + banner + ' Active_link ' + banner)
        urllist.extend(
            entry for entry in visited + unvisited + external
            if any(marker in entry for marker in ACTIVE_MARKERS))
        return urllist

    def crawler(self, crawl_deepth=1):
        """Crawl level by level up to *crawl_deepth* levels and return the report.

        Each level visits every queued URL and queues the new on-site links
        found for the next level. The returned list of strings is the report
        produced by :meth:`_build_report`.
        """
        self.current_deepth = 0
        print("current_deepth: %s" % self.current_deepth)
        while self.current_deepth < crawl_deepth:
            if self.linkQuence.unvisitedUrlEmpty():
                break
            links = []
            while not self.linkQuence.unvisitedUrlEmpty():
                visitedUrl = self.linkQuence.popUnvisitedUrl()
                if visitedUrl is None or visitedUrl == '':
                    continue
                print("#" * 30 + visitedUrl + " :begin" + "#" * 30)
                links.extend(self.unrepectUrl(visitedUrl))
                self.linkQuence.addVisitedUrl(visitedUrl)
                print("#" * 30 + visitedUrl + " :end" + "#" * 30 + '\n')
            # Queue the links only after the whole level is done, so one
            # iteration of the outer loop is exactly one depth level.
            for link in links:
                self.linkQuence.addUnvisitedUrl(link)
            self.current_deepth += 1
        return self._build_report()


def writelog(log, urllist):
    """Write every entry of *urllist* to the file *log*, one per line (UTF-8)."""
    with open(log, 'wb') as outfile:
        for suburl in urllist:
            outfile.write(_to_bytes(suburl) + b'\n')


def _crawl_and_log(url, log, crawl_deepth, show_urls=False, show_count=False):
    """Crawl *url*, write the report to *log* and print a summary.

    :param url: seed URL; ``http://`` is assumed when it has no scheme.
    :param log: report file name, or None for ``<host>.txt``.
    :param crawl_deepth: number of levels to crawl.
    :param show_urls: also print every report line.
    :param show_count: also print the number of report lines.
    :return: the report lines.
    """
    if '://' not in url:
        url = 'http://' + url
    urlprotocol = url_protocol(url)
    domain_url = same_url(urlprotocol, url)
    print("domain_url:" + domain_url)
    spider = Spider(url, domain_url, urlprotocol)
    urllist = spider.crawler(crawl_deepth)
    if log is None:
        log = domain_url + '.txt'
    writelog(log, urllist)
    print('-' * 20 + url + '-' * 20)
    if show_urls:
        for sublurl in urllist:
            print(sublurl)
    if show_count:
        print(len(urllist))
    print('\n' + 'Result record in:' + log)
    return urllist


def urlspider(rooturl, crawl_deepth=3):
    """Crawl *rooturl*, print the report and save it to ``<host>.txt``."""
    _crawl_and_log(rooturl, None, crawl_deepth, show_urls=True)


def SRC_spider(url, log, crawl_deepth=3):
    """Crawl *url* and save the report to the file *log*."""
    _crawl_and_log(url, log, crawl_deepth)


def main(argv=None):
    """Command-line entry point: ``spider_v3.py url [depth]``.

    :param argv: arguments without the program name; defaults to
        ``sys.argv[1:]``.
    :return: process exit code -- 0 on success or when only the usage text
        was shown, 1 on an invalid depth or a failed crawl.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) not in (1, 2):
        print(USAGE)
        return 0

    craw_deepth = DEFAULT_CLI_DEPTH
    if len(args) == 2:
        try:
            craw_deepth = int(args[1])
        except ValueError:
            print('[!] Invalid crawl depth: ' + args[1])
            print(USAGE)
            return 1

    try:
        _crawl_and_log(args[0], None, craw_deepth, show_urls=True, show_count=True)
    except Exception as exc:  # top-level boundary: report instead of hiding it
        print('[!] Spider failed: %s' % exc)
        return 1
    return 0


if __name__ == '__main__':
    sys.exit(main())


# ---------------------------------------------------------------------------
# Not part of the script: the beginning of the sample output file that follows
# spider_v3.py in this source span, kept verbatim as comments.
#
# /www.wuhubtv.com.txt:
# ############################## VisitedUrl ##############################
# http://www.wuhubtv.com
#
# ############################## UnVisitedUrl ##############################
# http://www.wuhubtv.com/search/index.php
# http://www.wuhubtv.com/t/jscell/28/1_0_0_0_21297.php
# http://www.wuhubtv.com/t/1/5/js/jquery.SuperSlide.2.1.1.js
# http://www.wuhubtv.com/t/1/5/js/jquery.switchable.min.js
# http://www.wuhubtv.com/t/1/5/js/jquery.js
# http://www.wuhubtv.com/t/1/5/css/color.css
# http://www.wuhubtv.com/t/1/5/css/style.css
# http://www.wuhubtv.com/t/1/5/css/base.css
# http://www.wuhubtv.com/#top
# http://www.wuhubtv.com/folder45/folder48/2017-09-15/38429.html
# http://www.wuhubtv.com/folder45/folder48/2017-09-16/38468.html
# http://www.wuhubtv.com/folder45/folder48/
# http://www.wuhubtv.com/folder45/folder47/2017-07-26/35914.html
# http://www.wuhubtv.com/folder45/folder47/2017-07-26/35915.html
# http://www.wuhubtv.com/folder45/folder47/
# http://www.wuhubtv.com/folder45/folder46/2017-07-26/35891.html
# http://www.wuhubtv.com/folder45/folder46/2018-12-06/49094.html
http://www.wuhubtv.com/folder45/folder46/ 23 | http://www.wuhubtv.com/folder45/ 24 | http://www.wuhubtv.com/folder49/folder50/2017-09-11/38304.html 25 | http://www.wuhubtv.com/folder49/folder50/2017-09-13/38380.html 26 | http://www.wuhubtv.com/folder49/folder50/2017-09-13/38381.html 27 | http://www.wuhubtv.com/folder49/folder50/2017-09-13/38382.html 28 | http://www.wuhubtv.com/folder49/folder50/2017-09-13/38383.html 29 | http://www.wuhubtv.com/folder49/folder50/2017-10-27/39346.html 30 | http://www.wuhubtv.com/folder49/folder50/ 31 | http://www.wuhubtv.com/folder49/folder52/2017-09-06/38044.html 32 | http://www.wuhubtv.com/folder49/folder55/2017-09-06/38051.html 33 | http://www.wuhubtv.com/folder49/folder52/2017-09-06/38070.html 34 | http://www.wuhubtv.com/folder49/folder52/2017-09-06/38075.html 35 | http://www.wuhubtv.com/folder49/folder52/2017-09-07/38150.html 36 | http://www.wuhubtv.com/folder49/folder54/2017-09-09/38244.html 37 | http://www.wuhubtv.com/folder49/folder55/2017-09-09/38246.html 38 | http://www.wuhubtv.com/folder49/ 39 | http://www.wuhubtv.com/photos/scenery/2017-08-15/37153.html 40 | http://www.wuhubtv.com/photos/huodong/2017-08-15/37170.html 41 | http://www.wuhubtv.com/photos/huodong/2017-08-18/37319.html 42 | http://www.wuhubtv.com/photos/scenery/2017-08-18/37320.html 43 | http://www.wuhubtv.com/photos/huodong/2017-09-01/37893.html 44 | http://www.wuhubtv.com/photos/scenery/2017-09-01/37894.html 45 | http://www.wuhubtv.com/photos/character/2017-09-01/37895.html 46 | http://www.wuhubtv.com/photos/huodong/ 47 | http://www.wuhubtv.com/photos/scenery/ 48 | http://www.wuhubtv.com/photos/character/ 49 | http://www.wuhubtv.com/photos/ 50 | http://www.wuhubtv.com/folder73/folder76/2018-08-02/46543.html 51 | http://www.wuhubtv.com/folder73/folder76/2018-08-08/46675.html 52 | http://www.wuhubtv.com/folder73/folder76/2018-08-22/47023.html 53 | http://www.wuhubtv.com/folder73/folder76/2018-11-02/48521.html 54 | 
http://www.wuhubtv.com/folder73/folder76/2018-12-10/49152.html 55 | http://www.wuhubtv.com/folder73/folder76/2019-01-03/49607.html 56 | http://www.wuhubtv.com/folder73/folder76/ 57 | http://www.wuhubtv.com/folder73/folder126/2018-02-13/42316.html 58 | http://www.wuhubtv.com/folder73/folder126/2018-09-18/47679.html 59 | http://www.wuhubtv.com/folder73/folder126/2018-09-18/47680.html 60 | http://www.wuhubtv.com/folder73/folder74/2017-07-14/35100.html 61 | http://www.wuhubtv.com/folder73/folder74/2018-07-09/45890.html 62 | http://www.wuhubtv.com/folder73/folder75/2018-02-06/41942.html 63 | http://www.wuhubtv.com/folder73/folder75/2018-05-17/44687.html 64 | http://www.wuhubtv.com/folder73/folder75/2018-06-14/45336.html 65 | http://www.wuhubtv.com/folder73/folder75/2018-09-26/47839.html 66 | http://www.wuhubtv.com/folder73/folder75/2018-12-18/49275.html 67 | http://www.wuhubtv.com/folder73/folder75/2018-12-18/49276.html 68 | http://www.wuhubtv.com/folder73/folder126/ 69 | http://www.wuhubtv.com/folder73/folder74/ 70 | http://www.wuhubtv.com/folder73/folder75/ 71 | http://www.wuhubtv.com/folder101/folder109/2017-08-15/37151.html 72 | http://www.wuhubtv.com/folder101/folder109/ 73 | http://www.wuhubtv.com/folder101/folder103/2017-08-22/37442.html 74 | http://www.wuhubtv.com/folder101/folder103/2017-08-24/37543.html 75 | http://www.wuhubtv.com/folder101/folder103/2017-08-31/37832.html 76 | http://www.wuhubtv.com/folder101/folder103/2017-09-13/38397.html 77 | http://www.wuhubtv.com/folder101/folder103/ 78 | http://www.wuhubtv.com/folder101/folder110/ 79 | http://www.wuhubtv.com/folder101/ 80 | http://www.wuhubtv.com/# 81 | http://www.wuhubtv.com/special/2017-06-20/33683.html 82 | http://www.wuhubtv.com/special/2017-06-20/33684.html 83 | http://www.wuhubtv.com/special/2017-06-23/33850.html 84 | http://www.wuhubtv.com/special/2017-06-26/34039.html 85 | http://www.wuhubtv.com/special/2017-06-28/34211.html 86 | http://www.wuhubtv.com/special/2017-06-28/34212.html 87 | 
http://www.wuhubtv.com/special/2017-06-28/34213.html 88 | http://www.wuhubtv.com/special/2017-07-14/35143.html 89 | http://www.wuhubtv.com/special/2017-08-01/36312.html 90 | http://www.wuhubtv.com/special/2018-02-09/42010.html 91 | http://www.wuhubtv.com/special/ 92 | http://www.wuhubtv.com/video/folder56/2019-01-03/49615.html 93 | http://www.wuhubtv.com/video/folder57/2019-01-04/49623.html 94 | http://www.wuhubtv.com/video/folder124/2019-01-04/49624.html 95 | http://www.wuhubtv.com/video/folder58/2019-01-04/49625.html 96 | http://www.wuhubtv.com/video/folder59/2019-01-04/49626.html 97 | http://www.wuhubtv.com/video/folder56/2019-01-04/49634.html 98 | http://www.wuhubtv.com/video/folder56/2019-01-05/49641.html 99 | http://www.wuhubtv.com/video/folder56/2019-01-06/49644.html 100 | http://www.wuhubtv.com/video/folder61/folder62/2018-12-13/49197.html 101 | http://www.wuhubtv.com/video/folder61/folder62/2018-12-13/49198.html 102 | http://www.wuhubtv.com/video/folder61/folder62/2018-12-25/49394.html 103 | http://www.wuhubtv.com/video/folder61/folder62/2018-12-25/49395.html 104 | http://www.wuhubtv.com/video/folder61/folder62/2018-12-26/49409.html 105 | http://www.wuhubtv.com/news/local/2018-12-28/49454.html 106 | http://www.wuhubtv.com/news/local/2018-12-29/49499.html 107 | http://www.wuhubtv.com/news/local/2018-12-29/49500.html 108 | http://www.wuhubtv.com/news/local/2019-01-02/49582.html 109 | http://www.wuhubtv.com/news/local/2019-01-02/49583.html 110 | http://www.wuhubtv.com/news/local/2019-01-03/49602.html 111 | http://www.wuhubtv.com/news/local/2019-01-03/49603.html 112 | http://www.wuhubtv.com/news/local/2019-01-04/49618.html 113 | http://www.wuhubtv.com/news/local/2019-01-04/49619.html 114 | http://www.wuhubtv.com/news/local/2019-01-07/49647.html 115 | http://www.wuhubtv.com/news/local/ 116 | http://www.wuhubtv.com/news/local/2019-01-07/49648.html 117 | http://www.wuhubtv.com/news/Politics/2019-01-02/49584.html 118 | 
http://www.wuhubtv.com/news/Politics/2019-01-03/49600.html 119 | http://www.wuhubtv.com/news/Politics/2019-01-04/49621.html 120 | http://www.wuhubtv.com/news/Politics/2019-01-07/49649.html 121 | http://www.wuhubtv.com/news/Politics/ 122 | http://www.wuhubtv.com/video/folder61/folder64/2018-11-30/49017.html 123 | http://www.wuhubtv.com/video/folder61/folder64/2018-11-30/49018.html 124 | http://www.wuhubtv.com/video/folder61/folder64/2018-12-04/49053.html 125 | http://www.wuhubtv.com/video/folder61/folder64/2018-12-04/49054.html 126 | http://www.wuhubtv.com/video/folder61/folder64/2018-12-05/49073.html 127 | http://www.wuhubtv.com/video/folder61/folder64/2018-12-06/49084.html 128 | http://www.wuhubtv.com/video/folder61/folder64/2018-12-10/49140.html 129 | http://www.wuhubtv.com/news/headlines/2018-12-10/49147.html 130 | http://www.wuhubtv.com/news/headlines/2018-12-18/49268.html 131 | http://www.wuhubtv.com/news/headlines/ 132 | http://www.wuhubtv.com/news/headlines/2018-05-22/44812.html 133 | http://www.wuhubtv.com/news/local/2018-09-29/47922.html 134 | http://www.wuhubtv.com/news/local/2018-10-16/48253.html 135 | http://www.wuhubtv.com/folder73/ 136 | http://www.wuhubtv.com/video/folder61/ 137 | http://www.wuhubtv.com/video/ 138 | http://www.wuhubtv.com/live/ 139 | http://www.wuhubtv.com/news/ 140 | http://www.wuhubtv.com/Assistant/ 141 | http://www.wuhubtv.com/ 142 | 143 | ############################## External_link ############################## 144 | http://dcs.conac.cn/js/13/207/0000/40661395/CA132070000406613950000.js 145 | http://stat.cloud.hoge.cn/js/webpv/?site_id=249 146 | http://adv.wuhubtv.com/script/hg_ad.js 147 | http://www.ah12377.cn/ 148 | http://www.12377.cn/ 149 | http://www.ewoho.com/ 150 | http://www.eqoho.com/ 151 | http://www.hfbtv.com/ 152 | http://www.myyoco.com/ 153 | http://www.aqbtv.cn/ 154 | http://www.newsxc.com/ 155 | http://www.panguso.com/ 156 | http://www.chinaso.com/ 157 | http://www.xinhuanet.com/ 158 | http://www.cctv.com/ 159 | 
https://mp.weixin.qq.com/s/BSw9gmx6jrJe9nIXG5YgCQ 160 | http://www.ahrtv.cn/news/system/2017/05/25/004265987.shtml 161 | http://news.anhuinews.com/system/2017/08/01/007679690.shtml 162 | http://ah.anhuinews.com/system/2017/09/05/007704637.shtml 163 | http://www1.wuhunews.cn/special/showclass.asp?classid=1631 164 | 165 | ############################## Active_link ############################## 166 | http://www.wuhubtv.com/search/index.php 167 | http://www.wuhubtv.com/t/jscell/28/1_0_0_0_21297.php 168 | http://stat.cloud.hoge.cn/js/webpv/?site_id=249 169 | http://www1.wuhunews.cn/special/showclass.asp?classid=1631 170 | --------------------------------------------------------------------------------