├── Class.py
├── Class.pyc
├── Index.html
├── MagnetFinder.html
├── MagnetFinder.py
├── MagnetFinder.rar
├── MagnetFinder.tar.gz
├── Proxy.py
├── Proxy.pyc
├── README.md
└── proxy_list.txt
/Class.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

class ProxyServer:
    __slots__ = ('proxy_address','proxy_http','speed','proxy_type','country')

    def __init__(self,proxy_address,proxy_http,speed,proxy_type,country):
        self.proxy_address=proxy_address
        self.proxy_http=proxy_http
        self.speed=speed
        self.proxy_type=proxy_type
        self.country=country

    def __getattr__(self,attr):
        raise AttributeError('ProxyServer object has no attribute %s'%attr)

    def __call__(self):
        print 'ProxyServer object called'

class FanHao:
    __slots__ = ('title','file_size','downloading_count','file_number','magnet_url','resource','resource_url')

    def __init__(self,title,file_size,downloading_count,file_number,magnet_url,resource,resource_url):
        self.title = title
        self.file_size = file_size
        self.downloading_count = downloading_count
        self.file_number = file_number
        self.magnet_url = magnet_url
        self.resource = resource
        self.resource_url = resource_url

    def __getattr__(self,attr):
        raise AttributeError('FanHao object has no attribute %s'%attr)

    def __call__(self):
        print 'FanHao object called'
--------------------------------------------------------------------------------
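For reference, a minimal usage sketch (not a file in the repository) showing how the parsers are expected to construct and read FanHao records; the magnet link below is a placeholder, and fields a site does not report are passed as None:

    from Class import FanHao

    record = FanHao('SNIS-344.mkv','3.25 GB',22,1,
                    'magnet:?xt=urn:btih:...','BTKU','http://www.btku.me')
    print record.title          # declared slots read back normally
    try:
        record.seed_count       # an undeclared attribute is rejected...
    except AttributeError, e:
        print e                 # ...by __slots__ together with __getattr__
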
/Class.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hxinhan/MagnetFinder/8896500dc329b0f36821be73e8edc7d5ff698090/Class.pyc
--------------------------------------------------------------------------------
/Index.html:
--------------------------------------------------------------------------------
[Page template; the HTML markup was stripped during extraction. Recoverable page text:]

A real-time crawler for searching P2P magnet url. Enjoy it :)
Version 1.0.1
Coded by HansonHH
Github: https://github.com/HansonHH

Results table header: # | 名称 (name) | 大小 (size) | 热度 (popularity) | 文件数 (file count) | 磁力链接 (magnet link) | 来源 (source)

[The table body is empty in the template; create_url in MagnetFinder.py parses this file and inserts one row per search result into its <tbody>.]
--------------------------------------------------------------------------------
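Although the markup itself is gone, create_url in MagnetFinder.py only depends on this template being parseable and containing a table body. A minimal sanity check (not part of the repo), assuming bs4 is installed:

    from bs4 import BeautifulSoup

    soup = BeautifulSoup(open('Index.html').read())
    # create_url inserts one <tr> per search result at the top of this element
    assert soup.find('tbody') is not None
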
/MagnetFinder.html:
--------------------------------------------------------------------------------
[Sample results page generated by create_url for the query SNIS-344; the HTML markup was stripped during extraction. Recoverable page text:]

A real-time crawler for searching P2P magnet url. Enjoy it :)
Version 1.0.1
Coded by HansonHH
Github: https://github.com/HansonHH

# | 名称 (name) | 大小 (size) | 热度 (popularity) | 文件数 (file count) | 磁力链接 (magnet link) | 来源 (source)
36 | SNIS344AVI | 1.16 GB | 397 | 53 | 点击下载 | BTDB
35 | 0213snis344 | 980.56 MB | 221 | 42 | 点击下载 | BTDB
34 | SNIS-344 | 1.37 GB | 56 | 3 | 点击下载 | BTKU
33 | SNIS-344.1080p.mkv | 3.25 GB | 44 | 1 | 点击下载 | BTKU
32 | SNIS-344 | 1.72 GB | 38 | 15 | 点击下载 | BTKU
31 | 第一會所新片@SIS001@(S1)(SNIS-344)桜井彩がイクときの絶叫 | 3.25 GB | 25 | 9 | 点击下载 | BTKU
30 | SNIS-344.mkv | 3.25 GB | 22 | 1 | 点击下载 | BTKU
29 | 【ses23.com】SNIS-344.1080p | 3.25 GB | 17 | 5 | 点击下载 | BTKU
28 | SNIS-344 | 3.25 GB | 11 | 3 | 点击下载 | BTKU
27 | 033_3xplanet_SNIS-344.mp4 | 1.33 GB | 10 | 1 | 点击下载 | BTKU
26 | SNIS-344.1080p | 3.26 GB | 9 | 3 | 点击下载 | BTKU
25 | 【贴心话】SNIS-344 yingjingca | 804.87 MB | 5 | 15 | 点击下载 | BTKU
24 | snis-344.mp4 | 432.32 MB | 5 | 1 | 点击下载 | BTKU
23 | snis344 | 976.34 MB | 5 | 4 | 点击下载 | BTDB
22 | snis344 | 976.3 MB | 1 | 1 | 点击下载 | micili
21 | snis344 | 976MB | 0 | 4 | 点击下载 | Qululu
20 | Switch [SW-344] | 1266 MB | -- | -- | 点击下载 | zhongzi.in
19 | 344.mnmnicolejohnny | 630 MB | -- | -- | 点击下载 | zhongzi.in
18 | RHJ-344 レッドホットジャム Vol.344 Model Collection 瑠菜, 早川メアリー [UNCENSORED].mp4 | 1492 MB | -- | -- | 点击下载 | zhongzi.in
17 | RHJ-344 – Red Hot Jam Vol.344 – Runa, Meari Hayakawa (瑠菜, 早川メアリー).mp4 | 1373 MB | -- | -- | 点击下载 | zhongzi.in
16 | [RHJ-344] Red Hot Jam Vol.344 Model Collection Runa, Meari Hayakawa | 1201 MB | -- | -- | 点击下载 | zhongzi.in
15 | 第一會所新片@SIS001@(S1)(SNIS-344)桜井彩がイクときの絶叫 | 2048 MB | -- | -- | 点击下载 | zhongzi.in
14 | 033_3xplanet_SNIS-344.mp4 | 1364 MB | -- | -- | 点击下载 | zhongzi.in
13 | 【ses23.com】SNIS-344.1080p | 2048 MB | -- | -- | 点击下载 | zhongzi.in
12 | SNIS-344.1080p.mkv | 2047 MB | -- | -- | 点击下载 | zhongzi.in
11 | SNIS-344 | 975 MB | -- | -- | 点击下载 | zhongzi.in
10 | SNIS-344@unz | 1406 MB | -- | -- | 点击下载 | zhongzi.in
9 | SNIS-344-MP4 | 999 MB | -- | -- | 点击下载 | zhongzi.in
8 | SNIS-344 | 1762 MB | -- | -- | 点击下载 | zhongzi.in
7 | snis344 | 976 MB | -- | -- | 点击下载 | zhongzi.in
6 | 0213snis344 | 980 MB | -- | -- | 点击下载 | zhongzi.in
5 | SNIS-344.mkv | 2047 MB | -- | -- | 点击下载 | zhongzi.in
4 | 【贴心话】SNIS-344 yingjingca | 804 MB | -- | -- | 点击下载 | zhongzi.in
3 | SNIS-344 | 2048 MB | -- | -- | 点击下载 | zhongzi.in
2 | SNIS-344 | 1406 MB | -- | -- | 点击下载 | zhongzi.in
1 | SNIS-344.1080p | 2048 MB | -- | -- | 点击下载 | zhongzi.in

[点击下载 ("click to download") is the anchor text of each magnet link; the link targets themselves are not recoverable.]
--------------------------------------------------------------------------------
/MagnetFinder.py:
--------------------------------------------------------------------------------
#!/usr/bin/python
#coding=utf-8

__AUTOR__= "HansonHH"
__DATA__= "13/08/15"
__VERSAO__= "1.0.1"
__GITHUB__= "https://github.com/HansonHH"

"""
Copyright (C) 2015 Xin Han
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
"""
import sys
import urllib
import urllib2
import re
import random
import threading
import time
import os
import webbrowser
from Proxy import get_proxy_list
from Proxy import proxy_setting
from Proxy import proxy_test
from bs4 import BeautifulSoup
from Class import FanHao

console_encoding = sys.getfilesystemencoding()

def cili_parse(fanhao,proxy_headers):
    # each parser publishes its results through a module-level list so the
    # main thread can collect them after join()
    global cili_fanhaos
    cili_fanhaos = []
    try:
        fanhao_url = 'http://www.cili.tv/search/%s_ctime_1.html'%urllib.quote(fanhao.decode(sys.stdin.encoding).encode('utf8'))
        proxy_request = urllib2.Request(fanhao_url,headers=proxy_headers)
        response = urllib2.urlopen(proxy_request,timeout=5)
        fanhao_html = response.read()
    except Exception:
        return cili_fanhaos

    soup = BeautifulSoup(fanhao_html)
    soup_items = soup.find_all("div",attrs={"class":"item"})
    if soup_items:
        for item in soup_items:
            title = item.a.text.strip()
            info = item.find("div",attrs={"class":"info"})
            spans = info.find_all("span")
            file_size = str(spans[1].b.text)
            downloading_count = int(str(spans[2].b.string))
            magnet_url = str(spans[3].find("a").get('href'))
            resource = 'Cili'
            resource_url = 'http://www.cili.tv'
            fanhao = FanHao(title,file_size,downloading_count,None,magnet_url,resource,resource_url)
            cili_fanhaos.append(fanhao)
    return cili_fanhaos

def btdb_parse(fanhao,proxy_headers):
    global btdb_fanhaos
    btdb_fanhaos = []
    try:
        fanhao_url = 'http://btdb.in/q/%s/'%urllib.quote(fanhao.decode(sys.stdin.encoding).encode('utf8'))
        proxy_request = urllib2.Request(fanhao_url,headers=proxy_headers)
        response = urllib2.urlopen(proxy_request,timeout=10)
        fanhao_html = response.read()
    except Exception:
        return btdb_fanhaos

    soup = BeautifulSoup(fanhao_html)
    soup_items = soup.find_all("li",attrs={"class":"search-ret-item"})
    if soup_items:
        for item in soup_items:
            title = item.find("h1").find("a").get("title")
            info = item.find("div",attrs={"class":"item-meta-info"}).find_all("span",attrs={"class":"item-meta-info-value"})
            file_size = info[0].text
            downloading_count = int(info[-1].text)
            file_number = int(info[1].text)
            magnet_url = item.find("div",attrs={"class":"item-meta-info"}).find("a",attrs={"class":"magnet"}).get("href")
            resource = 'BTDB'
            resource_url = 'http://btdb.in'
            fanhao = FanHao(title,file_size,downloading_count,file_number,magnet_url,resource,resource_url)
            btdb_fanhaos.append(fanhao)
    return btdb_fanhaos

def btbook_parse(fanhao,proxy_headers):
    global btbook_fanhaos
    btbook_fanhaos = []
    try:
        fanhao_url = 'http://www.btbook.net/search/'+urllib.quote(fanhao.decode(sys.stdin.encoding).encode('utf8'))+'.html'
        proxy_request = urllib2.Request(fanhao_url,headers=proxy_headers)
        response = urllib2.urlopen(proxy_request,timeout=10)
        fanhao_html = response.read()
    except Exception:
        return btbook_fanhaos

    soup = BeautifulSoup(fanhao_html)
    soup_items = soup.find_all("div",attrs={"class":"search-item"})
    if soup_items:
        for item in soup_items:
            title = item.find("h3").find("a").find("b").text
            info = item.find("div",attrs={"class":"item-bar"}).find_all("span")
            file_size = info[2].b.text
            downloading_count = int(info[3].b.text)
            magnet_url = item.find("div",attrs={"class":"item-bar"}).find("a").get("href")
            resource = 'Btbook'
            resource_url = 'http://www.btbook.net'
            fanhao = FanHao(title,file_size,downloading_count,None,magnet_url,resource,resource_url)
            btbook_fanhaos.append(fanhao)
    return btbook_fanhaos

def btcherry_parse(fanhao,proxy_headers):
    global btcherry_fanhaos
    btcherry_fanhaos = []

    try:
        fanhao_url = 'http://www.btcherry.net/search?keyword='+urllib.quote(fanhao.decode(sys.stdin.encoding).encode('utf8'))
        proxy_request = urllib2.Request(fanhao_url,headers=proxy_headers)
        response = urllib2.urlopen(proxy_request,timeout=10)
        fanhao_html = response.read()
    except Exception:
        return btcherry_fanhaos

    soup = BeautifulSoup(fanhao_html)
    soup_items = soup.find_all("div",attrs={"class":"r"})
    if soup_items:
        for item in soup_items:
            try:
                title = item.find("h5",attrs={"class":"h"}).text
                info = item.find("div").find_all("span")
                file_size = info[2].find("span",attrs={"class":"prop_val"}).text
                file_number = int(info[4].find("span",attrs={"class":"prop_val"}).text)
                magnet_url = item.find("div").find("a").get("href")
            except Exception:
                # skip items whose markup does not match rather than reuse stale fields
                continue

            resource = 'BTCherry'
            resource_url = 'http://www.btcherry.net'
            fanhao = FanHao(title,file_size,None,file_number,magnet_url,resource,resource_url)
            btcherry_fanhaos.append(fanhao)
    return btcherry_fanhaos

def zhongziIn_parse(fanhao,proxy_headers):
    global zhongziIn_fanhaos
    zhongziIn_fanhaos = []

    try:
        fanhao_url = 'http://www.zhongzi.in/s/'+urllib.quote(fanhao.decode(sys.stdin.encoding).encode('utf8'))
        proxy_request = urllib2.Request(fanhao_url,headers=proxy_headers)
        response = urllib2.urlopen(proxy_request,timeout=10)
        fanhao_html = response.read()
    except Exception:
        return zhongziIn_fanhaos

    soup = BeautifulSoup(fanhao_html)
    soup_items = soup.find("div",attrs={"class":"wx_list"}).find_all("li")

    if soup_items:
        for item in soup_items:
            title = item.find("a").get('title')
            info = item.find("span",attrs={"class":"j_size"})
            file_size = info.text.split(":")[1]
            magnet_url = info.find("a").get('href')
            resource = 'zhongzi.in'
            resource_url = 'http://www.zhongzi.in'
            fanhao = FanHao(title,file_size,None,None,magnet_url,resource,resource_url)
            zhongziIn_fanhaos.append(fanhao)
    return zhongziIn_fanhaos

def micili_parse(fanhao,proxy_headers):
    global micili_fanhaos
    micili_fanhaos = []

    try:
        fanhao_url = 'http://www.micili.com/list/'+urllib.quote(fanhao.decode(sys.stdin.encoding).encode('utf8'))+'/?c=&s=create_time'
        proxy_request = urllib2.Request(fanhao_url,headers=proxy_headers)
        response = urllib2.urlopen(proxy_request,timeout=10)
        fanhao_html = response.read()
    except Exception:
        return micili_fanhaos

    soup = BeautifulSoup(fanhao_html)
    soup_items = soup.find("ul",attrs={"class":"collection z-depth-1"}).find_all("li")

    if soup_items:
        for item in soup_items:
            title = item.find("h6").find("a").get('title')
            info = item.find("span",attrs={"class":"mt10"})
            file_number=int(info.text.split(':')[1].split(u'大小')[0].strip())
            file_size=info.text.split(':')[2].split(u'请求数')[0].strip()
            downloading_count=int(info.text.split(u'请求数:')[1].split(u'磁力链接')[0].strip())
            magnet_url = info.find("a").get('href')
            resource = 'micili'
            resource_url = 'http://www.micili.com'
            fanhao = FanHao(title,file_size,downloading_count,file_number,magnet_url,resource,resource_url)
            micili_fanhaos.append(fanhao)
    return micili_fanhaos

def btku_parse(fanhao,proxy_headers):
    global btku_fanhaos
    btku_fanhaos = []

    try:
        fanhao_url = 'http://www.btku.me/q/%s/'%urllib.quote(fanhao.decode(sys.stdin.encoding).encode('utf8'))
        proxy_request = urllib2.Request(fanhao_url,headers=proxy_headers)
        response = urllib2.urlopen(proxy_request,timeout=10)
        fanhao_html = response.read()
    except Exception:
        return btku_fanhaos

    soup = BeautifulSoup(fanhao_html)
    soup_items = soup.find("div",attrs={"id":"search_Results"}).find_all("li",attrs={"class":"results"})
    if soup_items:
        for item in soup_items:
            title = item.find("h2").find("a").text
            info = item.find("p",attrs={"class":"resultsIntroduction"})
            file_number = int(info.find_all("label")[0].string)
            file_size = info.find_all("label")[1].string
            downloading_count = int(info.find_all("label")[2].string)
            magnet_url = info.find("span",attrs={"class":"downLink"}).find_all("a")[1].get('href')
            resource = 'BTKU'
            resource_url = 'http://www.btku.me'
            fanhao = FanHao(title,file_size,downloading_count,file_number,magnet_url,resource,resource_url)
            btku_fanhaos.append(fanhao)
    return btku_fanhaos

def Qululu_parse(fanhao,proxy_headers):
    global Qululu_fanhaos
    Qululu_fanhaos = []

    try:
        # this site expects the query hex-encoded in the URL
        fanhao_url = 'http://www.qululu.cn/search1/b/%s/1/hot_d'%fanhao.decode(sys.stdin.encoding).encode('utf8').encode('hex')
        proxy_request = urllib2.Request(fanhao_url,headers=proxy_headers)
        response = urllib2.urlopen(proxy_request,timeout=10)
        fanhao_html = response.read()
    except Exception:
        return Qululu_fanhaos

    soup = BeautifulSoup(fanhao_html)
    soup_items = soup.find("ul",attrs={"class":"mlist"}).find_all("li")
    if soup_items:
        for item in soup_items:
            title = item.find("div",attrs={"class":"T1"}).find("a").string
            # strip the search-term highlight tags (assumed to be <b></b>) from the hex-decoded title
            title = re.sub('<b>','',re.sub('</b>','',title.decode('hex')))
            info = item.find("dl",attrs={"class":"BotInfo"}).find("dt").find_all("span")
            file_size = info[0].string.replace(' ','')
            file_number = int(info[1].string)
            downloading_count = int(info[3].string)
            magnet_url = item.find("div",attrs={"class":"dInfo"}).find("a").get('href')
            resource = 'Qululu'
            resource_url = 'http://www.qululu.cn'
            fanhao = FanHao(title,file_size,downloading_count,file_number,magnet_url,resource,resource_url)
            Qululu_fanhaos.append(fanhao)
    return Qululu_fanhaos

def nimasou_parse(fanhao,proxy_headers):
    global nimasou_fanhaos
    nimasou_fanhaos = []

    try:
        fanhao_url = 'http://www.nimasou.com/l/%s-hot-desc-1'%urllib.quote(fanhao.decode(sys.stdin.encoding).encode('utf8'))
        proxy_request = urllib2.Request(fanhao_url,headers=proxy_headers)
        response = urllib2.urlopen(proxy_request,timeout=10)
        fanhao_html = response.read()
    except Exception:
        return nimasou_fanhaos

    soup = BeautifulSoup(fanhao_html)
    try:
        soup_items = soup.find("table",attrs={"class":"table"}).find_all("tr")
    except Exception:
        return nimasou_fanhaos
    if soup_items:
        for item in soup_items:
            title = item.find("td",attrs={"class":"x-item"}).find("a",attrs={"class":"title"}).text
            info = item.find("td",attrs={"class":"x-item"}).find("div",attrs={"class":"tail"}).text.split(':')
            file_size = info[2].split(' ')[1] + info[2].split(' ')[2]
            downloading_count = int(info[3].split(' ')[1])
            magnet_url = item.find("td",attrs={"class":"x-item"}).find("div",attrs={"class":"tail"}).find("a").get('href')
            resource = 'NiMaSou'
            resource_url = 'http://www.nimasou.com'
            fanhao = FanHao(title,file_size,downloading_count,None,magnet_url,resource,resource_url)
            nimasou_fanhaos.append(fanhao)
    return nimasou_fanhaos

def print_result(fanhaos):
    if fanhaos:
        for fanhao in fanhaos:
            try:
                print u'名称:%s'%fanhao.title
                print u'文件大小:%s'%fanhao.file_size
                if fanhao.downloading_count is not None:
                    print u'热度:%d'%fanhao.downloading_count
                else:
                    print u'热度:--'
                if fanhao.file_number is not None:
                    print u'文件数:%s'%str(fanhao.file_number)
                else:
                    print u'文件数:--'
                print u'磁力链接:%s'%fanhao.magnet_url
                print u'来源:%s'%fanhao.resource
                print '-'*40
            except Exception:
                pass
        print u'资源数:%d个'%len(fanhaos)
    else:
        print u'抱歉未找到相关资源!'


def set_headers():
    # rotate a few User-Agent strings so the requests look less uniform
    headers1 = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6','Accept':'text/html;q=0.9,*/*;q=0.8','Accept-Charset':'ISO-8859-1,utf-8;q=0.7,*;q=0.3'}
    headers2 = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.1) Gecko/20090624 Firefox/3.5'}
    headers3 = {'User-Agent':'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1'}
    headers4 = {'User-Agent':'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36'}
    headers = [headers1,headers2,headers3,headers4]
    return random.choice(headers)


def create_url(fanhaos):
    fanhao_html = open("Index.html","r").read()
    soup = BeautifulSoup(fanhao_html)
    fanhao_tbody_html = soup.find("tbody")
    for index,fanhao in enumerate(fanhaos):
        # each new row goes to the top of the table, so the ascending-sorted
        # list renders in descending order of popularity on the page
        tr_tag = soup.new_tag('tr')
        fanhao_tbody_html.insert(0,tr_tag)

        fanhao_tbody_tr = fanhao_tbody_html.find('tr')
        th_tag = soup.new_tag('th')
        th_tag.string = str(index+1)
        fanhao_tbody_tr.insert(0,th_tag)

        title_tag = soup.new_tag('td')
        title_tag.string = fanhao.title
        fanhao_tbody_tr.insert(1,title_tag)

        file_size_tag = soup.new_tag('td')
        file_size_tag.string = fanhao.file_size
        fanhao_tbody_tr.insert(2,file_size_tag)

        downloading_count_tag = soup.new_tag('td')
        if fanhao.downloading_count is not None:
            downloading_count_tag.string = str(fanhao.downloading_count)
        else:
            downloading_count_tag.string = '--'
        fanhao_tbody_tr.insert(3,downloading_count_tag)

        file_number_tag = soup.new_tag('td')
        if fanhao.file_number is not None:
            file_number_tag.string = str(fanhao.file_number)
        else:
            file_number_tag.string = '--'
        fanhao_tbody_tr.insert(4,file_number_tag)

        magnet_url_tag = soup.new_tag('td')
        magnet_url_tag['class'] = 'magnet'
        fanhao_tbody_tr.insert(5,magnet_url_tag)
        fanhao_magnet_td = fanhao_tbody_tr.find('td',attrs={'class':'magnet'})
        magnet_url_a = soup.new_tag('a',href=fanhao.magnet_url)
        magnet_url_a.string = u'点击下载'
        magnet_url_a['class'] = 'btn btn-success'
        fanhao_magnet_td.insert(0,magnet_url_a)

        resource_tag = soup.new_tag('td')
        resource_tag.string = fanhao.resource
        fanhao_tbody_tr.insert(6,resource_tag)

    return soup

def open_browser(soup):
    new_html = open("MagnetFinder.html","wb")
    new_html.write(str(soup))
    new_html.close()

    html_url = 'file://'+os.getcwd()+'/MagnetFinder.html'
    webbrowser.open(html_url,new=2)

if __name__ == '__main__':
    print '*'*40
    print '*'
    print '* Magnet Finder'
    print '*'
    print '* V 1.0.1'
    print '* Coded by Hanson'
    print '* Github https://github.com/HansonHH'
    print '*'
    print '*'*40

    enable_proxy = False

    # Ask whether to route the crawler through an HTTP proxy
    proxy_select = raw_input(unicode('是否设置代理?(Y/N):','utf-8').encode(console_encoding))
    if proxy_select == 'Y' or proxy_select == 'y':
        enable_proxy = True
    else:
        enable_proxy = False

    if enable_proxy:
        proxy_list = get_proxy_list()
        proxy_configured = False
        while not proxy_configured:
            current_proxy,proxy_list = proxy_setting(proxy_list)
            proxy_configured = proxy_test(proxy_configured)
        print 'Current Proxy Address %s'%current_proxy.proxy_address
        print 'Current Proxy Location %s'%current_proxy.country

    while True:
        # Title or code to search for
        fanhao = raw_input(unicode('请输入想要搜索的番号或标题:','utf-8').encode(console_encoding))
        # Counting time start point
        start_time = time.time()

        threads = []

        # one worker thread per site, all searching in parallel
        btdb_thread = threading.Thread(target=btdb_parse,args=(fanhao,set_headers(),))
        threads.append(btdb_thread)

        btbook_thread = threading.Thread(target=btbook_parse,args=(fanhao,set_headers(),))
        threads.append(btbook_thread)

        cili_thread = threading.Thread(target=cili_parse,args=(fanhao,set_headers(),))
        threads.append(cili_thread)

        btcherry_thread = threading.Thread(target=btcherry_parse,args=(fanhao,set_headers(),))
        threads.append(btcherry_thread)

        zhongziIn_thread = threading.Thread(target=zhongziIn_parse,args=(fanhao,set_headers(),))
        threads.append(zhongziIn_thread)

        micili_thread = threading.Thread(target=micili_parse,args=(fanhao,set_headers(),))
        threads.append(micili_thread)

        btku_thread = threading.Thread(target=btku_parse,args=(fanhao,set_headers(),))
        threads.append(btku_thread)

        Qululu_thread = threading.Thread(target=Qululu_parse,args=(fanhao,set_headers(),))
        threads.append(Qululu_thread)

        nimasou_thread = threading.Thread(target=nimasou_parse,args=(fanhao,set_headers(),))
        threads.append(nimasou_thread)

        for t in threads:
            t.start()

        for t in threads:
            t.join()

        fanhaos=btdb_fanhaos+btbook_fanhaos+cili_fanhaos+btcherry_fanhaos+zhongziIn_fanhaos+micili_fanhaos+btku_fanhaos+Qululu_fanhaos+nimasou_fanhaos

        # Sort ascending by popularity; create_url inserts rows at the top of
        # the table, so the generated page lists the hottest results first
        fanhaos.sort(key=lambda fanhao:fanhao.downloading_count)

        print_result(fanhaos)

        # Counting time end point
        finish_time = time.time()
        elapsed = finish_time - start_time
        print u'耗时:%.2f 秒'%elapsed

        soup = create_url(fanhaos)
        open_browser(soup)
--------------------------------------------------------------------------------
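All nine *_parse functions above follow one skeleton: fetch the search page with the rotated headers, return an empty list on any network error, then map the site's markup onto FanHao fields. A condensed sketch for a hypothetical site (the name 'example' and its selectors are illustrative, not a real target):

    def example_parse(fanhao, proxy_headers):
        global example_fanhaos
        example_fanhaos = []        # module-level so the main thread can collect it after join()
        try:
            url = 'http://example.com/q/%s/' % urllib.quote(
                fanhao.decode(sys.stdin.encoding).encode('utf8'))
            request = urllib2.Request(url, headers=proxy_headers)
            html = urllib2.urlopen(request, timeout=10).read()
        except Exception:
            return example_fanhaos  # a dead site must not take the whole search down
        soup = BeautifulSoup(html)
        for item in soup.find_all('div', attrs={'class': 'item'}):
            # fields the site does not report stay None and render as '--'
            example_fanhaos.append(FanHao(item.a.text.strip(), None, None, None,
                                          item.a.get('href'), 'example', 'http://example.com'))
        return example_fanhaos
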
/MagnetFinder.rar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hxinhan/MagnetFinder/8896500dc329b0f36821be73e8edc7d5ff698090/MagnetFinder.rar
--------------------------------------------------------------------------------
/MagnetFinder.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hxinhan/MagnetFinder/8896500dc329b0f36821be73e8edc7d5ff698090/MagnetFinder.tar.gz
--------------------------------------------------------------------------------
/Proxy.py:
--------------------------------------------------------------------------------
import urllib
import urllib2
import re
from Class import ProxyServer

def proxy_test(proxy_configured):
    print 'Proxy Testing...'
    test_headers = {'User-Agent':'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36'}
    test_url = 'http://www.google.com'
    test_request = urllib2.Request(test_url,headers=test_headers)
    try:
        test_response = urllib2.urlopen(test_request,timeout=10)
        if test_response.getcode()==200:
            print u'Configured proxy successfully!'
            return True
        else:
            return False
    except Exception:
        print u'Failed to configure proxy!'
        return False

def find_highest_speed(proxy_list):
    temp_proxy = None
    highest_speed = 0
    for proxy_server in proxy_list:
        proxy_server_speed = proxy_server.speed.split('kbit')[0]
        print 'proxy_server.proxy_address = %s'%proxy_server.proxy_address
        print 'proxy_server.speed = %skbit/s'%proxy_server_speed
        # entries with unknown speed are listed as '-' and skipped
        if proxy_server_speed !='-':
            if float(proxy_server_speed) > highest_speed:
                highest_speed = float(proxy_server_speed)
                temp_proxy = proxy_server
    print '*'*40
    print 'Temp Proxy Address %s'%temp_proxy.proxy_address
    print 'Temp Proxy Speed %s/s'%temp_proxy.speed
    print '*'*40
    proxy_list.remove(temp_proxy)
    return temp_proxy,proxy_list

def get_proxy_list():
    proxy_headers = {'User-Agent':'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36'}
    proxy_url = 'http://proxy-list.org/english/index.php'
    proxy_request = urllib2.Request(proxy_url,headers=proxy_headers)

    try:
        response = urllib2.urlopen(proxy_request,timeout=10)
        html = response.read()
    except urllib2.URLError,e:
        # HTTPError is a subclass of URLError; with no page there is nothing to parse
        print e
        return []
    # each proxy entry on proxy-list.org is a <ul> whose <li> cells hold, in
    # order: address, protocol, speed, anonymity level and country
    name_ul = re.compile("(?isu)<ul[^>]*>(.*?)</ul>")
    name_li = re.compile("(?isu)<li[^>]*>(.*?)</li>")

    proxy_list_txt = open('proxy_list.txt','w')
    proxy_list=[]

    for row in name_ul.findall(html):
        proxy_address = ''.join(name_li.findall(row)[0:1])
        proxy_http = ''.join(name_li.findall(row)[1:2])
        speed = ''.join(name_li.findall(row)[2:3])
        proxy_type = ''.join(name_li.findall(row)[3:4])
        name_country = re.compile('title="(.*?)"')
        country_name=None
        for country in name_li.findall(row)[4:5]:
            country_name = ''.join(name_country.findall(country))
            if ' ' in country_name:
                # keep at most the first two words of the country name
                country_name=' '.join(country_name.split(' ')[:2])

        proxy_server = ProxyServer(proxy_address,proxy_http,speed,proxy_type,country_name)
        proxy_list.append(proxy_server)
        proxy_list_txt.write(proxy_server.proxy_address+'\n')

    proxy_list_txt.close()

    return proxy_list

def proxy_setting(proxy_list):
    try:
        random_proxy,new_proxy_list = find_highest_speed(proxy_list)
    except Exception:
        print 'Failed to Configure Proxy!'
        raise

    proxy_handler = urllib2.ProxyHandler({'http':'http://%s'%random_proxy.proxy_address})
    opener = urllib2.build_opener(proxy_handler)
    urllib2.install_opener(opener)
    print 'Proxy Configuring...'
    return random_proxy,new_proxy_list
--------------------------------------------------------------------------------
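A toy illustration (not part of the repository) of the selection logic in find_highest_speed: entries whose speed is '-' are skipped, and the fastest remaining proxy wins and is removed from the pool; the addresses below are made up:

    from Class import ProxyServer
    from Proxy import find_highest_speed

    pool = [ProxyServer('1.2.3.4:8080','HTTP','120kbit','Anonymous','Germany'),
            ProxyServer('5.6.7.8:3128','HTTP','-kbit','Elite','France'),
            ProxyServer('9.9.9.9:80','HTTP','310kbit','Anonymous','Japan')]
    best, rest = find_highest_speed(pool)
    print best.proxy_address    # 9.9.9.9:80 -- 310 kbit/s beats 120 kbit/s
    print len(rest)             # 2; the winner was removed from the pool
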
/Proxy.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hxinhan/MagnetFinder/8896500dc329b0f36821be73e8edc7d5ff698090/Proxy.pyc
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# MagnetFinder
A real-time crawler for searching P2P magnet URLs.

### 欢迎来到MagnetFinder
MagnetFinder是一个无广告的、短小精悍的爬虫程序,用于从互联网上实时地爬取磁力链接。如果您厌倦了BT网站上成堆的广告,MagnetFinder将会给您带来简洁清爽的体验。

* 版本 1.0.1
* coded by HansonHH
* 下载[Windows](https://github.com/HansonHH/MagnetFinder/blob/master/MagnetFinder.rar?raw=true)版本.
* 下载[Linux/Mac OS X](https://github.com/HansonHH/MagnetFinder/blob/master/MagnetFinder.tar.gz?raw=true)版本.

#### 对于Linux/OS X用户:

* 在使用MagnetFinder前,请先安装BeautifulSoup4 :)
* 使用例子: python MagnetFinder.py

#### 对于Windows用户:

* 直接执行MagnetFinder.exe程序

### Welcome to MagnetFinder
MagnetFinder is a tiny, ad-free, real-time crawler that searches for magnet URLs across a dozen BT websites. If you are tired of BT sites cluttered with ads, MagnetFinder offers a clean, concise experience. Give it a try and enjoy :)

* version 1.0.1
* coded by HansonHH
* [HomePage](http://hansonhh.github.io/MagnetFinder/)
* Download the [Windows](https://github.com/HansonHH/MagnetFinder/blob/master/MagnetFinder.rar?raw=true) version.
* Download the [Linux/Mac OS X](https://github.com/HansonHH/MagnetFinder/blob/master/MagnetFinder.tar.gz?raw=true) version.

#### For Linux/OS X users:

* Install BeautifulSoup4 before using MagnetFinder (e.g. `pip install beautifulsoup4`) :)
* Example: python MagnetFinder.py

#### For Windows users:

* Execute MagnetFinder.exe
--------------------------------------------------------------------------------
/proxy_list.txt:
--------------------------------------------------------------------------------
202.182.185.125:8080
91.217.42.3:8080
63.149.202.234:8080
154.72.197.46:3128
218.76.84.167:3128
216.162.88.234:8080
125.217.199.148:80
182.93.226.14:8080
190.248.153.162:8080
182.92.153.89:3128
186.24.48.242:3128
104.145.233.117:7808
82.139.114.74:80
177.103.145.240:3128
--------------------------------------------------------------------------------