├── Class.py
├── Class.pyc
├── Index.html
├── MagnetFinder.html
├── MagnetFinder.py
├── MagnetFinder.rar
├── MagnetFinder.tar.gz
├── Proxy.py
├── Proxy.pyc
├── README.md
└── proxy_list.txt
/Class.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

class ProxyServer:
    __slots__ = ('proxy_address','proxy_http','speed','proxy_type','country')

    def __init__(self,proxy_address,proxy_http,speed,proxy_type,country):
        self.proxy_address=proxy_address
        self.proxy_http=proxy_http
        self.speed=speed
        self.proxy_type=proxy_type
        self.country=country

    def __getattr__(self,attr):
        raise AttributeError('ProxyServer object has no attribute %s'%attr)

    def __call__(self):
        print 'ProxyServer object called'

class FanHao:
    __slots__ = ('title','file_size','downloading_count','file_number','magnet_url','resource','resource_url')

    def __init__(self,title,file_size,downloading_count,file_number,magnet_url,resource,resource_url):
        self.title = title
        self.file_size = file_size
        self.downloading_count = downloading_count
        self.file_number = file_number
        self.magnet_url = magnet_url
        self.resource = resource
        self.resource_url = resource_url

    def __getattr__(self,attr):
        raise AttributeError('FanHao object has no attribute %s'%attr)

    def __call__(self):
        print 'FanHao object called'
--------------------------------------------------------------------------------
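For reference, a minimal usage sketch (not a file in the repository) showing how the parsers are expected to construct and read FanHao records; the magnet link below is a placeholder, and fields a site does not report are passed as None:

    from Class import FanHao

    record = FanHao('SNIS-344.mkv','3.25 GB',22,1,
                    'magnet:?xt=urn:btih:...','BTKU','http://www.btku.me')
    print record.title          # declared slots read back normally
    try:
        record.seed_count       # an undeclared attribute is rejected...
    except AttributeError, e:
        print e                 # ...by __slots__ together with __getattr__
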
/Class.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hxinhan/MagnetFinder/8896500dc329b0f36821be73e8edc7d5ff698090/Class.pyc
--------------------------------------------------------------------------------
/Index.html:
--------------------------------------------------------------------------------
[Page template; the HTML markup was stripped during extraction. Recoverable page text:]

A real-time crawler for searching P2P magnet url. Enjoy it :)
Version 1.0.1
Coded by HansonHH
Github: https://github.com/HansonHH

Results table header: # | 名称 (name) | 大小 (size) | 热度 (popularity) | 文件数 (file count) | 磁力链接 (magnet link) | 来源 (source)

[The table body is empty in the template; create_url in MagnetFinder.py parses this file and inserts one row per search result into its <tbody>.]
--------------------------------------------------------------------------------
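Although the markup itself is gone, create_url in MagnetFinder.py only depends on this template being parseable and containing a table body. A minimal sanity check (not part of the repo), assuming bs4 is installed:

    from bs4 import BeautifulSoup

    soup = BeautifulSoup(open('Index.html').read())
    # create_url inserts one <tr> per search result at the top of this element
    assert soup.find('tbody') is not None
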
/MagnetFinder.html:
--------------------------------------------------------------------------------
[Sample results page generated by create_url for the query SNIS-344; the HTML markup was stripped during extraction. Recoverable page text:]

A real-time crawler for searching P2P magnet url. Enjoy it :)
Version 1.0.1
Coded by HansonHH
Github: https://github.com/HansonHH

# | 名称 (name) | 大小 (size) | 热度 (popularity) | 文件数 (file count) | 磁力链接 (magnet link) | 来源 (source)
36 | SNIS344AVI | 1.16 GB | 397 | 53 | 点击下载 | BTDB
35 | 0213snis344 | 980.56 MB | 221 | 42 | 点击下载 | BTDB
34 | SNIS-344 | 1.37 GB | 56 | 3 | 点击下载 | BTKU
33 | SNIS-344.1080p.mkv | 3.25 GB | 44 | 1 | 点击下载 | BTKU
32 | SNIS-344 | 1.72 GB | 38 | 15 | 点击下载 | BTKU
31 | 第一會所新片@SIS001@(S1)(SNIS-344)桜井彩がイクときの絶叫 | 3.25 GB | 25 | 9 | 点击下载 | BTKU
30 | SNIS-344.mkv | 3.25 GB | 22 | 1 | 点击下载 | BTKU
29 | 【ses23.com】SNIS-344.1080p | 3.25 GB | 17 | 5 | 点击下载 | BTKU
28 | SNIS-344 | 3.25 GB | 11 | 3 | 点击下载 | BTKU
27 | 033_3xplanet_SNIS-344.mp4 | 1.33 GB | 10 | 1 | 点击下载 | BTKU
26 | SNIS-344.1080p | 3.26 GB | 9 | 3 | 点击下载 | BTKU
25 | 【贴心话】SNIS-344 yingjingca | 804.87 MB | 5 | 15 | 点击下载 | BTKU
24 | snis-344.mp4 | 432.32 MB | 5 | 1 | 点击下载 | BTKU
23 | snis344 | 976.34 MB | 5 | 4 | 点击下载 | BTDB
22 | snis344 | 976.3 MB | 1 | 1 | 点击下载 | micili
21 | snis344 | 976MB | 0 | 4 | 点击下载 | Qululu
20 | Switch [SW-344] | 1266 MB | -- | -- | 点击下载 | zhongzi.in
19 | 344.mnmnicolejohnny | 630 MB | -- | -- | 点击下载 | zhongzi.in
18 | RHJ-344 レッドホットジャム Vol.344 Model Collection 瑠菜, 早川メアリー [UNCENSORED].mp4 | 1492 MB | -- | -- | 点击下载 | zhongzi.in
17 | RHJ-344 – Red Hot Jam Vol.344 – Runa, Meari Hayakawa (瑠菜, 早川メアリー).mp4 | 1373 MB | -- | -- | 点击下载 | zhongzi.in
16 | [RHJ-344] Red Hot Jam Vol.344 Model Collection Runa, Meari Hayakawa | 1201 MB | -- | -- | 点击下载 | zhongzi.in
15 | 第一會所新片@SIS001@(S1)(SNIS-344)桜井彩がイクときの絶叫 | 2048 MB | -- | -- | 点击下载 | zhongzi.in
14 | 033_3xplanet_SNIS-344.mp4 | 1364 MB | -- | -- | 点击下载 | zhongzi.in
13 | 【ses23.com】SNIS-344.1080p | 2048 MB | -- | -- | 点击下载 | zhongzi.in
12 | SNIS-344.1080p.mkv | 2047 MB | -- | -- | 点击下载 | zhongzi.in
11 | SNIS-344 | 975 MB | -- | -- | 点击下载 | zhongzi.in
10 | SNIS-344@unz | 1406 MB | -- | -- | 点击下载 | zhongzi.in
9 | SNIS-344-MP4 | 999 MB | -- | -- | 点击下载 | zhongzi.in
8 | SNIS-344 | 1762 MB | -- | -- | 点击下载 | zhongzi.in
7 | snis344 | 976 MB | -- | -- | 点击下载 | zhongzi.in
6 | 0213snis344 | 980 MB | -- | -- | 点击下载 | zhongzi.in
5 | SNIS-344.mkv | 2047 MB | -- | -- | 点击下载 | zhongzi.in
4 | 【贴心话】SNIS-344 yingjingca | 804 MB | -- | -- | 点击下载 | zhongzi.in
3 | SNIS-344 | 2048 MB | -- | -- | 点击下载 | zhongzi.in
2 | SNIS-344 | 1406 MB | -- | -- | 点击下载 | zhongzi.in
1 | SNIS-344.1080p | 2048 MB | -- | -- | 点击下载 | zhongzi.in

[点击下载 ("click to download") is the anchor text of each magnet link; the link targets themselves are not recoverable.]
--------------------------------------------------------------------------------
/MagnetFinder.py:
--------------------------------------------------------------------------------
#!/usr/bin/python
#coding=utf-8

__AUTOR__= "HansonHH"
__DATA__= "13/08/15"
__VERSAO__= "1.0.1"
__GITHUB__= "https://github.com/HansonHH"

"""
Copyright (C) 2015 Xin Han
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
"""
import sys
import urllib
import urllib2
import re
import random
import threading
import time
import os
import webbrowser
from Proxy import get_proxy_list
from Proxy import proxy_setting
from Proxy import proxy_test
from bs4 import BeautifulSoup
from Class import FanHao

console_encoding = sys.getfilesystemencoding()

def cili_parse(fanhao,proxy_headers):
    # each parser publishes its results through a module-level list so the
    # main thread can collect them after join()
    global cili_fanhaos
    cili_fanhaos = []
    try:
        fanhao_url = 'http://www.cili.tv/search/%s_ctime_1.html'%urllib.quote(fanhao.decode(sys.stdin.encoding).encode('utf8'))
        proxy_request = urllib2.Request(fanhao_url,headers=proxy_headers)
        response = urllib2.urlopen(proxy_request,timeout=5)
        fanhao_html = response.read()
    except Exception:
        return cili_fanhaos

    soup = BeautifulSoup(fanhao_html)
    soup_items = soup.find_all("div",attrs={"class":"item"})
    if soup_items:
        for item in soup_items:
            title = item.a.text.strip()
            info = item.find("div",attrs={"class":"info"})
            spans = info.find_all("span")
            file_size = str(spans[1].b.text)
            downloading_count = int(str(spans[2].b.string))
            magnet_url = str(spans[3].find("a").get('href'))
            resource = 'Cili'
            resource_url = 'http://www.cili.tv'
            fanhao = FanHao(title,file_size,downloading_count,None,magnet_url,resource,resource_url)
            cili_fanhaos.append(fanhao)
    return cili_fanhaos

def btdb_parse(fanhao,proxy_headers):
    global btdb_fanhaos
    btdb_fanhaos = []
    try:
        fanhao_url = 'http://btdb.in/q/%s/'%urllib.quote(fanhao.decode(sys.stdin.encoding).encode('utf8'))
        proxy_request = urllib2.Request(fanhao_url,headers=proxy_headers)
        response = urllib2.urlopen(proxy_request,timeout=10)
        fanhao_html = response.read()
    except Exception:
        return btdb_fanhaos

    soup = BeautifulSoup(fanhao_html)
    soup_items = soup.find_all("li",attrs={"class":"search-ret-item"})
    if soup_items:
        for item in soup_items:
            title = item.find("h1").find("a").get("title")
            info = item.find("div",attrs={"class":"item-meta-info"}).find_all("span",attrs={"class":"item-meta-info-value"})
            file_size = info[0].text
            downloading_count = int(info[-1].text)
            file_number = int(info[1].text)
            magnet_url = item.find("div",attrs={"class":"item-meta-info"}).find("a",attrs={"class":"magnet"}).get("href")
            resource = 'BTDB'
            resource_url = 'http://btdb.in'
            fanhao = FanHao(title,file_size,downloading_count,file_number,magnet_url,resource,resource_url)
            btdb_fanhaos.append(fanhao)
    return btdb_fanhaos

def btbook_parse(fanhao,proxy_headers):
    global btbook_fanhaos
    btbook_fanhaos = []
    try:
        fanhao_url = 'http://www.btbook.net/search/'+urllib.quote(fanhao.decode(sys.stdin.encoding).encode('utf8'))+'.html'
        proxy_request = urllib2.Request(fanhao_url,headers=proxy_headers)
        response = urllib2.urlopen(proxy_request,timeout=10)
        fanhao_html = response.read()
    except Exception:
        return btbook_fanhaos

    soup = BeautifulSoup(fanhao_html)
    soup_items = soup.find_all("div",attrs={"class":"search-item"})
    if soup_items:
        for item in soup_items:
            title = item.find("h3").find("a").find("b").text
            info = item.find("div",attrs={"class":"item-bar"}).find_all("span")
            file_size = info[2].b.text
            downloading_count = int(info[3].b.text)
            magnet_url = item.find("div",attrs={"class":"item-bar"}).find("a").get("href")
            resource = 'Btbook'
            resource_url = 'http://www.btbook.net'
            fanhao = FanHao(title,file_size,downloading_count,None,magnet_url,resource,resource_url)
            btbook_fanhaos.append(fanhao)
    return btbook_fanhaos

def btcherry_parse(fanhao,proxy_headers):
    global btcherry_fanhaos
    btcherry_fanhaos = []

    try:
        fanhao_url = 'http://www.btcherry.net/search?keyword='+urllib.quote(fanhao.decode(sys.stdin.encoding).encode('utf8'))
        proxy_request = urllib2.Request(fanhao_url,headers=proxy_headers)
        response = urllib2.urlopen(proxy_request,timeout=10)
        fanhao_html = response.read()
    except Exception:
        return btcherry_fanhaos

    soup = BeautifulSoup(fanhao_html)
    soup_items = soup.find_all("div",attrs={"class":"r"})
    if soup_items:
        for item in soup_items:
            try:
                title = item.find("h5",attrs={"class":"h"}).text
                info = item.find("div").find_all("span")
                file_size = info[2].find("span",attrs={"class":"prop_val"}).text
                file_number = int(info[4].find("span",attrs={"class":"prop_val"}).text)
                magnet_url = item.find("div").find("a").get("href")
            except Exception:
                # skip items whose markup does not match rather than reuse stale fields
                continue

            resource = 'BTCherry'
            resource_url = 'http://www.btcherry.net'
            fanhao = FanHao(title,file_size,None,file_number,magnet_url,resource,resource_url)
            btcherry_fanhaos.append(fanhao)
    return btcherry_fanhaos

def zhongziIn_parse(fanhao,proxy_headers):
    global zhongziIn_fanhaos
    zhongziIn_fanhaos = []

    try:
        fanhao_url = 'http://www.zhongzi.in/s/'+urllib.quote(fanhao.decode(sys.stdin.encoding).encode('utf8'))
        proxy_request = urllib2.Request(fanhao_url,headers=proxy_headers)
        response = urllib2.urlopen(proxy_request,timeout=10)
        fanhao_html = response.read()
    except Exception:
        return zhongziIn_fanhaos

    soup = BeautifulSoup(fanhao_html)
    soup_items = soup.find("div",attrs={"class":"wx_list"}).find_all("li")

    if soup_items:
        for item in soup_items:
            title = item.find("a").get('title')
            info = item.find("span",attrs={"class":"j_size"})
            file_size = info.text.split(":")[1]
            magnet_url = info.find("a").get('href')
            resource = 'zhongzi.in'
            resource_url = 'http://www.zhongzi.in'
            fanhao = FanHao(title,file_size,None,None,magnet_url,resource,resource_url)
            zhongziIn_fanhaos.append(fanhao)
    return zhongziIn_fanhaos

def micili_parse(fanhao,proxy_headers):
    global micili_fanhaos
    micili_fanhaos = []

    try:
        fanhao_url = 'http://www.micili.com/list/'+urllib.quote(fanhao.decode(sys.stdin.encoding).encode('utf8'))+'/?c=&s=create_time'
        proxy_request = urllib2.Request(fanhao_url,headers=proxy_headers)
        response = urllib2.urlopen(proxy_request,timeout=10)
        fanhao_html = response.read()
    except Exception:
        return micili_fanhaos

    soup = BeautifulSoup(fanhao_html)
    soup_items = soup.find("ul",attrs={"class":"collection z-depth-1"}).find_all("li")

    if soup_items:
        for item in soup_items:
            title = item.find("h6").find("a").get('title')
            info = item.find("span",attrs={"class":"mt10"})
            file_number=int(info.text.split(':')[1].split(u'大小')[0].strip())
            file_size=info.text.split(':')[2].split(u'请求数')[0].strip()
            downloading_count=int(info.text.split(u'请求数:')[1].split(u'磁力链接')[0].strip())
            magnet_url = info.find("a").get('href')
            resource = 'micili'
            resource_url = 'http://www.micili.com'
            fanhao = FanHao(title,file_size,downloading_count,file_number,magnet_url,resource,resource_url)
            micili_fanhaos.append(fanhao)
    return micili_fanhaos

def btku_parse(fanhao,proxy_headers):
    global btku_fanhaos
    btku_fanhaos = []

    try:
        fanhao_url = 'http://www.btku.me/q/%s/'%urllib.quote(fanhao.decode(sys.stdin.encoding).encode('utf8'))
        proxy_request = urllib2.Request(fanhao_url,headers=proxy_headers)
        response = urllib2.urlopen(proxy_request,timeout=10)
        fanhao_html = response.read()
    except Exception:
        return btku_fanhaos

    soup = BeautifulSoup(fanhao_html)
    soup_items = soup.find("div",attrs={"id":"search_Results"}).find_all("li",attrs={"class":"results"})
    if soup_items:
        for item in soup_items:
            title = item.find("h2").find("a").text
            info = item.find("p",attrs={"class":"resultsIntroduction"})
            file_number = int(info.find_all("label")[0].string)
            file_size = info.find_all("label")[1].string
            downloading_count = int(info.find_all("label")[2].string)
            magnet_url = info.find("span",attrs={"class":"downLink"}).find_all("a")[1].get('href')
            resource = 'BTKU'
            resource_url = 'http://www.btku.me'
            fanhao = FanHao(title,file_size,downloading_count,file_number,magnet_url,resource,resource_url)
            btku_fanhaos.append(fanhao)
    return btku_fanhaos

def Qululu_parse(fanhao,proxy_headers):
    global Qululu_fanhaos
    Qululu_fanhaos = []

    try:
        # this site expects the query hex-encoded in the URL
        fanhao_url = 'http://www.qululu.cn/search1/b/%s/1/hot_d'%fanhao.decode(sys.stdin.encoding).encode('utf8').encode('hex')
        proxy_request = urllib2.Request(fanhao_url,headers=proxy_headers)
        response = urllib2.urlopen(proxy_request,timeout=10)
        fanhao_html = response.read()
    except Exception:
        return Qululu_fanhaos

    soup = BeautifulSoup(fanhao_html)
    soup_items = soup.find("ul",attrs={"class":"mlist"}).find_all("li")
    if soup_items:
        for item in soup_items:
            title = item.find("div",attrs={"class":"T1"}).find("a").string
            # strip the search-term highlight tags (assumed to be <b></b>) from the hex-decoded title
            title = re.sub('<b>','',re.sub('</b>','',title.decode('hex')))
            info = item.find("dl",attrs={"class":"BotInfo"}).find("dt").find_all("span")
            file_size = info[0].string.replace(' ','')
            file_number = int(info[1].string)
            downloading_count = int(info[3].string)
            magnet_url = item.find("div",attrs={"class":"dInfo"}).find("a").get('href')
            resource = 'Qululu'
            resource_url = 'http://www.qululu.cn'
            fanhao = FanHao(title,file_size,downloading_count,file_number,magnet_url,resource,resource_url)
            Qululu_fanhaos.append(fanhao)
    return Qululu_fanhaos

def nimasou_parse(fanhao,proxy_headers):
    global nimasou_fanhaos
    nimasou_fanhaos = []

    try:
        fanhao_url = 'http://www.nimasou.com/l/%s-hot-desc-1'%urllib.quote(fanhao.decode(sys.stdin.encoding).encode('utf8'))
        proxy_request = urllib2.Request(fanhao_url,headers=proxy_headers)
        response = urllib2.urlopen(proxy_request,timeout=10)
        fanhao_html = response.read()
    except Exception:
        return nimasou_fanhaos

    soup = BeautifulSoup(fanhao_html)
    try:
        soup_items = soup.find("table",attrs={"class":"table"}).find_all("tr")
    except Exception:
        return nimasou_fanhaos
    if soup_items:
        for item in soup_items:
            title = item.find("td",attrs={"class":"x-item"}).find("a",attrs={"class":"title"}).text
            info = item.find("td",attrs={"class":"x-item"}).find("div",attrs={"class":"tail"}).text.split(':')
            file_size = info[2].split(' ')[1] + info[2].split(' ')[2]
            downloading_count = int(info[3].split(' ')[1])
            magnet_url = item.find("td",attrs={"class":"x-item"}).find("div",attrs={"class":"tail"}).find("a").get('href')
            resource = 'NiMaSou'
            resource_url = 'http://www.nimasou.com'
            fanhao = FanHao(title,file_size,downloading_count,None,magnet_url,resource,resource_url)
            nimasou_fanhaos.append(fanhao)
    return nimasou_fanhaos

def print_result(fanhaos):
    if fanhaos:
        for fanhao in fanhaos:
            try:
                print u'名称:%s'%fanhao.title
                print u'文件大小:%s'%fanhao.file_size
                if fanhao.downloading_count is not None:
                    print u'热度:%d'%fanhao.downloading_count
                else:
                    print u'热度:--'
                if fanhao.file_number is not None:
                    print u'文件数:%s'%str(fanhao.file_number)
                else:
                    print u'文件数:--'
                print u'磁力链接:%s'%fanhao.magnet_url
                print u'来源:%s'%fanhao.resource
                print '-'*40
            except Exception:
                pass
        print u'资源数:%d个'%len(fanhaos)
    else:
        print u'抱歉未找到相关资源!'


def set_headers():
    # rotate a few User-Agent strings so the requests look less uniform
    headers1 = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6','Accept':'text/html;q=0.9,*/*;q=0.8','Accept-Charset':'ISO-8859-1,utf-8;q=0.7,*;q=0.3'}
    headers2 = {'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; rv:1.9.1) Gecko/20090624 Firefox/3.5'}
    headers3 = {'User-Agent':'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1'}
    headers4 = {'User-Agent':'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36'}
    headers = [headers1,headers2,headers3,headers4]
    return random.choice(headers)


def create_url(fanhaos):
    fanhao_html = open("Index.html","r").read()
    soup = BeautifulSoup(fanhao_html)
    fanhao_tbody_html = soup.find("tbody")
    for index,fanhao in enumerate(fanhaos):
        # each new row goes to the top of the table, so the ascending-sorted
        # list renders in descending order of popularity on the page
        tr_tag = soup.new_tag('tr')
        fanhao_tbody_html.insert(0,tr_tag)

        fanhao_tbody_tr = fanhao_tbody_html.find('tr')
        th_tag = soup.new_tag('th')
        th_tag.string = str(index+1)
        fanhao_tbody_tr.insert(0,th_tag)

        title_tag = soup.new_tag('td')
        title_tag.string = fanhao.title
        fanhao_tbody_tr.insert(1,title_tag)

        file_size_tag = soup.new_tag('td')
        file_size_tag.string = fanhao.file_size
        fanhao_tbody_tr.insert(2,file_size_tag)

        downloading_count_tag = soup.new_tag('td')
        if fanhao.downloading_count is not None:
            downloading_count_tag.string = str(fanhao.downloading_count)
        else:
            downloading_count_tag.string = '--'
        fanhao_tbody_tr.insert(3,downloading_count_tag)

        file_number_tag = soup.new_tag('td')
        if fanhao.file_number is not None:
            file_number_tag.string = str(fanhao.file_number)
        else:
            file_number_tag.string = '--'
        fanhao_tbody_tr.insert(4,file_number_tag)

        magnet_url_tag = soup.new_tag('td')
        magnet_url_tag['class'] = 'magnet'
        fanhao_tbody_tr.insert(5,magnet_url_tag)
        fanhao_magnet_td = fanhao_tbody_tr.find('td',attrs={'class':'magnet'})
        magnet_url_a = soup.new_tag('a',href=fanhao.magnet_url)
        magnet_url_a.string = u'点击下载'
        magnet_url_a['class'] = 'btn btn-success'
        fanhao_magnet_td.insert(0,magnet_url_a)

        resource_tag = soup.new_tag('td')
        resource_tag.string = fanhao.resource
        fanhao_tbody_tr.insert(6,resource_tag)

    return soup

def open_browser(soup):
    new_html = open("MagnetFinder.html","wb")
    new_html.write(str(soup))
    new_html.close()

    html_url = 'file://'+os.getcwd()+'/MagnetFinder.html'
    webbrowser.open(html_url,new=2)

if __name__ == '__main__':
    print '*'*40
    print '*'
    print '* Magnet Finder'
    print '*'
    print '* V 1.0.1'
    print '* Coded by Hanson'
    print '* Github https://github.com/HansonHH'
    print '*'
    print '*'*40

    enable_proxy = False

    # Ask whether to route the crawler through an HTTP proxy
    proxy_select = raw_input(unicode('是否设置代理?(Y/N):','utf-8').encode(console_encoding))
    if proxy_select == 'Y' or proxy_select == 'y':
        enable_proxy = True
    else:
        enable_proxy = False

    if enable_proxy:
        proxy_list = get_proxy_list()
        proxy_configured = False
        while not proxy_configured:
            current_proxy,proxy_list = proxy_setting(proxy_list)
            proxy_configured = proxy_test(proxy_configured)
        print 'Current Proxy Address %s'%current_proxy.proxy_address
        print 'Current Proxy Location %s'%current_proxy.country

    while True:
        # Title or code to search for
        fanhao = raw_input(unicode('请输入想要搜索的番号或标题:','utf-8').encode(console_encoding))
        # Counting time start point
        start_time = time.time()

        threads = []

        # one worker thread per site, all searching in parallel
        btdb_thread = threading.Thread(target=btdb_parse,args=(fanhao,set_headers(),))
        threads.append(btdb_thread)

        btbook_thread = threading.Thread(target=btbook_parse,args=(fanhao,set_headers(),))
        threads.append(btbook_thread)

        cili_thread = threading.Thread(target=cili_parse,args=(fanhao,set_headers(),))
        threads.append(cili_thread)

        btcherry_thread = threading.Thread(target=btcherry_parse,args=(fanhao,set_headers(),))
        threads.append(btcherry_thread)

        zhongziIn_thread = threading.Thread(target=zhongziIn_parse,args=(fanhao,set_headers(),))
        threads.append(zhongziIn_thread)

        micili_thread = threading.Thread(target=micili_parse,args=(fanhao,set_headers(),))
        threads.append(micili_thread)

        btku_thread = threading.Thread(target=btku_parse,args=(fanhao,set_headers(),))
        threads.append(btku_thread)

        Qululu_thread = threading.Thread(target=Qululu_parse,args=(fanhao,set_headers(),))
        threads.append(Qululu_thread)

        nimasou_thread = threading.Thread(target=nimasou_parse,args=(fanhao,set_headers(),))
        threads.append(nimasou_thread)

        for t in threads:
            t.start()

        for t in threads:
            t.join()

        fanhaos=btdb_fanhaos+btbook_fanhaos+cili_fanhaos+btcherry_fanhaos+zhongziIn_fanhaos+micili_fanhaos+btku_fanhaos+Qululu_fanhaos+nimasou_fanhaos

        # Sort ascending by popularity; create_url inserts rows at the top of
        # the table, so the generated page lists the hottest results first
        fanhaos.sort(key=lambda fanhao:fanhao.downloading_count)

        print_result(fanhaos)

        # Counting time end point
        finish_time = time.time()
        elapsed = finish_time - start_time
        print u'耗时:%.2f 秒'%elapsed

        soup = create_url(fanhaos)
        open_browser(soup)
--------------------------------------------------------------------------------
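All nine *_parse functions above follow one skeleton: fetch the search page with the rotated headers, return an empty list on any network error, then map the site's markup onto FanHao fields. A condensed sketch for a hypothetical site (the name 'example' and its selectors are illustrative, not a real target):

    def example_parse(fanhao, proxy_headers):
        global example_fanhaos
        example_fanhaos = []        # module-level so the main thread can collect it after join()
        try:
            url = 'http://example.com/q/%s/' % urllib.quote(
                fanhao.decode(sys.stdin.encoding).encode('utf8'))
            request = urllib2.Request(url, headers=proxy_headers)
            html = urllib2.urlopen(request, timeout=10).read()
        except Exception:
            return example_fanhaos  # a dead site must not take the whole search down
        soup = BeautifulSoup(html)
        for item in soup.find_all('div', attrs={'class': 'item'}):
            # fields the site does not report stay None and render as '--'
            example_fanhaos.append(FanHao(item.a.text.strip(), None, None, None,
                                          item.a.get('href'), 'example', 'http://example.com'))
        return example_fanhaos
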
/MagnetFinder.rar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hxinhan/MagnetFinder/8896500dc329b0f36821be73e8edc7d5ff698090/MagnetFinder.rar
--------------------------------------------------------------------------------
/MagnetFinder.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hxinhan/MagnetFinder/8896500dc329b0f36821be73e8edc7d5ff698090/MagnetFinder.tar.gz
--------------------------------------------------------------------------------
/Proxy.py:
--------------------------------------------------------------------------------
import urllib
import urllib2
import re
from Class import ProxyServer

def proxy_test(proxy_configured):
    print 'Proxy Testing...'
    test_headers = {'User-Agent':'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36'}
    test_url = 'http://www.google.com'
    test_request = urllib2.Request(test_url,headers=test_headers)
    try:
        test_response = urllib2.urlopen(test_request,timeout=10)
        if test_response.getcode()==200:
            print u'Configured proxy successfully!'
            return True
        else:
            return False
    except Exception:
        print u'Failed to configure proxy!'
        return False

def find_highest_speed(proxy_list):
    temp_proxy = None
    highest_speed = 0
    for proxy_server in proxy_list:
        proxy_server_speed = proxy_server.speed.split('kbit')[0]
        print 'proxy_server.proxy_address = %s'%proxy_server.proxy_address
        print 'proxy_server.speed = %skbit/s'%proxy_server_speed
        # entries with unknown speed are listed as '-' and skipped
        if proxy_server_speed !='-':
            if float(proxy_server_speed) > highest_speed:
                highest_speed = float(proxy_server_speed)
                temp_proxy = proxy_server
    print '*'*40
    print 'Temp Proxy Address %s'%temp_proxy.proxy_address
    print 'Temp Proxy Speed %s/s'%temp_proxy.speed
    print '*'*40
    proxy_list.remove(temp_proxy)
    return temp_proxy,proxy_list

def get_proxy_list():
    proxy_headers = {'User-Agent':'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36'}
    proxy_url = 'http://proxy-list.org/english/index.php'
    proxy_request = urllib2.Request(proxy_url,headers=proxy_headers)

    try:
        response = urllib2.urlopen(proxy_request,timeout=10)
        html = response.read()
    except urllib2.URLError,e:
        # HTTPError is a subclass of URLError; with no page there is nothing to parse
        print e
        return []
    # each proxy entry on proxy-list.org is a <ul> whose <li> cells hold, in
    # order: address, protocol, speed, anonymity level and country
    name_ul = re.compile("(?isu)<ul[^>]*>(.*?)</ul>")
    name_li = re.compile("(?isu)<li[^>]*>(.*?)</li>")

    proxy_list_txt = open('proxy_list.txt','w')
    proxy_list=[]

    for row in name_ul.findall(html):
        proxy_address = ''.join(name_li.findall(row)[0:1])
        proxy_http = ''.join(name_li.findall(row)[1:2])
        speed = ''.join(name_li.findall(row)[2:3])
        proxy_type = ''.join(name_li.findall(row)[3:4])
        name_country = re.compile('title="(.*?)"')
        country_name=None
        for country in name_li.findall(row)[4:5]:
            country_name = ''.join(name_country.findall(country))
            if ' ' in country_name:
                # keep at most the first two words of the country name
                country_name=' '.join(country_name.split(' ')[:2])

        proxy_server = ProxyServer(proxy_address,proxy_http,speed,proxy_type,country_name)
        proxy_list.append(proxy_server)
        proxy_list_txt.write(proxy_server.proxy_address+'\n')

    proxy_list_txt.close()

    return proxy_list

def proxy_setting(proxy_list):
    try:
        random_proxy,new_proxy_list = find_highest_speed(proxy_list)
    except Exception:
        print 'Failed to Configure Proxy!'
        raise

    proxy_handler = urllib2.ProxyHandler({'http':'http://%s'%random_proxy.proxy_address})
    opener = urllib2.build_opener(proxy_handler)
    urllib2.install_opener(opener)
    print 'Proxy Configuring...'
    return random_proxy,new_proxy_list
--------------------------------------------------------------------------------
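A toy illustration (not part of the repository) of the selection logic in find_highest_speed: entries whose speed is '-' are skipped, and the fastest remaining proxy wins and is removed from the pool; the addresses below are made up:

    from Class import ProxyServer
    from Proxy import find_highest_speed

    pool = [ProxyServer('1.2.3.4:8080','HTTP','120kbit','Anonymous','Germany'),
            ProxyServer('5.6.7.8:3128','HTTP','-kbit','Elite','France'),
            ProxyServer('9.9.9.9:80','HTTP','310kbit','Anonymous','Japan')]
    best, rest = find_highest_speed(pool)
    print best.proxy_address    # 9.9.9.9:80 -- 310 kbit/s beats 120 kbit/s
    print len(rest)             # 2; the winner was removed from the pool
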
/Proxy.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hxinhan/MagnetFinder/8896500dc329b0f36821be73e8edc7d5ff698090/Proxy.pyc
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# MagnetFinder
A real-time crawler for searching P2P magnet URLs.

### 欢迎来到MagnetFinder
MagnetFinder是一个无广告的、短小精悍的爬虫程序,用于从互联网上实时地爬取磁力链接。如果您厌倦了BT网站上成堆的广告,MagnetFinder将会给您带来简洁清爽的体验。

* 版本 1.0.1
* coded by HansonHH
* 下载[Windows](https://github.com/HansonHH/MagnetFinder/blob/master/MagnetFinder.rar?raw=true)版本.
* 下载[Linux/Mac OS X](https://github.com/HansonHH/MagnetFinder/blob/master/MagnetFinder.tar.gz?raw=true)版本.

#### 对于Linux/OS X用户:

* 在使用MagnetFinder前,请先安装BeautifulSoup4 :)
* 使用例子: python MagnetFinder.py

#### 对于Windows用户:

* 直接执行MagnetFinder.exe程序

### Welcome to MagnetFinder
MagnetFinder is a tiny, ad-free, real-time crawler that searches for magnet URLs across a dozen BT websites. If you are tired of BT sites cluttered with ads, MagnetFinder offers a clean, concise experience. Give it a try and enjoy :)

* version 1.0.1
* coded by HansonHH
* [HomePage](http://hansonhh.github.io/MagnetFinder/)
* Download the [Windows](https://github.com/HansonHH/MagnetFinder/blob/master/MagnetFinder.rar?raw=true) version.
* Download the [Linux/Mac OS X](https://github.com/HansonHH/MagnetFinder/blob/master/MagnetFinder.tar.gz?raw=true) version.

#### For Linux/OS X users:

* Install BeautifulSoup4 before using MagnetFinder (e.g. `pip install beautifulsoup4`) :)
* Example: python MagnetFinder.py

#### For Windows users:

* Execute MagnetFinder.exe
--------------------------------------------------------------------------------
/proxy_list.txt:
--------------------------------------------------------------------------------
202.182.185.125:8080
91.217.42.3:8080
63.149.202.234:8080
154.72.197.46:3128
218.76.84.167:3128
216.162.88.234:8080
125.217.199.148:80
182.93.226.14:8080
190.248.153.162:8080
182.92.153.89:3128
186.24.48.242:3128
104.145.233.117:7808
82.139.114.74:80
177.103.145.240:3128
--------------------------------------------------------------------------------