├── run.bat
├── 视频转迅雷.bat
├── __pycache__
├── common.cpython-36.pyc
├── parse_list.cpython-36.pyc
├── parse_src.cpython-36.pyc
└── redisutil.cpython-36.pyc
├── run.py
├── src2file.py
├── redisutil.py
├── README.md
├── common.py
├── parse_list.py
├── parse_src.py
└── download.py
/run.bat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Rollingkeyboard/91/HEAD/run.bat
--------------------------------------------------------------------------------
/视频转迅雷.bat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Rollingkeyboard/91/HEAD/视频转迅雷.bat
--------------------------------------------------------------------------------
/__pycache__/common.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Rollingkeyboard/91/HEAD/__pycache__/common.cpython-36.pyc
--------------------------------------------------------------------------------
/__pycache__/parse_list.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Rollingkeyboard/91/HEAD/__pycache__/parse_list.cpython-36.pyc
--------------------------------------------------------------------------------
/__pycache__/parse_src.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Rollingkeyboard/91/HEAD/__pycache__/parse_src.cpython-36.pyc
--------------------------------------------------------------------------------
/__pycache__/redisutil.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Rollingkeyboard/91/HEAD/__pycache__/redisutil.cpython-36.pyc
--------------------------------------------------------------------------------
/run.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | import parse_list, parse_src, time
3 |
# Stage 1: crawl the list pages and store every video page URL in redis.
print("即将启动解析列表程序")
parse_list.start()

# Stage 2: resolve the stored video pages to their source URLs.
# NOTE(review): the original comment announced a pause before this stage,
# but the sleep call was disabled, so the stage starts immediately.
print("即将启动解析视频程序")
parse_src.start()
--------------------------------------------------------------------------------
/src2file.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | import redis, math, common, time
3 |
# Dump every video source URL stored in redis into numbered text files
# (<TORRENT>/1.txt, 2.txt, ...) holding at most BATCH_SIZE URLs each, so a
# file's content can be pasted into a download manager (Thunder/Xunlei).
BATCH_SIZE = 1000

c = redis.StrictRedis("localhost", 6379)
# Read from the same key the URLs are removed from further down.
lst = c.lrange(common.KEY_SRC, 0, -1)

total = len(lst)
# Ceiling division: 3005 URLs need 4 files (three full ones plus one with 5),
# while exactly 3000 URLs need 3 files -- not an extra, timestamp-only fourth.
count = math.ceil(total / BATCH_SIZE)

for i in range(1, count + 1):
    # Every appended batch starts with a timestamp header.
    header = "\n\n" + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + "\n\n"
    urls = []
    for a in lst[(i - 1) * BATCH_SIZE: i * BATCH_SIZE]:
        # This client does not decode responses, so list items are bytes.
        src = a.decode("utf-8")
        # Entries whose text is the literal "None" are skipped and left in
        # redis (presumably placeholders for pages without a source -- confirm).
        if src != "None":
            urls.append(src)
            # Exported URLs are removed so a later run does not dump them again.
            c.lrem(common.KEY_SRC, 1, src)
    s = header + "".join(url + "\n" for url in urls)

    with open(common.TORRENT + "/" + str(i) + ".txt", 'a') as f:
        f.write(s)
    print("writing file ", i)
--------------------------------------------------------------------------------
/redisutil.py:
--------------------------------------------------------------------------------
1 | import redis
2 |
# Lazily created client shared by every helper in this module.
_client = None


def connect():
    """Return the redis client for the local server (localhost:6379, db 0).

    The client is created on first use and reused afterwards, so callers no
    longer build a new client and connection pool for every single command.
    Responses are decoded to ``str`` (``decode_responses=True``).
    """
    global _client
    if _client is None:
        _client = redis.StrictRedis(host="localhost", port=6379, db=0, decode_responses=True)
    return _client
6 |
def setredis(url, key):
    """Add *url* to the redis set stored at *key*.

    SADD ignores members that are already present, so no separate
    SISMEMBER check (an extra round trip that is not atomic anyway) is made.
    """
    connect().sadd(key, url)
11 |
def exists(url, key):
    """Return True when *url* is an element of the redis list stored at *key*.

    The whole list is fetched with LRANGE and searched locally.
    """
    return url in connect().lrange(key, 0, -1)
24 |
def add(url, key):
    """Append *url* to the redis list at *key* unless the list already holds it."""
    if exists(url, key):
        return
    # NOTE: the membership check and the push are two separate commands.
    client = connect()
    client.rpush(key, url)
29 |
def remove(url, key):
    """Remove every occurrence of *url* from the redis list stored at *key*.

    LREM with count 0 deletes all matching elements and does nothing when
    there is no match, so the list is not downloaded first to check.
    """
    connect().lrem(key, 0, url)
34 |
def total(key):
    """Return the length of the redis list stored at *key*."""
    return connect().llen(key)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # 91
2 | [91 porn](http://91.91p17.space/) 是一个知名的自拍视频网站
3 | - 本程序获取所有视频url
4 | - 然后将视频的下载地址保存到文件,每个文件1000个,可以直接拖到迅雷里下载,一晚上可以下载 没有任何问题
5 | - 可以输入指定的页码进行抓取
6 |
7 | # 安装
8 | - [python3](http://www.python.org)
9 | - [redis](https://redis.io)
10 |
11 | On Windows
12 |
13 | redis-server.exe redis.windows.conf
14 |
15 | Or on Unix-like OS
16 |
17 | $ redis-server redis.conf
18 |
19 | 安装 requests(快速获取html)、redis(Python 客户端)和 pyquery(解析html)
20 |
21 | $ pip install requests redis pyquery
22 |
23 |
24 | # 启动
25 |
26 | python run.py
27 |
28 | 或者
29 |
30 | ### 双击 `run.bat` 运行
31 |
32 | ## 说明
33 | - parse_list.py 将所有视频url下载下来
34 | - parse_src.py 将所有视频的source下载下来,source复制到浏览器可以直接观看
35 | - common.py 通用的文件,保存一些常量
36 | - run.py, run.bat 运行脚本
37 | - src2file.py 将所有视频source复制到文本中,将文本的内容 `Ctrl` + `C`, 然后`Ctrl` + `V` 复制到迅雷里疯狂的下载吧
38 |
39 | # 忠告
40 | 爱惜自己
41 |
42 | # 免责声明
43 | 本程序仅做学习交流之使用,如有其它用途并产生其它后果,本人概不负责。
44 |
--------------------------------------------------------------------------------
/common.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | import requests, re, redisutil, time, random, threading
3 | from urllib3.util.retry import Retry
4 | from requests.adapters import HTTPAdapter
5 |
6 |
import os

# Cookie jar passed along with every request made by visit(); it carries the
# site's `language` cookie set to cn_CN.
# NOTE: the cookie domain is hard-coded and has to follow URL when it changes.
cookies = requests.cookies.RequestsCookieJar()
cookies.set("language", "cn_CN", domain=".91.91p17.space", path="/")

# --------------------------------------
# Temporary mirror of the site; can be swapped at any time.
URL = "http://91.91p17.space/"
KEY = "91"                      # redis list of video page URLs
KEY_SRC = "91_src"              # redis key for the video source URLs
KEY_NONE = "91_none"
LOG = "f:/log/visit.log"        # crawler threads append their completion notice here
TORRENT = "f:/sed/"             # directory receiving the exported URL files
PARSE_LOG = "f:/log/parse.log"
# ----------------------------------------

# Directory part of LOG (everything before the last "/").
path = "/".join(LOG.split("/")[0:-1])

# Create the output and log directories on import when they are missing.
for _directory in (TORRENT, path):
    if not os.path.exists(_directory):
        os.makedirs(_directory)
28 |
29 |
def _prompt_int(prompt):
    """Ask with *prompt* until the user types a valid integer and return it."""
    while True:
        num = input(prompt)
        try:
            return int(num)
        except ValueError:
            # Only a failed conversion is retried; anything else propagates.
            print("抱歉,您输入的不是有效的数字, 请重新输入.")


def getNumber():
    """Prompt for the total number of pages to crawl and return it as an int.

    Re-prompts until the input parses as an integer.
    """
    return _prompt_int("请输入你想抓取的总页数:")


def getTime():
    """Prompt for a duration in minutes and return it as an int.

    Re-prompts until the input parses as an integer.
    """
    return _prompt_int("请输入想获取的时长(分钟):")
59 |
def visit(url, timeout=30):
    """Fetch *url* and return the response body as text.

    Each request carries a fixed Firefox User-Agent, a randomly generated
    IPv4 address in the ``X-Forwarded-For`` header and the module-level
    ``cookies`` jar. Responses with status 500/502/503/504 are retried up to
    5 times with a backoff factor of 10.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server, passed to ``Session.get``.
            Without a timeout a stalled server would block the calling
            thread forever.

    Returns:
        The decoded response body (``Response.text``).

    Raises:
        requests.exceptions.RequestException: on connection errors, timeouts
            or exhausted retries.
    """
    randomIP = ".".join(str(random.randint(0, 255)) for _ in range(4))
    retries = Retry(total=5, backoff_factor=10, status_forcelist=[500, 502, 503, 504])
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:53.0) Gecko/20100101 Firefox/53.0',
               'X-Forwarded-For': randomIP}
    # The context manager closes the session's pooled connections on exit.
    with requests.Session() as s:
        adapter = HTTPAdapter(max_retries=retries)
        # Apply the same retry policy whichever scheme URL uses.
        s.mount('http://', adapter)
        s.mount('https://', adapter)
        return s.get(url, headers=headers, cookies=cookies, timeout=timeout).text
--------------------------------------------------------------------------------
/parse_list.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | import requests, re, redisutil, time, random, threading
3 | from urllib3.util.retry import Retry
4 | from requests.adapters import HTTPAdapter
5 | import common
6 |
def parseList(url):
    """Download list page *url* and store its video page links in redis.

    Every distinct ``<site>/view_video.php?viewkey=...`` link found in the
    page is appended to the redis list ``common.KEY`` unless it is already
    there. The site prefix is taken from ``common.URL`` so the pattern keeps
    matching when the mirror address is changed.
    """
    pattern = re.escape(common.URL.rstrip("/")) + r"/view_video\.php\?viewkey=\w+"
    # set() drops links that appear more than once on the same page.
    for a in set(re.findall(pattern, common.visit(url))):
        if not redisutil.exists(a, common.KEY):
            redisutil.add(a, common.KEY)
            print(threading.current_thread().name, " insert into redis ", a)
        else:
            print(threading.current_thread().name, " redis 已经存在,不再访问 ", a)
16 |
def enter(**kwargs):
    """Thread entry point: crawl the list pages ``start`` .. ``end - 1``.

    Keyword Args:
        start: First page number to crawl (inclusive).
        end: Page number to stop at (exclusive).

    A page that fails is reported, its URL is appended to the redis list
    ``91_error`` and the loop moves on to the next page. When the range is
    done, a completion line is appended to ``common.LOG``.
    """
    first = kwargs["start"]
    last = kwargs["end"]
    name = threading.current_thread().name
    # common.URL ends with "/"; strip it so the page URL has no "//".
    base = common.URL.rstrip("/")

    for page in range(first, last):
        url = base + "/v.php?next=watch&page=" + str(page)
        try:
            print(name, " 解析 ", page, " 页 ", url)
            parseList(url)
            # Random 1-3 second pause between pages.
            time.sleep(random.randint(1, 3))
        except Exception as err:
            # Worker boundary: network and redis failures are not
            # RuntimeError, so catch broadly, report the actual error and
            # keep the thread alive for the remaining pages.
            print(name, " visiting page ", page, " occurs some errors ", repr(err))
            redisutil.add(url, "91_error")

    # Record that this thread has finished so progress is easy to follow.
    with open(common.LOG, "a") as f:
        f.write("线程" + name + " 已经完成抓取 \n")
36 |
def start():
    """Ask for the number of list pages and crawl them with up to 5 threads.

    Pages 1..total are split into contiguous, non-overlapping ranges, one per
    thread (named ``a1``, ``a2``, ...). When fewer than 5 pages are requested
    one thread per page is started. The call returns after every thread has
    finished.
    """
    total = common.getNumber()
    # Default is 5 threads; never start more threads than there are pages.
    thread_total = min(total, 5)

    thread_list = []
    if thread_total > 0:
        # Integer arithmetic only: enter() passes these bounds to range(),
        # which rejects the floats produced by true division.
        page_size, remainder = divmod(total, thread_total)
        first = 1
        for i in range(1, thread_total + 1):
            # The first `remainder` threads take one extra page so no page is
            # dropped when total is not a multiple of the thread count.
            last = first + page_size + (1 if i <= remainder else 0)
            t = threading.Thread(target=enter, name="a" + str(i),
                                 kwargs={"start": first, "end": last})
            thread_list.append(t)
            first = last

    for t in thread_list:
        t.start()

    for t in thread_list:
        t.join()

    print("all thread over")
63 |
--------------------------------------------------------------------------------
/parse_src.py:
--------------------------------------------------------------------------------
1 | # -*- coding: UTF-8 -*-
2 | import requests, re, redis, redisutil, time, random
3 | from pyquery import PyQuery as pq
4 | from urllib3.util.retry import Retry
5 | from requests.adapters import HTTPAdapter
6 | import threading
7 | import common
8 |
9 | # 将列表页插入redis
10 | def parse(url, c, ts):
11 | d = pq(common.visit(url))
12 | src = d("video").find("source").attr("src")
13 |
14 | m = d("#useraction .boxPart").html()
15 | cn = re.search(u'时长:(.*?)