├── run.bat ├── 视频转迅雷.bat ├── __pycache__ ├── common.cpython-36.pyc ├── parse_list.cpython-36.pyc ├── parse_src.cpython-36.pyc └── redisutil.cpython-36.pyc ├── run.py ├── src2file.py ├── redisutil.py ├── README.md ├── common.py ├── parse_list.py ├── parse_src.py └── download.py /run.bat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Rollingkeyboard/91/HEAD/run.bat -------------------------------------------------------------------------------- /视频转迅雷.bat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Rollingkeyboard/91/HEAD/视频转迅雷.bat -------------------------------------------------------------------------------- /__pycache__/common.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Rollingkeyboard/91/HEAD/__pycache__/common.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/parse_list.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Rollingkeyboard/91/HEAD/__pycache__/parse_list.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/parse_src.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Rollingkeyboard/91/HEAD/__pycache__/parse_src.cpython-36.pyc -------------------------------------------------------------------------------- /__pycache__/redisutil.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Rollingkeyboard/91/HEAD/__pycache__/redisutil.cpython-36.pyc -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | # -*- coding: UTF-8 -*- 2 | import parse_list, parse_src, time 3 | 4 | print("即将启动解析列表程序") 5 | parse_list.start() 6 | 7 | # 睡眠5分钟后启动 8 | print("即将启动解析视频程序") 9 | #time.sleep(2) 10 | parse_src.start() -------------------------------------------------------------------------------- /src2file.py: -------------------------------------------------------------------------------- 1 | # -*- coding: UTF-8 -*- 2 | import redis, math, common, time 3 | 4 | # 将每个视频的url写入文件,然后用迅雷拖吧 5 | c = redis.StrictRedis("localhost", 6379) 6 | lst = c.lrange("91_src", 0, -1) 7 | 8 | total = len(lst) 9 | count = math.floor(total / 1000) + 1 # 比如 3005个,需要4个文件,每个文件1000个,最后一个文件5个 10 | 11 | for i in range(1, int(count + 1)): 12 | s = "\n\n" + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())+ "\n\n" 13 | for a in lst[(i - 1) * 1000 : i * 1000]: 14 | src = a.decode("utf-8") 15 | if src != "None": 16 | s += src + "\n" 17 | c.lrem(common.KEY_SRC, 1, src) 18 | # print("remove from redis ", src) 19 | 20 | with open(common.TORRENT + "/" + str(i) + ".txt", 'a') as f: 21 | f.write(s) 22 | print("writing file ", i) -------------------------------------------------------------------------------- /redisutil.py: -------------------------------------------------------------------------------- 1 | import redis 2 | 3 | def connect(): 4 | r = redis.StrictRedis(host="localhost", port=6379, db=0, decode_responses=True) 5 | return r 6 | 7 | def setredis(url, key): 8 | r = connect() 9 | if not r.sismember(key, url): 10 | r.sadd(key, url) 11 | 12 | def exists(url, key): 13 | r = connect() 14 | lst = r.lrange(key, 0, -1) 15 | flag = -1 16 | for a in lst: 17 | if a == url: 18 | flag = 1 19 | break 20 | if flag == 1: 21 | return True 22 | else: 23 | return False 24 | 25 | def add(url, key): 26 | if not exists(url, key): 27 | r = connect() 28 | r.rpush(key, url) 29 | 30 | def remove(url, key): 31 | if exists(url, key): 32 | r = connect() 33 | r.lrem(key, 0, url) 34 | 35 | def total(key): 36 | r = connect() 37 | return r.llen(key) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 91 2 | [91 porn](http://91.91p17.space/) 是一个知名的自拍视频网站 3 | - 本程序获取所有视频url- 4 | - 然后将视频的下载地址保存到文件,每个文件1000个,可以直接拖到迅雷里下载,一晚上可以下载 没有任何问题 5 | - 可以输入指定的页码进行抓取 6 | 7 | # 安装 8 | - [python3](http://www.python.org) 9 | - [redis](redis.io) 10 | 11 | On Windows 12 | 13 | redis-server.exe redis.window.conf 14 | 15 | Or on Unix-like OS 16 | 17 | $ redis-server redis.conf 18 | 19 | 安装 requests, 快速获取html 20 | 21 | $ pip install requests 22 | 23 | 24 | # 启动 25 | 26 | python run.py 27 | 28 | 或者 29 | 30 | ### 双击 运行 31 | 32 | ## 说明 33 | - parse_list.py 将所有视频url下载下来 34 | - parse_src.py 将所有视频的source下载下来,source复制到浏览器可以直接观看 35 | - common.py 通用的文件,保存一些常量 36 | - run.py, run.bat 运行脚本 37 | - src2file.py 将所有视频source复制到文本中,将文本的内容 `Ctrl` + `C`, 然后`Ctrl` + `V` 复制到迅雷里疯狂的下载吧 38 | 39 | # 忠告 40 | 爱惜自己 41 | 42 | # 免责声明 43 | 本程序仅做学习交流之使用,如有其它用途并产生其它后果,本人概不负责。 44 | -------------------------------------------------------------------------------- /common.py: -------------------------------------------------------------------------------- 1 | # -*- coding: UTF-8 -*- 2 | import requests, re, redisutil, time, random, threading 3 | from urllib3.util.retry import Retry 4 | from requests.adapters import HTTPAdapter 5 | 6 | 7 | cookies = requests.cookies.RequestsCookieJar() 8 | cookies.set("language", "cn_CN", domain=".91.91p17.space", path="/") 9 | 10 | #-------------------------------------- 11 | # 91 的临时站点,可以随时更换 12 | URL = "http://91.91p17.space/" 13 | KEY = "91" 14 | KEY_SRC = "91_src" # 每个视频源url对于的redis key 15 | KEY_NONE = "91_none" 16 | LOG = "f:/log/visit.log" 17 | TORRENT = "f:/sed/" 18 | PARSE_LOG = "f:/log/parse.log" 19 | #---------------------------------------- 20 | import os 21 | path = "/".join(LOG.split("/")[0:-1]) 22 | 23 | if not os.path.exists(TORRENT): 24 | os.makedirs(TORRENT) 25 | 26 | if not os.path.exists(path): 27 | os.makedirs(path) 28 | 29 | 30 | ''' 31 | 获取访问的主页面 32 | ''' 33 | def getNumber(): 34 | r = 0 35 | while True: 36 | num = input("请输入你想抓取的总页数:") 37 | try: 38 | r = int(num) 39 | break 40 | except: 41 | print("抱歉,您输入的不是有效的数字, 请重新输入.") 42 | continue 43 | return r 44 | 45 | ''' 46 | 获取时长 47 | ''' 48 | def getTime(): 49 | r = 0 50 | while True: 51 | num = input("请输入想获取的时长(分钟):") 52 | try: 53 | r = int(num) 54 | break 55 | except: 56 | print("抱歉,您输入的不是有效的数字, 请重新输入.") 57 | continue 58 | return r 59 | 60 | ''' 61 | 构造随机ip作为请求头访问目标站点 62 | ''' 63 | def visit(url): 64 | randomIP = str(random.randint(0, 255)) + "." + str(random.randint(0,255)) + "." + str(random.randint(0,255)) + "." + str(random.randint(0,255)) 65 | retries = Retry(total=5,backoff_factor=10, status_forcelist=[500,502,503,504]) 66 | headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:53.0) Gecko/20100101 Firefox/53.0', 67 | 'X-Forwarded-For': randomIP} 68 | s = requests.Session() 69 | s.mount('http://', HTTPAdapter(max_retries=retries)) 70 | html = s.get(url, headers=headers, cookies=cookies).text 71 | return html -------------------------------------------------------------------------------- /parse_list.py: -------------------------------------------------------------------------------- 1 | # -*- coding: UTF-8 -*- 2 | import requests, re, redisutil, time, random, threading 3 | from urllib3.util.retry import Retry 4 | from requests.adapters import HTTPAdapter 5 | import common 6 | 7 | # 将列表页插入redis 8 | def parseList(url): 9 | lst = re.compile(r'http:\/\/91\.91p17\.space\/view_video\.php\?viewkey\=\w+').findall(common.visit(url)) 10 | for a in set(lst): 11 | if not redisutil.exists(a, common.KEY): 12 | redisutil.add(a, common.KEY) 13 | print(threading.current_thread().name, " insert into redis ", a) 14 | else: 15 | print(threading.current_thread().name, " redis 已经存在,不再访问 ", a) 16 | 17 | ''' 18 | 线程主方法 19 | ''' 20 | def enter(**kwargs): 21 | start = kwargs["start"] 22 | end = kwargs["end"] 23 | for page in range(start, end): 24 | url = common.URL + "/v.php?next=watch&page=" + str(page) 25 | try: 26 | print(threading.current_thread().name, " 解析 ", page, " 页 ", url) 27 | parseList(url) 28 | time.sleep(random.randint(1, 3)) 29 | except RuntimeError: 30 | print(threading.current_thread().name, " visiting page ", page, " occurs some errors ", RuntimeError.__with_traceback__) 31 | redisutil.add(url, "91_error") 32 | continue 33 | # current thread has finished, log it and we can easily know it 34 | with open(common.LOG, "a") as f: 35 | f.write("线程" + threading.current_thread().name + " 已经完成抓取 \n") 36 | 37 | # 运行方法 38 | def start(): 39 | thread_list = [] 40 | total = common.getNumber() 41 | thread_total = 5 # 线程总数,默认为5,如果抓取页面小于5,则线程总数就是抓取的页面总数 42 | 43 | if total <= 5: 44 | page_size = 1 45 | thread_total = total 46 | else: 47 | page_size = total / 5 # start 5 thread to visit 48 | 49 | for i in range(1, thread_total + 1): 50 | start = (i - 1) * page_size + 1 51 | end = i * page_size + 1 52 | name = "a" + str(i) 53 | t = threading.Thread(target=enter, name=name, kwargs={"start":start,"end":end}) 54 | thread_list.append(t) 55 | 56 | for t in thread_list: 57 | t.start() 58 | 59 | for t in thread_list: 60 | t.join() 61 | 62 | print("all thread over") 63 | -------------------------------------------------------------------------------- /parse_src.py: -------------------------------------------------------------------------------- 1 | # -*- coding: UTF-8 -*- 2 | import requests, re, redis, redisutil, time, random 3 | from pyquery import PyQuery as pq 4 | from urllib3.util.retry import Retry 5 | from requests.adapters import HTTPAdapter 6 | import threading 7 | import common 8 | 9 | # 将列表页插入redis 10 | def parse(url, c, ts): 11 | d = pq(common.visit(url)) 12 | src = d("video").find("source").attr("src") 13 | 14 | m = d("#useraction .boxPart").html() 15 | cn = re.search(u'时长:(.*?)