├── clear_file.sh ├── history.db ├── config.py ├── add_data.py ├── README.MD ├── .gitignore ├── people.py └── downloader.py /clear_file.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | rm *.flv 4 | rm *.mp4 5 | rm *.xml 6 | -------------------------------------------------------------------------------- /history.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Rockyzsu/bilibili/master/history.db -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | START=1 2 | END=1 3 | ID='BV1oK411L7au' 4 | YOU_GET_PATH='/home/xda/othergit/you-get/you-get' 5 | MINS=1 6 | user_id = '518973111' 7 | total_page = 3 8 | -------------------------------------------------------------------------------- /add_data.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import sys 4 | import sqlite3 5 | import fire 6 | # args = sys.argv 7 | # id=args[1] 8 | # start=args[2] 9 | # end=args[3] 10 | 11 | def update_data(id,start,end): 12 | status=0 13 | conn = sqlite3.connect('history.db') 14 | cursor = conn.cursor() 15 | insert_sql ='insert into tb_record values(?,?,?,?)' 16 | 17 | try: 18 | cursor.execute(insert_sql,(id,start,end,status)) 19 | except Exception as e: 20 | print(e) 21 | print('Error') 22 | else: 23 | conn.commit() 24 | print("successfully insert") 25 | 26 | if __name__ == '__main__': 27 | fire.Fire(update_data) 28 | -------------------------------------------------------------------------------- /README.MD: -------------------------------------------------------------------------------- 1 | # B站视频下载 2 | 3 | * 自动批量下载B站一个系列的视频 4 | * 下载某个UP主的所有视频 5 | 6 | 7 | 8 | * 使用: 9 | 10 | 下载you-get库,git clone https://github.com/soimort/you-get.git 复制其本地路径,比如/root/you-get/you-get 11 | 12 | 初次运行,删除history.db 文件, 修改配置文件config.py 13 | 14 | ``` 15 | START=1 # 下载系列视频的 第一个 16 | END=1 # 下载系列视频的最后一个 , 比如一个系列教程有30个视频, start=5 ,end = 20 下载从第5个到第20个 17 | ID='BV1oK411L7au' # 视频的ID 18 | YOU_GET_PATH='/home/xda/othergit/you-get/you-get' # 你的you-get路径 19 | MINS=1 # 每次循环等待1分钟 20 | user_id = '518973111' # UP主的ID 21 | total_page = 3 # up主的视频的页数 22 | ``` 23 | 24 | 25 | 26 | 执行 python downloader.py ,进行下载循环 27 | 28 | python people.py ,把某个up主的视频链接加入到待下载队列 29 | 30 | python add_data.py --id=BV1oK411L7au --start=4 --end=8 下载视频id为BV1oK411L7au的系列教程,从第4开始,到第8个结束,如果只有一个的话,start和end设为1即可。 31 | 32 | 可以不断地往队列里面添加下载链接。 33 | 34 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | nohup.out 2 | node_modules/ 3 | .idea 4 | *.jar 5 | *.pyc 6 | *.swp 7 | *.mp3 8 | *.pkl 9 | *.xls 10 | *.xml 11 | *.csv 12 | *.pkl 13 | *.jpg 14 | *.png 15 | *.flv 16 | *.flv.download 17 | *.gif 18 | *.download 19 | *.zip 20 | *.xlsx 21 | *.log 22 | *.mp4 23 | *.srt 24 | ~$d.xlsx 25 | d.xlsx 26 | request_header 27 | header_toolkit.txt 28 | __pycache__/ 29 | wikizhword.text 30 | news_tensite_xml.dat 31 | news_tensite_xml.smarty.dat 32 | Download/ 33 | Download_IMG/ 34 | cookies 35 | httpcache 36 | full_name.dat 37 | .ipynb_checkpoints/ 38 | spider/headers.txt 39 | area_code.txt 40 | dataset/train-labels-idx1-ubyte.gz 41 | dataset/t10k-labels-idx1-ubyte.gz 42 | dataset/t10k-images-idx3-ubyte.gz 43 | dataset/train-images-idx3-ubyte.gz 44 | NLP/souhu.h5 45 | .vscode/settings.json 46 | autojs/.vscode/launch.json 47 | node_project/spider/.vscode/launch.json 48 | .vscode/ 49 | -------------------------------------------------------------------------------- /people.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from add_data import update_data 3 | from config import user_id 4 | from config import total_page 5 | 6 | 7 | def visit(page): 8 | url = 'https://api.bilibili.com/x/space/arc/search?mid={}&ps=30&tid=0&pn={}&keyword=&order=pubdate&jsonp=jsonp'.format( 9 | user_id, page) 10 | headers = { 11 | "accept": "application/json, text/plain, */*", 12 | "accept-encoding": "gzip, deflate, br", 13 | "accept-language": "zh,en;q=0.9,en-US;q=0.8,zh-CN;q=0.7,zh-TW;q=0.6", 14 | "cookie": "buvid3=52EE1424-8352-DE0D-C2F9-8CEFBD6D7D2024853infoc; i-wanna-go-back=-1; _uuid=D7F4D7102-F510C-9EFD-B44C-5A15BB3D2B9825216infoc; buvid4=79C7023E-28E0-B231-6510-54E406718DAA25965-022021913-c0D4n8mIkOPQS7cPZ5EOlQ%3D%3D; CURRENT_BLACKGAP=0; LIVE_BUVID=AUTO7016452474409017; rpdid=|(Rlllkm)mY0J'uYRlkRmRum; buvid_fp_plain=undefined; blackside_state=0; fingerprint=6c8532a24d1ddc22356289c4c2d1958f; buvid_fp=34e58163f7b4e31c1736ba5b8416e000; SESSDATA=c35a2a31%2C1662290982%2Ca3c0d%2A31; bili_jct=de750fd4e484b47f40b8bb42a5a72869; DedeUserID=73827743; DedeUserID__ckMd5=9d571d9b5b827b73; sid=c3w73yp7; b_ut=5; hit-dyn-v2=1; nostalgia_conf=-1; PVID=2; innersign=0; b_lsid=B710CBE88_180E5C4ABA4; bp_video_offset_73827743=662643097963855900; CURRENT_FNVAL=80; b_timer=%7B%22ffp%22%3A%7B%22333.1007.fp.risk_52EE1424%22%3A%22180E5C4B0BF%22%2C%22333.337.fp.risk_52EE1424%22%3A%22180E5C521EF%22%2C%22333.999.fp.risk_52EE1424%22%3A%22180E5C5494B%22%7D%7D", 15 | "origin": "https://space.bilibili.com", 16 | "referer": "https://space.bilibili.com/518973111/video?tid=0&page=2&keyword=&order=pubdate", 17 | "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36", 18 | } 19 | resp = requests.get( 20 | url=url, 21 | headers=headers 22 | ) 23 | 24 | # print(resp.json()) 25 | js = resp.json() 26 | vlist = js['data']['list']['vlist'] 27 | bvid_list = list(map(lambda x: x.get('bvid'), vlist)) 28 | return bvid_list 29 | 30 | 31 | bv_list = [] 32 | for i in range(1, total_page + 1): 33 | bv_list.extend(visit(i)) 34 | print(bv_list) 35 | list(map(lambda x:update_data(x,1,1),bv_list)) 36 | 37 | -------------------------------------------------------------------------------- /downloader.py: -------------------------------------------------------------------------------- 1 | # @Time : 2019/1/28 14:19 2 | # @File : youtube_downloader.py 3 | 4 | import logging 5 | import os 6 | import subprocess 7 | import sys 8 | import datetime 9 | import re 10 | import codecs 11 | import sqlite3 12 | import time 13 | from config import START, END, ID, YOU_GET_PATH,MINS 14 | CMD = 'python {} {}' 15 | filename = 'url.txt' 16 | 17 | 18 | class SQLite(): 19 | def __init__(self): 20 | self.conn = sqlite3.connect('history.db') 21 | self.cursor = self.conn.cursor() 22 | self.create_table() 23 | 24 | def create_table(self): 25 | create_sql = 'create table if not exists tb_download (url varchar(100),status tinyint,crawltime datetime)' 26 | create_record_tb = 'create table if not exists tb_record (idx varchar(100) PRIMARY KEY,start tinyint,end tinyint,status tinyint)' 27 | self.cursor.execute(create_record_tb) 28 | self.conn.commit() 29 | self.cursor.execute(create_sql) 30 | self.conn.commit() 31 | 32 | def exists(self,url): 33 | querySet = 'select * from tb_download where url = ? and status = 1' 34 | self.cursor.execute(querySet,(url,)) 35 | ret = self.cursor.fetchone() 36 | return True if ret else False 37 | 38 | def insert_history(self,url,status): 39 | query = 'select * from tb_download where url=?' 40 | self.cursor.execute(query,(url,)) 41 | ret = self.cursor.fetchone() 42 | current = datetime.datetime.now() 43 | 44 | if ret: 45 | insert_sql='update tb_download set status=?,crawltime=? where url = ?' 46 | args=(status,status,current,url) 47 | else: 48 | insert_sql = 'insert into tb_download values(?,?,?)' 49 | args=(url,status,current) 50 | 51 | try: 52 | self.cursor.execute(insert_sql,args) 53 | except: 54 | self.conn.rollback() 55 | return False 56 | else: 57 | self.conn.commit() 58 | return True 59 | 60 | def get(self): 61 | sql = 'select idx,start,end from tb_record where status=0' 62 | self.cursor.execute(sql) 63 | ret= self.cursor.fetchone() 64 | return ret 65 | 66 | def set(self,idx): 67 | print('set status =1') 68 | sql='update tb_record set status=1 where idx=?' 69 | self.cursor.execute(sql,(idx,)) 70 | self.conn.commit() 71 | 72 | def llogger(filename): 73 | logger = logging.getLogger(filename) # 不加名称设置root logger 74 | 75 | logger.setLevel(logging.DEBUG) # 设置输出级别 76 | 77 | formatter = logging.Formatter( 78 | '[%(asctime)s][%(filename)s][line: %(lineno)d]\[%(levelname)s] ## %(message)s)', 79 | datefmt='%Y-%m-%d %H:%M:%S') 80 | 81 | # 使用FileHandler输出到文件 82 | prefix = os.path.splitext(filename)[0] 83 | fh = logging.FileHandler(prefix + '.log') 84 | fh.setLevel(logging.DEBUG) 85 | fh.setFormatter(formatter) 86 | 87 | # 使用StreamHandler输出到屏幕 88 | ch = logging.StreamHandler() 89 | ch.setLevel(logging.DEBUG) 90 | ch.setFormatter(formatter) 91 | 92 | # 添加两个Handler 93 | logger.addHandler(ch) 94 | logger.addHandler(fh) 95 | return logger 96 | 97 | 98 | logger = llogger('download.log') 99 | sql_obj = SQLite() 100 | 101 | def run(): 102 | while 1: 103 | result = sql_obj.get() 104 | print(result) 105 | if result: 106 | idx=result[0] 107 | start=result[1] 108 | end=result[2] 109 | try: 110 | download_bilibili(idx,start,end) 111 | except: 112 | pass 113 | else: 114 | sql_obj.set(idx) 115 | else: 116 | time.sleep(MINS*60) 117 | 118 | def download_bilibili(id,start_page,total_page): 119 | global doc 120 | # 填入id和页面 121 | #id = ID 122 | #start_page = START 123 | #total_page = END 124 | 125 | bilibili_url = 'https://www.bilibili.com/video/{}?p={}' 126 | for i in range(start_page, total_page+1): 127 | 128 | next_url = bilibili_url.format(id, i) 129 | if sql_obj.exists(next_url): 130 | print('have download') 131 | continue 132 | 133 | try: 134 | command = CMD.format(YOU_GET_PATH, next_url) 135 | p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, 136 | shell=True) 137 | 138 | output, error = p.communicate() 139 | 140 | except Exception as e: 141 | print('has execption') 142 | sql_obj.insert_history(next_url,status=0) 143 | logger.error(e) 144 | continue 145 | else: 146 | output_str = output.decode() 147 | if len(output_str) == 0: 148 | sql_obj.insert_history(next_url,status=0) 149 | logger.info('下载失败') 150 | continue 151 | 152 | logger.info('{} has been downloaded !'.format(next_url)) 153 | sql_obj.insert_history(next_url,status=1) 154 | 155 | #download_bilibili() 156 | run() 157 | --------------------------------------------------------------------------------