├── clear_file.sh
├── history.db
├── config.py
├── add_data.py
├── README.MD
├── .gitignore
├── people.py
└── downloader.py


/clear_file.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Delete *.flv, *.mp4 and *.xml files from the current directory.
#
# -f : stay quiet when a pattern matches nothing (plain `rm` would complain
#      about the unexpanded glob) and never prompt.
# -- : end of options, so a file name starting with '-' is not read as a flag.
rm -f -- *.flv *.mp4 *.xml
6 | 


--------------------------------------------------------------------------------
/history.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Rockyzsu/bilibili/master/history.db


--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
# Settings imported by downloader.py and people.py.

# First and last part of a multi-part series to download (inclusive);
# use 1 and 1 for a single video.
START = 1
END = 1

# BV id of the video / series.
ID = 'BV1oK411L7au'

# Local path of the you-get script that downloader.py launches with `python`.
YOU_GET_PATH = '/home/xda/othergit/you-get/you-get'

# Minutes downloader.py sleeps when no pending record is found.
MINS = 1

# Id of the uploader whose video list people.py fetches.
user_id = '518973111'

# Number of result pages people.py requests for that uploader.
total_page = 3
8 | 


--------------------------------------------------------------------------------
/add_data.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | import sys
 4 | import sqlite3
 5 | import fire
 6 | # args = sys.argv
 7 | # id=args[1]
 8 | # start=args[2]
 9 | # end=args[3]
10 | 
def update_data(id, start, end):
    """Queue a download job by inserting one row into ``tb_record``.

    The row is written to ``history.db`` in the current working directory
    with ``status`` 0, which is the value downloader.py selects on when it
    looks for pending work.

    Args:
        id: Video id, stored in the ``idx`` primary-key column. The name
            shadows the builtin, but it is also the ``--id`` CLI flag exposed
            through ``fire`` and therefore kept.
        start: First part of the series to download.
        end: Last part of the series to download.

    Returns:
        None. Failures (for example an id that is already queued) are printed
        instead of raised, so a batch caller can keep going.
    """
    status = 0
    # Same DDL as downloader.py, so a job can be queued on a fresh database
    # before the downloader has ever run.
    create_sql = ('create table if not exists tb_record '
                  '(idx varchar(100) PRIMARY KEY,start tinyint,end tinyint,status tinyint)')
    insert_sql = 'insert into tb_record values(?,?,?,?)'

    conn = sqlite3.connect('history.db')
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(create_sql)
            cursor.execute(insert_sql, (id, start, end, status))
        except Exception as e:
            # Deliberately best-effort: report and carry on.
            print(e)
            print('Error')
        else:
            conn.commit()
            print("successfully insert")
    finally:
        # The original never closed the connection.
        conn.close()
25 | 
if __name__ == "__main__":
    # CLI entry point: fire maps --id / --start / --end onto update_data's
    # parameters.
    fire.Fire(update_data)
28 | 


--------------------------------------------------------------------------------
/README.MD:
--------------------------------------------------------------------------------
 1 | # B站视频下载
 2 | 
 3 | * 自动批量下载B站一个系列的视频
 4 | * 下载某个UP主的所有视频
 5 | 
 6 | 
 7 | 
 8 | * 使用：
 9 | 
10 |   下载you-get库，git clone https://github.com/soimort/you-get.git 复制其本地路径，比如/root/you-get/you-get
11 | 
12 |   初次运行，删除history.db 文件， 修改配置文件config.py
13 | 
14 |   ```
15 |   START=1 # 下载系列视频的 第一个
16 |   END=1 # 下载系列视频的最后一个 ， 比如一个系列教程有30个视频， start=5 ,end = 20 下载从第5个到第20个
17 |   ID='BV1oK411L7au' # 视频的ID
18 |   YOU_GET_PATH='/home/xda/othergit/you-get/you-get' # 你的you-get路径
19 |   MINS=1 # 每次循环等待1分钟
20 |   user_id = '518973111' # UP主的ID
21 |   total_page = 3 # up主的视频的页数
22 |   ```
23 | 
24 |   
25 | 
26 |   执行 python downloader.py ，进行下载循环
27 | 
28 |   python people.py ，把某个up主的视频链接加入到待下载队列
29 | 
30 |   python add_data.py --id=BV1oK411L7au --start=4 --end=8 下载视频id为BV1oK411L7au的系列教程，从第4开始，到第8个结束，如果只有一个的话，start和end设为1即可。
31 | 
32 |   可以不断地往队列里面添加下载链接。
33 | 
34 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | nohup.out
 2 | node_modules/
 3 | .idea
 4 | *.jar
 5 | *.pyc
 6 | *.swp
 7 | *.mp3
 8 | *.pkl
 9 | *.xls
10 | *.xml
11 | *.csv
12 | *.pkl
13 | *.jpg
14 | *.png
15 | *.flv
16 | *.flv.download
17 | *.gif
18 | *.download
19 | *.zip
20 | *.xlsx
21 | *.log
22 | *.mp4
23 | *.srt
24 | ~$d.xlsx
25 | d.xlsx
26 | request_header
27 | header_toolkit.txt
28 | __pycache__/
29 | wikizhword.text
30 | news_tensite_xml.dat
31 | news_tensite_xml.smarty.dat
32 | Download/
33 | Download_IMG/
34 | cookies
35 | httpcache
36 | full_name.dat
37 | .ipynb_checkpoints/
38 | spider/headers.txt
39 | area_code.txt
40 | dataset/train-labels-idx1-ubyte.gz
41 | dataset/t10k-labels-idx1-ubyte.gz
42 | dataset/t10k-images-idx3-ubyte.gz
43 | dataset/train-images-idx3-ubyte.gz
44 | NLP/souhu.h5
45 | .vscode/settings.json
46 | autojs/.vscode/launch.json
47 | node_project/spider/.vscode/launch.json
48 | .vscode/
49 | 


--------------------------------------------------------------------------------
/people.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | from add_data import update_data
 3 | from config import user_id
 4 | from config import total_page
 5 | 
 6 | 
 7 | def visit(page):
 8 |     url = 'https://api.bilibili.com/x/space/arc/search?mid={}&ps=30&tid=0&pn={}&keyword=&order=pubdate&jsonp=jsonp'.format(
 9 |         user_id, page)
10 |     headers = {
11 |         "accept": "application/json, text/plain, */*",
12 |         "accept-encoding": "gzip, deflate, br",
13 |         "accept-language": "zh,en;q=0.9,en-US;q=0.8,zh-CN;q=0.7,zh-TW;q=0.6",
14 |         "cookie": "buvid3=52EE1424-8352-DE0D-C2F9-8CEFBD6D7D2024853infoc; i-wanna-go-back=-1; _uuid=D7F4D7102-F510C-9EFD-B44C-5A15BB3D2B9825216infoc; buvid4=79C7023E-28E0-B231-6510-54E406718DAA25965-022021913-c0D4n8mIkOPQS7cPZ5EOlQ%3D%3D; CURRENT_BLACKGAP=0; LIVE_BUVID=AUTO7016452474409017; rpdid=|(Rlllkm)mY0J'uYRlkRmRum; buvid_fp_plain=undefined; blackside_state=0; fingerprint=6c8532a24d1ddc22356289c4c2d1958f; buvid_fp=34e58163f7b4e31c1736ba5b8416e000; SESSDATA=c35a2a31%2C1662290982%2Ca3c0d%2A31; bili_jct=de750fd4e484b47f40b8bb42a5a72869; DedeUserID=73827743; DedeUserID__ckMd5=9d571d9b5b827b73; sid=c3w73yp7; b_ut=5; hit-dyn-v2=1; nostalgia_conf=-1; PVID=2; innersign=0; b_lsid=B710CBE88_180E5C4ABA4; bp_video_offset_73827743=662643097963855900; CURRENT_FNVAL=80; b_timer=%7B%22ffp%22%3A%7B%22333.1007.fp.risk_52EE1424%22%3A%22180E5C4B0BF%22%2C%22333.337.fp.risk_52EE1424%22%3A%22180E5C521EF%22%2C%22333.999.fp.risk_52EE1424%22%3A%22180E5C5494B%22%7D%7D",
15 |         "origin": "https://space.bilibili.com",
16 |         "referer": "https://space.bilibili.com/518973111/video?tid=0&page=2&keyword=&order=pubdate",
17 |         "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36",
18 |     }
19 |     resp = requests.get(
20 |         url=url,
21 |         headers=headers
22 |     )
23 | 
24 |     # print(resp.json())
25 |     js = resp.json()
26 |     vlist = js['data']['list']['vlist']
27 |     bvid_list = list(map(lambda x: x.get('bvid'), vlist))
28 |     return bvid_list
29 | 
30 | 
# Gather the BV ids of every configured page (printing the running list after
# each page), then queue each id as a single-part job (start=1, end=1).
bv_list = []
for page_no in range(1, total_page + 1):
    bv_list += visit(page_no)
    print(bv_list)

for bvid in bv_list:
    update_data(bvid, 1, 1)
36 | 
37 | 


--------------------------------------------------------------------------------
/downloader.py:
--------------------------------------------------------------------------------
  1 | # @Time : 2019/1/28 14:19
  2 | # @File : youtube_downloader.py
  3 | 
  4 | import logging
  5 | import os
  6 | import subprocess
  7 | import sys
  8 | import datetime
  9 | import re
 10 | import codecs
 11 | import sqlite3
 12 | import time
 13 | from config import START, END, ID, YOU_GET_PATH,MINS
# Command-line template; download_bilibili fills the two slots with the
# you-get script path and the video URL.
CMD = 'python {} {}'
# NOTE(review): not referenced by any code visible in this file.
filename = 'url.txt'
 16 | 
 17 | 
 18 | class SQLite():
 19 |     def __init__(self):
 20 |         self.conn = sqlite3.connect('history.db')
 21 |         self.cursor = self.conn.cursor()
 22 |         self.create_table()
 23 | 
 24 |     def create_table(self):
 25 |         create_sql = 'create table if not exists tb_download (url varchar(100),status tinyint,crawltime datetime)'
 26 |         create_record_tb = 'create table if not exists tb_record (idx varchar(100) PRIMARY KEY,start tinyint,end tinyint,status tinyint)'
 27 |         self.cursor.execute(create_record_tb)
 28 |         self.conn.commit()
 29 |         self.cursor.execute(create_sql)
 30 |         self.conn.commit()
 31 | 
 32 |     def exists(self,url):
 33 |         querySet = 'select * from tb_download where url = ? and status = 1'
 34 |         self.cursor.execute(querySet,(url,))
 35 |         ret = self.cursor.fetchone()
 36 |         return True if ret else False
 37 | 
 38 |     def insert_history(self,url,status):
 39 |         query = 'select * from tb_download where url=?'
 40 |         self.cursor.execute(query,(url,))
 41 |         ret = self.cursor.fetchone()
 42 |         current = datetime.datetime.now()
 43 | 
 44 |         if ret:
 45 |             insert_sql='update tb_download set status=?,crawltime=? where url = ?'
 46 |             args=(status,status,current,url)
 47 |         else:
 48 |             insert_sql = 'insert into tb_download values(?,?,?)'
 49 |             args=(url,status,current)
 50 | 
 51 |         try:
 52 |             self.cursor.execute(insert_sql,args)
 53 |         except:
 54 |             self.conn.rollback()
 55 |             return False
 56 |         else:
 57 |             self.conn.commit()
 58 |             return True
 59 | 
 60 |     def get(self):
 61 |         sql = 'select idx,start,end from tb_record where status=0'
 62 |         self.cursor.execute(sql)
 63 |         ret= self.cursor.fetchone()
 64 |         return ret
 65 | 
 66 |     def set(self,idx):
 67 |         print('set status =1')
 68 |         sql='update tb_record set status=1 where idx=?'
 69 |         self.cursor.execute(sql,(idx,))
 70 |         self.conn.commit()
 71 | 
 72 | def llogger(filename):
 73 |     logger = logging.getLogger(filename)  # 不加名称设置root logger
 74 | 
 75 |     logger.setLevel(logging.DEBUG)  # 设置输出级别
 76 | 
 77 |     formatter = logging.Formatter(
 78 |         '[%(asctime)s][%(filename)s][line: %(lineno)d]\[%(levelname)s] ## %(message)s)',
 79 |         datefmt='%Y-%m-%d %H:%M:%S')
 80 | 
 81 |     # 使用FileHandler输出到文件
 82 |     prefix = os.path.splitext(filename)[0]
 83 |     fh = logging.FileHandler(prefix + '.log')
 84 |     fh.setLevel(logging.DEBUG)
 85 |     fh.setFormatter(formatter)
 86 | 
 87 |     # 使用StreamHandler输出到屏幕
 88 |     ch = logging.StreamHandler()
 89 |     ch.setLevel(logging.DEBUG)
 90 |     ch.setFormatter(formatter)
 91 | 
 92 |     # 添加两个Handler
 93 |     logger.addHandler(ch)
 94 |     logger.addHandler(fh)
 95 |     return logger
 96 | 
 97 | 
# Module-wide logger (console + download.log) and database handle, created at
# import time and used by run() and download_bilibili().
logger = llogger('download.log')
sql_obj = SQLite()
100 | 
def run():
    """Poll the job queue forever and download one pending record at a time.

    Each iteration fetches a pending ``(idx, start, end)`` record. If there
    is none, the loop sleeps ``MINS`` minutes. Otherwise the parts are
    downloaded and the record is marked as processed, unless the download
    raised, in which case the record stays pending.

    This function never returns; stop it with Ctrl-C.
    """
    while True:
        result = sql_obj.get()
        print(result)
        if not result:
            time.sleep(MINS * 60)
            continue

        idx, start, end = result
        try:
            download_bilibili(idx, start, end)
        except Exception:
            # The original bare ``except: pass`` also swallowed
            # KeyboardInterrupt and hid the error. Log it, and wait before the
            # next attempt so a record that keeps failing does not spin the
            # loop at full speed.
            logger.exception('processing record %s (parts %s-%s) failed', idx, start, end)
            time.sleep(MINS * 60)
        else:
            sql_obj.set(idx)
117 | 
def download_bilibili(id,start_page,total_page):
    """Download parts ``start_page``..``total_page`` of one video with you-get.

    For every part, the URL ``https://www.bilibili.com/video/<id>?p=<n>`` is
    skipped if the history already marks it as downloaded. Otherwise the
    ``CMD`` command is run for it and the outcome is stored through
    ``sql_obj.insert_history`` (status 1 on success, 0 on failure).

    Args:
        id: Video id placed in the URL.
        start_page: First part number to download.
        total_page: Last part number to download (inclusive).

    Returns:
        None. A failed part is logged and recorded; it does not raise.
    """
    import shlex  # local import: only needed to turn CMD into an argv list

    bilibili_url = 'https://www.bilibili.com/video/{}?p={}'
    for i in range(start_page, total_page + 1):

        next_url = bilibili_url.format(id, i)
        if sql_obj.exists(next_url):
            print('have download')
            continue

        # Build an argument list instead of passing a string to a shell: the
        # URL contains '?' (a glob character) and the id comes from the
        # database. Quoting the values before splitting round-trips them
        # unchanged, including paths with spaces.
        command = shlex.split(
            CMD.format(shlex.quote(YOU_GET_PATH), shlex.quote(next_url)))

        try:
            p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            output, error = p.communicate()
        except OSError as e:
            # The interpreter could not be started at all.
            print('has execption')
            sql_obj.insert_history(next_url, status=0)
            logger.error(e)
            continue

        # Success needs a zero exit status as well as some output; the
        # original looked only at stdout, so a run that printed something and
        # then failed was recorded as downloaded.
        if p.returncode != 0 or not output:
            sql_obj.insert_history(next_url, status=0)
            logger.info('下载失败')
            details = error.decode(errors='replace').strip()
            if details:
                logger.error(details)
            continue

        logger.info('{} has been downloaded !'.format(next_url))
        sql_obj.insert_history(next_url, status=1)
# Entry point: running (or importing) this module starts the endless polling
# loop in run().
#download_bilibili()
run()
157 | 


--------------------------------------------------------------------------------