├── .gitignore
├── README.md
├── config.json
├── get_list.py
└── get_sub.py


/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | playlist
3 | D*
4 | *his*
5 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ## Python Youtube Subtitles Download Script
 2 | 
 3 | 基于[zhuwei.me](https://api.zhuwei.me)字幕接口编写下载脚本<br>
 4 | 
 5 | >在config.json里修改配置下载选项:<br>
 6 | ```javascript
 7 | {
 8 | 	
 9 | 	"test_token":"a2d09c7d76fced01f8be4b1f4cce8bec",  	//测试用api-key
10 | 	"token":"", 				       		//个人申请的api-key
11 | 	"single_language":"zh-Hans",		//单语言下载选项,"en","zh-Hans",...
12 | 	"multilanguage":false,			//是否下载双语,true 或false
13 | 	"which_language_to_zh":"en",		//哪种语言=>简中,"en","kr","jp"...
14 | 	"notimeline":false,			//无字幕时间线 ,true 或false
15 | 	"play_list_file":""			//从指定文件读取视频链接下载字幕
16 | }
17 | ```
18 | * test_token : 接口测试的api,可下载单个字幕
19 | * token : 私人api-key, 在[api.zhuwei.me](https://api.zhuwei.me)申请
20 | * single_language : 单语言字幕下载
21 | * multilanguage : 是否下载双语字幕,若为true则single_language选项无效
22 | * which_language_to_zh : xxx语言+简中(机翻)字幕 (接口暂只支持xxx=>简中)
23 | * notimeline : 字幕不要时间线
24 | * play_list_file : 从所填文件读取链接下载(文件可自行创建,手动添加url,一行一个链接)
25 | <br>
26 | 
27 | > 环境:Python3, 依赖: requests (pip install requests)
28 | 
29 | ### Usage:
30 | 1 . 命令行输入单个视频链接或playlist链接(需申请api-key)即可下载字幕:
31 | 
32 | > python get_sub.py
33 | 
34 | 2 . 从本地文件批量读取视频链接下载字幕(需申请api-key):
35 | >1 . 手动创建文件,一行一个视频链接<br>
36 | >2 . "play_list_file" 填入文件**全称**(.xxx后缀不要忘记加)<br>3 . 运行 python get_sub.py
37 | 
38 | 3 . 将playlist所有视频链接暂存文件再下载(需申请api-key):
39 | >1 . 运行 python get_list.py 将自动把输入的playlist链接里的所有视频链接保存到文件<br> 2 . python get_sub.py 将自动下载以上文件
40 | ## Other:
41 | 
42 | * 字幕语言支持详见[api.zhuwei.me](https://api.zhuwei.me)


--------------------------------------------------------------------------------
/config.json:
--------------------------------------------------------------------------------
1 | {"test_token": "a2d09c7d76fced01f8be4b1f4cce8bec", "token": "", "single_language": "zh-Hans", "multilanguage": false, "which_language_to_zh": "en", "notimeline": false, "play_list_file": ""}


--------------------------------------------------------------------------------
/get_list.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | import json
 3 | import os,time
 4 | 
 5 | with open('config.json','r') as conf:
 6 | 		config=json.load(conf)
 7 | 		token=config['token']
 8 | 
 9 | #获取listurl数据
def get_list(list_url):
    """Fetch the watch URLs of every video in a YouTube playlist.

    The playlist id is the text that follows ``list=`` in *list_url*, up to
    the next ``&`` or ``#``. It is looked up through the api.zhuwei.me
    playlist endpoint with the module-level ``token`` as api-key.

    Args:
        list_url: Playlist URL containing a ``list=<id>`` query parameter.

    Returns:
        A list of ``https://www.youtube.com/watch?v=<id>`` strings, or
        ``False`` (after printing a hint) when the URL carries no playlist id
        or the API does not answer with code 200.
    """
    failure_msg = 'Can\'t find the video list! check your url or api-key!'

    # Keep only the id itself: anything after '&' or '#' belongs to other
    # query parameters / the fragment and must not be sent to the API.
    _, marker, tail = list_url.partition('list=')
    list_id = tail.split('&', 1)[0].split('#', 1)[0].strip()
    if not marker or not list_id:
        print(failure_msg)
        return False

    list_api_url = 'https://api.zhuwei.me/v1/videos/playlists/'
    # A timeout keeps an unresponsive API from blocking the script forever.
    list_res = requests.get(
        list_api_url + list_id + '?api-key=' + token, timeout=30
    ).json()
    if list_res['meta']['code'] != 200:
        print(failure_msg)
        return False

    print('Find video list!')
    return [
        'https://www.youtube.com/watch?v=' + video_id
        for video_id in list_res['response']['playlist']['videos']
    ]
23 | 
24 | #存listurl到文件
def save_list(res):
    """Store playlist URLs in a text file and register it in config.json.

    The URLs are written one per line to ``playlist<mmdd>-<HH>-<MM>.txt`` in
    the current directory, and the ``play_list_file`` entry of the
    module-level ``config`` is pointed at that file before the whole config
    is written back to ``config.json``.

    Args:
        res: List of video URL strings. A falsy value (``False`` from a
            failed lookup, or an empty list) is ignored: nothing is written.

    Returns:
        None.
    """
    # A failed lookup yields False; joining it would raise TypeError, and an
    # empty playlist file would only make the downloader process nothing.
    if not res:
        print('No video links to save!')
        return

    file_name = time.strftime("playlist%m%d-%H-%M", time.localtime()) + '.txt'
    with open(file_name, 'w') as txt:
        txt.write('\n'.join(res))

    # Update the in-memory settings first so config.json is only opened
    # (and truncated) once the data to dump is complete.
    config['play_list_file'] = file_name
    with open('config.json', 'w') as c:
        json.dump(config, c, indent=4)
    print('playlist download complete!')
36 | 
37 | #main
def main():
    """Ask for a playlist URL and save its video links to a file."""
    list_url = input('please input playlist url:').strip()
    v_list = get_list(list_url)
    # get_list() returns False (after printing why) when the lookup fails;
    # passing that on to save_list() would crash, so stop here instead.
    if v_list:
        save_list(v_list)


if __name__ == '__main__':
    main()
44 | 	
45 | 	
46 | 
47 | 


--------------------------------------------------------------------------------
/get_sub.py:
--------------------------------------------------------------------------------
  1 | import requests
  2 | # import multiprocessing
  3 | import html,json,time,os,re
  4 | from concurrent import futures
  5 | from get_list import get_list
  6 | 
  7 | class Sub_getter(object):
  8 | 
  9 | 	with open('config.json','r') as conf:
 10 | 		config=json.load(conf)
 11 | 
 12 | 	def __init__(self):
 13 | 		self.api_url='https://api.zhuwei.me/v1/captions/'
 14 | 		config=self.config
 15 | 		if config['token']:
 16 | 			self.token=config['token']
 17 | 		else:
 18 | 			self.token=config['test_token']
 19 | 		self.play_list=config['play_list_file']
 20 | 
 21 | 	#下载字幕到文件
 22 | 	def get_sub(self,api_url,title,**kw):
 23 | 		
 24 | 		#构造自定义配置的字幕url
 25 | 		sub_url=api_url+'?api-key='+self.token\
 26 | 		+('&multilanguage=multilanguage' if kw['multilanguage'] else '')\
 27 | 		+('&notimeline=notimeline' if kw['notimeline'] else '')
 28 | 		
 29 | 		#获取字幕url数据
 30 | 		sub_res=requests.get(sub_url)
 31 | 		sub_content=sub_res.json().get('contents').get('content')
 32 | 		# useless=['&quot;','?']
 33 | 		# for i in useless:
 34 | 		# 	title=title.replace(i,'')
 35 | 		
 36 | 		#写入字幕文件
 37 | 		if not os.path.exists('Download_subtitles'):
 38 | 			os.mkdir('Download_subtitles')
 39 | 
 40 | 		if os.name=='nt':
 41 | 		#windows文件替换非法字符
 42 | 			with open('Download_subtitles/%s.srt' % re.sub('[\/:?"*<>|]','-',html.unescape(title)),'w') as sub_file:
 43 | 				sub_file.write(html.unescape(sub_content))
 44 | 		else:
 45 | 			with open('Download_subtitles/%s.srt' % html.unescape(title).replace('/','-'),'w') as sub_file:
 46 | 				sub_file.write(html.unescape(sub_content))
 47 | 		self.complete+=1
 48 | 		print('Download 【'+title+'.srt】 complete!')
 49 | 
 50 | 
 51 | 	#查询字幕支持列表
 52 | 	def req_api(self,v_url):
 53 | 		have_sub=requests.get(self.api_url+v_url[-11:]+'?'+'api-key='+self.token).json()
 54 | 		
 55 | 		#返回200ok,得到字幕列表
 56 | 		if have_sub['meta']['code']==200:
 57 | 			res=have_sub['response']['captions']
 58 | 			sub_title=res['title']
 59 | 			sub_list=res['available_captions']
 60 | 			# print(sub_list)
 61 | 			
 62 | 			#设置目标语言字幕找到与否状态
 63 | 			find=False
 64 | 			for i in sub_list:
 65 | 				#寻找目标双语字幕
 66 | 				if self.config['multilanguage']:
 67 | 					if self.config['which_language_to_zh'] in i['language']:
 68 | 						print('Find （'+sub_title+'） 【'+i['language']+' and zh-Hans】 subtitle!')
 69 | 						self.get_sub(i['caption_content_url'],sub_title,**self.config)
 70 | 						find=True
 71 | 						break
 72 | 				#单语言字幕
 73 | 				else:
 74 | 					if i['language'] in self.config['single_language']:
 75 | 						print('Find （'+sub_title+'） 【'+i['language']+'】 subtitle!')
 76 | 						self.get_sub(i['caption_content_url'],sub_title,**self.config)
 77 | 						find=True
 78 | 						break
 79 | 			#找到目标字幕写入成功下载历史
 80 | 			if find:
 81 | 				with open('Success_history.txt','a') as succ_log:
 82 | 					succ=html.unescape(sub_title)+'   '+time.strftime("%Y-%m-%d【%H-%M】", time.localtime())\
 83 | 					+'\n'+v_url+'\n\n'
 84 | 					succ_log.write(succ) 
 85 | 			else:
 86 | 				print('Can\'t find '+i['language']+' subtitle!')
 87 | 
 88 | 		#未获取到字幕列表写入失败历史文件
 89 | 		else:
 90 | 			print('Can\'t find '+v_url+' sub! check video id!')
 91 | 			with open('Failure_history.txt','a') as fail_log:
 92 | 					fail=v_url+'   '+time.strftime("%Y-%m-%d【%H-%M】", time.localtime())+'\n\n'
 93 | 					fail_log.write(fail) 
 94 | 
 95 | 	#多线程下载字幕列表
 96 | 	def download_list(self,tasks):
 97 | 		# cpu_count=multiprocessing.cpu_count()
 98 | 		# pool=multiprocessing.Pool(cpu_count)
 99 | 		# pool.map(self.req_api,tasks)
100 | 		with futures.ThreadPoolExecutor(5) as e:
101 | 			e.map(self.req_api,tasks)
102 | 		
103 | 		print('Download complete,Success get:【',self.complete,'】subtitles. Failure:【',len(tasks)-self.complete,'】')
104 | 
105 | 	#入口
106 | 	def run(self):
107 | 		self.complete=0
108 | 
109 | 		if self.play_list:
110 | 			try:
111 | 				with open('%s'% self.play_list,'r') as v_list:
112 | 					tasks=v_list.read().split('\n')
113 | 				self.download_list(tasks)
114 | 			except Exception as e:
115 | 				print('Can\'t find list! check your play_list\'s path!',e)
116 | 
117 | 		else:
118 | 			v_url=input('Please input video or playlist url:')
119 | 			if 'list=' in v_url:	
120 | 				try:
121 | 					self.download_list(get_list(v_url))
122 | 				except Exception:
123 | 					print('Check your playlist url or api-key!')
124 | 			else:
125 | 				self.req_api(v_url)
126 | 
127 | 
# Script entry point: build the downloader and start it right away.
if __name__ == '__main__':
    Sub_getter().run()
131 | 


--------------------------------------------------------------------------------