├── .gitignore
├── README.md
├── config.json
├── get_list.py
└── get_sub.py
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | playlist
3 | D*
4 | *his*
5 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Python Youtube Subtitles Download Script
2 |
3 | 基于[zhuwei.me](https://api.zhuwei.me)字幕接口编写下载脚本
4 |
5 | >在config.json里修改配置下载选项:
6 | ```javascript
7 | {
8 |
9 | "test_token":"a2d09c7d76fced01f8be4b1f4cce8bec", //测试用api-key
10 | "token":"", //个人申请的api-key
11 | "single_language":"zh-Hans", //单语言下载选项,"en","zh-Hans",...
12 | "multilanguage":false, //是否下载双语,true 或false
13 | "which_language_to_zh":"en", //哪种语言=>简中,"en","kr","jp"...
14 | "notimeline":false, //无字幕时间线 ,true 或false
15 | "play_list_file":"" //从指定文件读取视频链接下载字幕
16 | }
17 | ```
18 | * test_token : 接口测试的api,可下载单个字幕
19 | * token : 私人api-key, 在[api.zhuwei.me](https://api.zhuwei.me)申请
20 | * single_language : 单语言字幕下载
21 | * multilanguage : 是否下载双语字幕,若为true则single_language选项无效
22 | * which_language_to_zh : xxx语言+简中(机翻)字幕 (接口暂只支持xxx=>简中)
23 | * notimeline : 字幕不要时间线
24 | * play_list_file : 从所填文件读取链接下载(文件可自行创建,手动添加url,一行一个链接)
25 |
26 |
27 | > 环境:Python3, 依赖: requests (pip install requests)
28 |
29 | ### Usage:
30 | 1 . 命令行输入单个视频链接或playlist链接(需申请api-key)即可下载字幕:
31 |
32 | > python get_sub.py
33 |
34 | 2 . 从本地文件批量读取视频链接下载字幕(需申请api-key):
35 | >1 . 手动创建文件,一行一个视频链接
36 | >2 . "play_list_file" 填入文件**全称**(.xxx后缀不要忘记加)
3 . 运行 python get_sub.py
37 |
38 | 3.将playlist所有视频链接暂存文件再下载
39 | >1 . 运行 python get_list.py 将自动把输入的playlist链接里的所有视频链接保存到文件
2 . python get_sub.py 将自动下载以上文件
40 | ## Other:
41 |
42 | * 字幕语言支持详见[api.zhuwei.me](https://api.zhuwei.me)
--------------------------------------------------------------------------------
/config.json:
--------------------------------------------------------------------------------
1 | {"test_token": "a2d09c7d76fced01f8be4b1f4cce8bec", "token": "", "single_language": "zh-Hans", "multilanguage": false, "which_language_to_zh": "en", "notimeline": false, "play_list_file": ""}
--------------------------------------------------------------------------------
/get_list.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import json
3 | import os,time
4 |
# Read the shared settings once at import time. `config` is kept as a
# module-level dict because save_list() updates it and writes it back;
# `token` is the personal API key used by get_list().
with open('config.json', 'r') as config_file:
    config = json.load(config_file)
token = config['token']
8 |
9 | #获取listurl数据
def get_list(list_url):
    """Return the watch URLs of every video in a YouTube playlist.

    Args:
        list_url: Playlist URL, or any string containing ``list=<id>``.

    Returns:
        A list of ``https://www.youtube.com/watch?v=<id>`` strings, or
        ``False`` when no playlist id is present or the API does not answer
        with code 200.
    """
    # Keep only the value of the ``list`` parameter. Slicing everything after
    # "list=" would also swallow later query parameters ("&index=2") or a
    # fragment, and a missing "list=" would silently yield a garbage id.
    _, _, tail = list_url.partition('list=')
    list_id = tail.split('&', 1)[0].split('#', 1)[0].strip()
    if not list_id:
        print('Can\'t find the video list! check your url or api-key!')
        return False

    list_api_url = 'https://api.zhuwei.me/v1/videos/playlists/'
    list_res = requests.get(list_api_url + list_id + '?api-key=' + token).json()
    if list_res['meta']['code'] != 200:
        print('Can\'t find the video list! check your url or api-key!')
        return False

    print('Find video list!')
    video_ids = list_res['response']['playlist']['videos']
    return ['https://www.youtube.com/watch?v=' + video_id for video_id in video_ids]
23 |
24 | #存listurl到文件
def save_list(res):
    """Write playlist links to a timestamped file and record it in config.json.

    The links are written one per line to ``playlist<MMDD>-<HH>-<MM>.txt`` in
    the current directory, and that file name is stored under the
    ``play_list_file`` key of ``config.json``.

    Args:
        res: List of video URLs, or a falsy value (``False`` is what
            get_list() returns on failure).

    Returns:
        None. Nothing is written when ``res`` is falsy.
    """
    # get_list() signals failure with False; joining that would raise a
    # TypeError, and an empty list would leave config.json pointing at an
    # empty playlist file.
    if not res:
        print('No video links to save!')
        return

    file_name = time.strftime("playlist%m%d-%H-%M", time.localtime()) + '.txt'
    with open(file_name, 'w') as txt:
        txt.write('\n'.join(res))

    # Point the config at the new file so the download script can read it.
    config['play_list_file'] = file_name
    with open('config.json', 'w') as c:
        json.dump(config, c, indent=4)
    print('playlist download complete!')
36 |
37 | #main
def main():
    """Ask for a playlist URL, fetch its video links and save them to a file."""
    playlist_url = input('please input playlist url:')
    video_urls = get_list(playlist_url)
    save_list(video_urls)


if __name__ == '__main__':
    main()
44 |
45 |
46 |
47 |
--------------------------------------------------------------------------------
/get_sub.py:
--------------------------------------------------------------------------------
1 | import requests
2 | # import multiprocessing
3 | import html,json,time,os,re
4 | from concurrent import futures
5 | from get_list import get_list
6 |
class Sub_getter(object):
    """Download YouTube subtitles through the api.zhuwei.me captions API.

    Settings come from ``config.json`` in the current working directory, which
    is read once when the class is defined and shared by all instances as
    ``Sub_getter.config``.
    """

    with open('config.json', 'r') as conf:
        config = json.load(conf)

    def __init__(self):
        self.api_url = 'https://api.zhuwei.me/v1/captions/'
        config = self.config
        # Prefer the personal API key; fall back to the shared test key.
        self.token = config['token'] or config['test_token']
        self.play_list = config['play_list_file']
        # Number of subtitles written so far; run() resets it. Initialised
        # here so req_api()/get_sub() also work without going through run().
        # NOTE: it is incremented from worker threads without a lock.
        self.complete = 0

    @staticmethod
    def _video_id(v_url):
        """Extract the 11-character video id from a video URL or bare id."""
        match = re.search(r'(?:v=|youtu\.be/|/embed/)([0-9A-Za-z_-]{11})', v_url)
        if match:
            return match.group(1)
        # Fall back to the previous behaviour: the last 11 characters.
        return v_url.strip()[-11:]

    def get_sub(self, api_url, title, **kw):
        """Fetch one caption track and save it as ``Download_subtitles/<title>.srt``.

        Args:
            api_url: The ``caption_content_url`` of the chosen caption.
            title: Video title (HTML-escaped, as returned by the API).
            **kw: Config options; ``multilanguage`` and ``notimeline`` are
                read and translated into query flags, the rest are ignored.
        """
        # Build the caption URL with the optional flags.
        sub_url = (
            api_url + '?api-key=' + self.token
            + ('&multilanguage=multilanguage' if kw.get('multilanguage') else '')
            + ('&notimeline=notimeline' if kw.get('notimeline') else '')
        )

        sub_res = requests.get(sub_url)
        sub_content = sub_res.json().get('contents').get('content')

        # exist_ok avoids the check-then-create race between worker threads.
        os.makedirs('Download_subtitles', exist_ok=True)

        file_title = html.unescape(title)
        if os.name == 'nt':
            # Replace every character Windows forbids in file names,
            # including the backslash.
            file_title = re.sub(r'[\\/:?"*<>|]', '-', file_title)
        else:
            file_title = file_title.replace('/', '-')

        # Subtitles routinely contain non-ASCII text; do not rely on the
        # platform default encoding.
        with open('Download_subtitles/%s.srt' % file_title, 'w', encoding='utf-8') as sub_file:
            sub_file.write(html.unescape(sub_content))
        self.complete += 1
        print('Download 【' + title + '.srt】 complete!')

    def req_api(self, v_url):
        """Look up the captions of one video and download the configured one.

        A successful download is appended to ``Success_history.txt``; a failed
        caption-list lookup is appended to ``Failure_history.txt``.

        Args:
            v_url: Video URL (or bare video id).
        """
        have_sub = requests.get(
            self.api_url + self._video_id(v_url) + '?' + 'api-key=' + self.token
        ).json()

        # Anything but 200 means no caption list: log the failure and stop.
        if have_sub['meta']['code'] != 200:
            print('Can\'t find ' + v_url + ' sub! check video id!')
            with open('Failure_history.txt', 'a', encoding='utf-8') as fail_log:
                fail = v_url + ' ' + time.strftime("%Y-%m-%d【%H-%M】", time.localtime()) + '\n\n'
                fail_log.write(fail)
            return

        res = have_sub['response']['captions']
        sub_title = res['title']
        sub_list = res['available_captions']

        multilanguage = self.config['multilanguage']
        if multilanguage:
            wanted = self.config['which_language_to_zh']
        else:
            wanted = self.config['single_language']

        find = False
        for caption in sub_list:
            language = caption['language']
            # Both checks are substring matches, as before.
            if multilanguage:
                if wanted not in language:
                    continue
                print('Find (' + sub_title + ') 【' + language + ' and zh-Hans】 subtitle!')
            else:
                if language not in wanted:
                    continue
                print('Find (' + sub_title + ') 【' + language + '】 subtitle!')
            self.get_sub(caption['caption_content_url'], sub_title, **self.config)
            find = True
            break

        if find:
            with open('Success_history.txt', 'a', encoding='utf-8') as succ_log:
                succ = html.unescape(sub_title) + ' ' \
                    + time.strftime("%Y-%m-%d【%H-%M】", time.localtime()) \
                    + '\n' + v_url + '\n\n'
                succ_log.write(succ)
        else:
            # Report the language that was requested, not whichever caption
            # the loop happened to end on (undefined for an empty list).
            print('Can\'t find ' + wanted + ' subtitle!')

    def download_list(self, tasks):
        """Download subtitles for every URL in ``tasks`` using 5 worker threads.

        Args:
            tasks: List of video URLs.
        """
        with futures.ThreadPoolExecutor(5) as e:
            pending = {e.submit(self.req_api, v_url): v_url for v_url in tasks}
            # Surface worker exceptions instead of dropping them silently.
            for done in futures.as_completed(pending):
                error = done.exception()
                if error is not None:
                    print('Download failed:', pending[done], error)

        print('Download complete,Success get:【', self.complete,
              '】subtitles. Failure:【', len(tasks) - self.complete, '】')

    def run(self):
        """Entry point: download from the configured list file or prompt for a URL."""
        self.complete = 0

        if self.play_list:
            try:
                with open(self.play_list, 'r') as v_list:
                    # Skip blank lines so they are neither requested nor
                    # counted as failures.
                    tasks = [line.strip() for line in v_list if line.strip()]
            except (OSError, UnicodeError) as e:
                print('Can\'t find list! check your play_list\'s path!', e)
                return
            self.download_list(tasks)
            return

        v_url = input('Please input video or playlist url:')
        if 'list=' not in v_url:
            self.req_api(v_url)
            return

        try:
            tasks = get_list(v_url)
        except Exception as e:
            print('Check your playlist url or api-key!', e)
            return
        # get_list() returns False when the playlist lookup fails.
        if not tasks:
            print('Check your playlist url or api-key!')
            return
        self.download_list(tasks)
126 |
127 |
# Script entry point: build the downloader and start it.
if __name__ == '__main__':
    Sub_getter().run()
131 |
--------------------------------------------------------------------------------