├── .gitignore ├── README.md ├── config.json ├── get_list.py └── get_sub.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | playlist 3 | D* 4 | *his* 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Python Youtube Subtitles Download Script 2 | 3 | 基于[zhuwei.me](https://api.zhuwei.me)字幕接口编写下载脚本
4 | 5 | >在config.json里修改配置下载选项:
6 | ```javascript 7 | { 8 | 9 | "test_token":"a2d09c7d76fced01f8be4b1f4cce8bec", //测试用api-key 10 | "token":"", //个人申请的api-key 11 | "single_language":"zh-Hans", //单语言下载选项,"en","zh-Hans",... 12 | "multilanguage":false, //是否下载双语,true 或false 13 | "which_language_to_zh":"en", //哪种语言=>简中,"en","kr","jp"... 14 | "notimeline":false, //无字幕时间线 ,true 或false 15 | "play_list_file":"" //从指定文件读取视频链接下载字幕 16 | } 17 | ``` 18 | * test_token : 接口测试的api,可下载单个字幕 19 | * token : 私人api-key, 在[api.zhuwei.me](https://api.zhuwei.me)申请 20 | * single_language : 单语言字幕下载 21 | * multilanguage : 是否下载双语字幕,若为true则single_language选项无效 22 | * which_language_to_zh : xxx语言+简中(机翻)字幕 (接口暂只支持xxx=>简中) 23 | * notimeline : 字幕不要时间线 24 | * play_list_file : 从所填文件读取链接下载(文件可自行创建,手动添加url,一行一个链接) 25 |
26 | 27 | > 环境:Python3, 依赖: requests (pip install requests) 28 | 29 | ### Usage: 30 | 1 . 命令行输入单个视频链接或playlist链接(需申请api-key)即可下载字幕: 31 | 32 | > python get_sub.py 33 | 34 | 2 . 从本地文件批量读取视频链接下载字幕(需申请api-key): 35 | >1 . 手动创建文件,一行一个视频链接
36 | >2 . "play_list_file" 填入文件**全称**(.xxx后缀不要忘记加)
3 . 运行 python get_sub.py 37 | 38 | 3.将playlist所有视频链接暂存文件再下载 39 | >1 . 运行 python get_list.py 将自动把输入的playlist链接里的所有视频链接保存到文件
2 . python get_sub.py 将自动下载以上文件 40 | ## Other: 41 | 42 | * 字幕语言支持详见[api.zhuwei.me](https://api.zhuwei.me) -------------------------------------------------------------------------------- /config.json: -------------------------------------------------------------------------------- 1 | {"test_token": "a2d09c7d76fced01f8be4b1f4cce8bec", "token": "", "single_language": "zh-Hans", "multilanguage": false, "which_language_to_zh": "en", "notimeline": false, "play_list_file": ""} -------------------------------------------------------------------------------- /get_list.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | import os,time 4 | 5 | with open('config.json','r') as conf: 6 | config=json.load(conf) 7 | token=config['token'] 8 | 9 | #获取listurl数据 10 | def get_list(list_url): 11 | list_id=list_url[list_url.find('list=')+5:] 12 | list_api_url='https://api.zhuwei.me/v1/videos/playlists/' 13 | list_res=requests.get(list_api_url+list_id+'?api-key='+token).json() 14 | if list_res['meta']['code']==200: 15 | print('Find video list!') 16 | v_list=list_res['response']['playlist']['videos'] 17 | for i in range(len(v_list)): 18 | v_list[i]='https://www.youtube.com/watch?v='+v_list[i] 19 | return v_list 20 | else: 21 | print('Can\'t find the video list! 
check your url or api-key!') 22 | return False 23 | 24 | #存listurl到文件 25 | def save_list(res): 26 | data='\n'.join(res) 27 | # if not os.path.exists('playlist'): 28 | # os.mkdir('playlist') 29 | file_name=time.strftime("playlist%m%d-%H-%M", time.localtime()) 30 | with open('%s.txt' % file_name,'w') as txt: 31 | txt.write(data) 32 | with open('config.json','w') as c: 33 | config['play_list_file']=file_name+'.txt' 34 | json.dump(config,c,indent=4) 35 | print('playlist download complete!') 36 | 37 | #main 38 | def main(): 39 | list_url=input('please input playlist url:') 40 | save_list(get_list(list_url)) 41 | 42 | if __name__ == '__main__': 43 | main() 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /get_sub.py: -------------------------------------------------------------------------------- 1 | import requests 2 | # import multiprocessing 3 | import html,json,time,os,re 4 | from concurrent import futures 5 | from get_list import get_list 6 | 7 | class Sub_getter(object): 8 | 9 | with open('config.json','r') as conf: 10 | config=json.load(conf) 11 | 12 | def __init__(self): 13 | self.api_url='https://api.zhuwei.me/v1/captions/' 14 | config=self.config 15 | if config['token']: 16 | self.token=config['token'] 17 | else: 18 | self.token=config['test_token'] 19 | self.play_list=config['play_list_file'] 20 | 21 | #下载字幕到文件 22 | def get_sub(self,api_url,title,**kw): 23 | 24 | #构造自定义配置的字幕url 25 | sub_url=api_url+'?api-key='+self.token\ 26 | +('&multilanguage=multilanguage' if kw['multilanguage'] else '')\ 27 | +('¬imeline=notimeline' if kw['notimeline'] else '') 28 | 29 | #获取字幕url数据 30 | sub_res=requests.get(sub_url) 31 | sub_content=sub_res.json().get('contents').get('content') 32 | # useless=['"','?'] 33 | # for i in useless: 34 | # title=title.replace(i,'') 35 | 36 | #写入字幕文件 37 | if not os.path.exists('Download_subtitles'): 38 | os.mkdir('Download_subtitles') 39 | 40 | if os.name=='nt': 41 | #windows文件替换非法字符 42 | with 
open('Download_subtitles/%s.srt' % re.sub('[\/:?"*<>|]','-',html.unescape(title)),'w') as sub_file: 43 | sub_file.write(html.unescape(sub_content)) 44 | else: 45 | with open('Download_subtitles/%s.srt' % html.unescape(title).replace('/','-'),'w') as sub_file: 46 | sub_file.write(html.unescape(sub_content)) 47 | self.complete+=1 48 | print('Download 【'+title+'.srt】 complete!') 49 | 50 | 51 | #查询字幕支持列表 52 | def req_api(self,v_url): 53 | have_sub=requests.get(self.api_url+v_url[-11:]+'?'+'api-key='+self.token).json() 54 | 55 | #返回200ok,得到字幕列表 56 | if have_sub['meta']['code']==200: 57 | res=have_sub['response']['captions'] 58 | sub_title=res['title'] 59 | sub_list=res['available_captions'] 60 | # print(sub_list) 61 | 62 | #设置目标语言字幕找到与否状态 63 | find=False 64 | for i in sub_list: 65 | #寻找目标双语字幕 66 | if self.config['multilanguage']: 67 | if self.config['which_language_to_zh'] in i['language']: 68 | print('Find ('+sub_title+') 【'+i['language']+' and zh-Hans】 subtitle!') 69 | self.get_sub(i['caption_content_url'],sub_title,**self.config) 70 | find=True 71 | break 72 | #单语言字幕 73 | else: 74 | if i['language'] in self.config['single_language']: 75 | print('Find ('+sub_title+') 【'+i['language']+'】 subtitle!') 76 | self.get_sub(i['caption_content_url'],sub_title,**self.config) 77 | find=True 78 | break 79 | #找到目标字幕写入成功下载历史 80 | if find: 81 | with open('Success_history.txt','a') as succ_log: 82 | succ=html.unescape(sub_title)+' '+time.strftime("%Y-%m-%d【%H-%M】", time.localtime())\ 83 | +'\n'+v_url+'\n\n' 84 | succ_log.write(succ) 85 | else: 86 | print('Can\'t find '+i['language']+' subtitle!') 87 | 88 | #未获取到字幕列表写入失败历史文件 89 | else: 90 | print('Can\'t find '+v_url+' sub! 
check video id!') 91 | with open('Failure_history.txt','a') as fail_log: 92 | fail=v_url+' '+time.strftime("%Y-%m-%d【%H-%M】", time.localtime())+'\n\n' 93 | fail_log.write(fail) 94 | 95 | #多线程下载字幕列表 96 | def download_list(self,tasks): 97 | # cpu_count=multiprocessing.cpu_count() 98 | # pool=multiprocessing.Pool(cpu_count) 99 | # pool.map(self.req_api,tasks) 100 | with futures.ThreadPoolExecutor(5) as e: 101 | e.map(self.req_api,tasks) 102 | 103 | print('Download complete,Success get:【',self.complete,'】subtitles. Failure:【',len(tasks)-self.complete,'】') 104 | 105 | #入口 106 | def run(self): 107 | self.complete=0 108 | 109 | if self.play_list: 110 | try: 111 | with open('%s'% self.play_list,'r') as v_list: 112 | tasks=v_list.read().split('\n') 113 | self.download_list(tasks) 114 | except Exception as e: 115 | print('Can\'t find list! check your play_list\'s path!',e) 116 | 117 | else: 118 | v_url=input('Please input video or playlist url:') 119 | if 'list=' in v_url: 120 | try: 121 | self.download_list(get_list(v_url)) 122 | except Exception: 123 | print('Check your playlist url or api-key!') 124 | else: 125 | self.req_api(v_url) 126 | 127 | 128 | if __name__ == '__main__': 129 | app=Sub_getter() 130 | app.run() 131 | --------------------------------------------------------------------------------