├── README.md
└── src
    ├── audiohelper.py
    ├── audiostatistic.py
    ├── autosubtitle.py
    ├── dataset
    │   └── ted80001.wav
    ├── ffmpeg.md
    ├── realtimerecognize.py
    ├── speechrecognize.py
    ├── temp
    │   └── ted80001
    │       └── ted80001.srt
    ├── translate.py
    └── utility.py
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# audio learning #

## feature ##
* auto-generate subtitles (SRT format) or plain text for audio data
* cut audio data (1-channel WAV format) into short clips at the speaker's pauses

## notice ##
* only 1-channel WAV files are supported
* to generate subtitles/text for a video, first extract a 1-channel WAV track from it yourself (see src/ffmpeg.md)
* recognized subtitles are saved in SRT format; plain-text output is also possible
* the recognition rate is acceptable but depends on many factors, e.g. the noise level of the audio; speeches and read-aloud recordings work best
* recognition is done through the Baidu speech API, so please apply for your own API key; contact me if you have any questions
* ted80001.wav was extracted from https://ia800204.us.archive.org/25/items/AomawaShields_2015U/AomawaShields_2015U.mp4
* ted80001.srt was auto-generated from ted80001.wav

## FYI ##
* detailed write-up (Chinese, on Zhihu): https://zhuanlan.zhihu.com/p/28347508
* music cut into short clips: https://pan.baidu.com/s/1hrXxEJU
* speech cut into short clips: https://pan.baidu.com/s/1jIrC0F8#list/path=%2F

## contact ##
wj3235@126.com

## changelog ##
### Ver 0.0.1 ###
* replaced the median-filter computation (scipy.signal.medfilt was too slow)
* added a framerate (sample rate) parameter to get_wave_statistic, supporting 8000/16000 Hz; silent stretches longer than 17 s are now split into several 16.999 s silent segments
* added a framerate (sample rate) parameter to calculate_other_statistic_info, supporting 8000/16000 Hz
* reworked the loop that builds the array of split points with gaps under 17 s: each pass now inserts into the already-sorted array with binary insertion instead of calling sort every time (with sort, videos longer than an hour basically never finished)
* removed the step that wrote every WAV slice to a small file; the Baidu API is now queried directly with an in-memory stream
* changed the saved subtitle format so that ffmpeg can burn the subtitles into the video directly
* speech_recognizai_baidu now accepts a stream instead of reading a file
* added comments
* added the ffmpeg commands for extracting audio and burning subtitles (src/ffmpeg.md)
* other bug fixes and improvements

©2017 alex All Rights Reserved.
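## usage sketch ##
The snippet below is an illustrative sketch, not a script shipped in this repository. It shows the streaming approach described in the changelog: slice the bundled sample WAV with pydub and send the raw bytes of one slice straight to the Baidu wrapper, the same way `autosubtitle.py` does internally. It assumes you run it from the `src/` directory, have `pydub` and `baidu-aip` installed, and have put your own Baidu credentials into `speechrecognize.py`.

```python
# Minimal sketch (not part of the repo): recognize the first ten seconds of the
# bundled sample by streaming a pydub slice to the Baidu wrapper, instead of
# writing temporary WAV files.
from pydub import AudioSegment
from audiohelper import load_wave
from speechrecognize import speech_recognizai_baidu

wavepath = './dataset/ted80001.wav'
# load_wave also prints the WAV parameters; we only need the sample rate here
nchannels, sampwidth, framerate, nframes, _ = load_wave(wavepath)

wav = AudioSegment.from_wav(wavepath)
# the Baidu ASR endpoint used by this project expects 8000 or 16000 Hz mono audio
response = speech_recognizai_baidu(wav[0:10 * 1000].raw_data, framerate, 'en')
if response.get('err_no') == 0:
    print(response['result'][0])  # first of up to five candidate transcripts
```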


--------------------------------------------------------------------------------
/src/audiohelper.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Sun Aug 06 05:26:49 2017
@author: alex
@contact wj3235@126.com
"""

import wave
from pydub import AudioSegment

def load_wave(wavepath):

    f = wave.open(wavepath,'rb')
    params = f.getparams()
    print(params)
    nchannels, sampwidth, framerate, nframes = params[:4]
    strData = f.readframes(nframes)  # read the audio frames as a byte string
    print(nframes,len(strData))
    f.close()
    return nchannels, sampwidth, framerate, nframes,strData
    #wav_data1 = struct.unpack('%dh' % nframes, strData)

def audio_to_export(sourcepath,wavepath,start,end):

    wav = AudioSegment.from_wav(sourcepath)

    wav[start*1000:end*1000].export(wavepath, format="wav")  # export the [start, end) slice


--------------------------------------------------------------------------------
/src/audiostatistic.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/goodskillprogramer/audiolearning/c03ed3f43e30936ff79bc87347eb9de5e647bda1/src/audiostatistic.py
--------------------------------------------------------------------------------
/src/autosubtitle.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Sun Aug 06 05:26:49 2017
@author: alex
@contact wj3235@126.com
"""

import os
import shutil
import sys
import time

# NumPy is an open-source numerical-computing extension for Python, used here
# to store and process the audio samples as one large array.
import numpy as np
from scipy import signal
from pydub import AudioSegment
from audiohelper import load_wave
import matplotlib.pyplot as plt
from speechrecognize import speech_recognizai_baidu
from utility import write_txt_to_file
from utility import seconds_to_timestamp_str
from audiostatistic import get_wave_statistic
from audiostatistic import calculate_other_statistic_info

def plot_data(waveData,nframes,framerate):
    time = np.arange(0,nframes)*(1.0 / framerate)
    fig=plt.figure(figsize=(100,2))
    plt.plot(time,waveData)
    plt.xlabel("Time(s)")
    plt.ylabel("Amplitude")
    plt.title("Single channel wavedata")
    plt.grid('on')  # grid: 'on' to show, 'off' to hide
    fig.savefig('test2png.png', dpi=100)
    #print(len(waveData))
    #print(waveData[0:1000])

def timevalidate(timetable,endtime,timerange):
    timetable+=[endtime]
    for i in range(len(timetable)-1):
        if timetable[i+1]-timetable[i]>timerange:
            return False
    return True

# binary insertion into an already sorted list of timestamps
def insertsort(timearr,item):
    if len(timearr)==1:
        timearr.append(item) if item>timearr[0] else timearr.insert(0,item)
    elif len(timearr)==2:
        if item>timearr[0]:
            timearr.append(item) if item>timearr[1] else timearr.insert(1,item)
        else:
            timearr.insert(0,item)
    else:
        if item<timearr[0]:
            timearr.insert(0,item)
        elif item>timearr[len(timearr)-1]:
            timearr.append(item)
        else:
            middle(timearr,0,len(timearr)-1,item)

# recursive helper for the binary insertion
def middle(timearr,start,end,item):
    if end-start==1:
        timearr.insert(end,item)
    else:
        middle(timearr,start,(start+end)//2,item) if timearr[(start+end)//2]>item else middle(timearr,(start+end)//2,end,item)

def time_transform(seconds):
    m, s = divmod(seconds, 60)
    h, m = divmod(m, 60)
    print("%02d:%02d:%02d" % (h, m, s))

if __name__ =="__main__":
    wavepath=r'./dataset/ted80001.wav'

    language='zh'
    language='en'  # 'zh' for Mandarin, 'en' for English recognition
    nchannels, sampwidth, framerate, nframes,strData=load_wave(wavepath)

    # build an array from the byte string: with dtype int16 every 2 bytes
    # (16 bits) become one integer sample
    waveData = np.fromstring(strData,dtype=np.int16)

    # median filtering with scipy's medfilt is too slow; the hand-rolled
    # 3-point median below is about twice as fast
    #waveData=signal.medfilt(waveData)
    middata=[0]*len(waveData)
    if(len(waveData)>2):
        middata[0]=sorted([0,waveData[1],waveData[2]])[1]
        middata[len(waveData)-1]=sorted([0,waveData[len(waveData)-1],waveData[len(waveData)-2]])[1]
        for i in range(1,len(waveData)-1):

            middata[i]=(waveData[i] if waveData[i] > waveData[i+1] else\
                (waveData[i+1] if waveData[i-1] > waveData[i+1] else waveData[i-1]))\
                if waveData[i-1] > waveData[i] else\
                (waveData[i-1] if waveData[i-1] > waveData[i+1] else\
                (waveData[i+1] if waveData[i] > waveData[i+1] else waveData[i]))
        for j in range(len(waveData)):
            waveData[j]=middata[j]

    # normalize the amplitude: divide every sample by the maximum absolute value
    waveData = waveData*1.0/(max(abs(waveData)))
    if nchannels>1:
        waveData = np.reshape(waveData,[nframes,nchannels])
        waveData=waveData[:,0]

    #plot_data(waveData,nframes,framerate)  # plot the waveform
    # measure the duration of every voiced / silent segment
    wavestatistic=get_wave_statistic(waveData,framerate)
    #print('len(wavestatistic)',len(wavestatistic))

    calculate_other_statistic_info(wavestatistic,framerate)

    sortedwavestatistic=sorted(wavestatistic,key=lambda x:(x[0],-x[1]))

    splittimestamp=[0]
    for split in sortedwavestatistic:
        #splittimestamp+=[split[4]]

        # split[4] is the time offset of this segment from the start of the
        # audio, in seconds; insert it with binary insertion so the list of
        # split points stays sorted
        insertsort(splittimestamp,split[4])

        # nframes/framerate is the total duration in seconds (e.g. 329.537625 s
        # = 5 min 29 s); stop once every gap between neighbouring split points
        # is shorter than 17 s
        if timevalidate(splittimestamp,nframes*1.0/framerate,17):
            break
        splittimestamp.pop()

    #splittimestamp=sorted(splittimestamp)
    #print(splittimestamp)
    basename=os.path.basename(wavepath)
    filename=os.path.splitext(basename)[0]
    savefilefolder=os.path.join(r'./temp',filename)
    #print('save folder:',savefilefolder)
    if os.path.exists(savefilefolder):
        shutil.rmtree(savefilefolder)

    os.mkdir(savefilefolder)
    wav = AudioSegment.from_wav(wavepath)  # open the wav file with pydub
    srtid=1
    maxrecognize=50
    lentimestamp=len(splittimestamp)


    for i in range(len(splittimestamp)-1):
        #if i>maxrecognize:
        #    print('end recognize ',maxrecognize)
        #    break
        starttime=splittimestamp[i]
        endtime=splittimestamp[i+1]
        if endtime>nframes*1.0 / framerate:
            #print('out of time',endtime,nframes*1.0 / framerate)
            endtime=nframes*1.0 / framerate
        #print(starttime,endtime)
        savefilename=str(i)+"_"+str(starttime)+"_"+str(endtime)
        savepath=os.path.join(savefilefolder,savefilename+'.wav')  # overwritten on each run
        #print('savepath',savepath)
        #try:
        #    print('export timerange:',starttime,endtime,' completion ',i,lentimestamp,i*1.0/lentimestamp)
        #    wav[starttime*1000:endtime*1000].export(savepath, format="wav")  # export the slice
        #except Exception as e:
        #    print('export exception',str(e))
        #    continue

        if framerate==8000 or framerate==16000:
            # call the Baidu speech API with the raw byte stream of this slice
            response=speech_recognizai_baidu(wav[starttime*1000:endtime*1000].raw_data,framerate,language)
            savejsonpath=os.path.join(savefilefolder,savefilename+'.json')
            #print(str(response))
            # parse Baidu's reply, e.g. {'corpus_no': '6462566101916659365',
            # 'result': ['he,', 'e,', 'here,', 'each,', 'a,'],
            # 'err_msg': 'success.', 'err_no': 0, 'sn': '949466616181504683425'};
            # Baidu returns up to five candidate transcripts
            if response['err_no']==0:
                recognize_txt=(response['result'][0])
                #for r in recognize_txt:
                #    print(r)
                #write_txt_to_file(savejsonpath,recognize_txt.encode('utf-8'))
                startstr=seconds_to_timestamp_str(starttime)
                endstr=seconds_to_timestamp_str(endtime)
                srtpath=os.path.join(savefilefolder,filename+'.srt')
                asspath=os.path.join(savefilefolder,filename+'.ass')
                #if srtid<10:
                #    recognize_txt=("%s %s")%(u'subtitle(autocreated) wj3235@126.com =>',recognize_txt)

                srttxt=('%s\n%s --> %s\n%s\n')%(srtid,startstr,endstr,recognize_txt)
                #asstxt=('Dialogue:0,%s,%s,AGMStyle,NTP,0000,0000,0000,,%s\n')%(startstr,endstr,recognize_txt)
                write_txt_to_file(srtpath,srttxt.encode('utf-8'))
                #write_txt_to_file(asspath,asstxt.encode('utf-8'))

                srtid+=1
                #print('save ok')

--------------------------------------------------------------------------------
/src/dataset/ted80001.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/goodskillprogramer/audiolearning/c03ed3f43e30936ff79bc87347eb9de5e647bda1/src/dataset/ted80001.wav
--------------------------------------------------------------------------------
/src/ffmpeg.md:
--------------------------------------------------------------------------------
1. Extract the audio track from a video
   -ar  audio sample rate
   -ac  number of channels

   ffmpeg -i AomawaShields_2015U.mp4 -ac 1 -ar 16000 -f wav -vn ted80001.wav

   Extraction speed: roughly 10-15 s per GB of video.
   1.6 GB video (91 min) -> 85 MB audio
   55 MB video (5 min)   -> 5 MB audio
   200 MB video (72 min) -> 66 MB audio
   The higher the video quality, the lower the audio-to-video size ratio
   (roughly 5%-30%), but the extracted audio always comes to about 50 MB per
   hour of material.

2. Merge subtitles (about 1-2 min per hour of video)

   Embed as a soft subtitle track:
   ffmpeg -i AomawaShields_2015U.mp4 -i ted80001.srt -c:s mov_text -c:v copy -c:a copy AomawaShields_2015U2.mp4

   Burn the subtitles into the video:
   ffmpeg -i AomawaShields_2015U.mp4 -vf subtitles=ted80001.srt AomawaShields_2015U2.mp4

3. Cut a video

   ffmpeg -i AomawaShields_2015U.mp4 -vcodec copy -acodec copy -ss 00:00:00 -to 00:04:30 4.flv -y
--------------------------------------------------------------------------------
/src/realtimerecognize.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

"""
Created on Sun Aug 06 05:26:49 2017
@author: alex
@contact wj3235@126.com
"""

import speech_recognition as sr
from os import path
import time
import wave

from aip import AipSpeech

def listen_translate():
    while(True):
        # obtain audio from the microphone
        r = sr.Recognizer()
        with sr.Microphone(sample_rate=8000) as source:
            print("Say something!")
            # print(5)
            # time.sleep(1)
            # print(4)
            # time.sleep(1)
            # print(3)
            # time.sleep(1)
            # print(2)
            # time.sleep(1)
            # print(1)
            # time.sleep(1)
            audio = r.listen(source)  #,timeout=5,phrase_time_limit=0.05

        # r = sr.Recognizer()
        # with sr.AudioFile('./english.wav') as source:
        #     audio = r.record(source)  # read the entire audio file

        # write audio to a WAV file
        with open("microphone-results.wav", "wb") as f:
            f.write(audio.get_wav_data())

        # recognize speech using Sphinx
        try:
            print("Sphinx thinks you said :" + r.recognize_sphinx(audio))
        except sr.UnknownValueError:
            print("Sphinx could not understand audio")
        except sr.RequestError as e:
            print("Sphinx error; {0}".format(e))

audiolist=[]

def callback(recognizer,audio):
    audiolist.append(audio)

def translate(r,audio):
    try:
        s=time.time()
        print(str(len(audiolist))+" Sphinx thinks you said :" + r.recognize_sphinx(audio))
        print(time.time()-s)
    except sr.UnknownValueError:
        print("Sphinx could not understand audio")
    except sr.RequestError as e:
        print("Sphinx error; {0}".format(e))

def listen_and_recognize():
    r = sr.Recognizer()
    m = sr.Microphone(sample_rate=8000)
    r.listen_in_background(m,callback,phrase_time_limit=1)

    lastlen=0
    while(True):
        if len(audiolist)==0:
            time.sleep(10)
            continue
        if lastlen==len(audiolist):
            time.sleep(10)
            continue
        output = wave.open('microphone-results.wav', 'wb')
        output.setnchannels(1)
        setparam=False
        para=None
        for audio in audiolist:
            with open("temps.wav", "wb") as f:
                f.write(audio.get_wav_data())
            temps = wave.open('temps.wav', 'rb')
            #print(temps.getparams())
            if not setparam:
                para=temps.getparams()
                output.setparams(para)
                setparam=True
            output.writeframes(temps.readframes(temps.getnframes()))

        output.close()
        # output = wavefile.open('microphone-results.wav', 'rb')
        # outputaudio=sr.AudioData(output.readframes(output.getnframes()),para[2],para[1])
        # translate(r,outputaudio)
        #baidu('microphone-results.wav')
        lastlen=len(audiolist)
        time.sleep(10)

def play_audio():

    r = sr.Recognizer()

    with sr.AudioFile('./english.wav') as source:
        audio = r.record(source)  # read the entire audio file
    print(audio)

    # recognize speech using Sphinx
    try:
        print("Sphinx thinks you said :" + r.recognize_sphinx(audio))
    except sr.UnknownValueError:
        print("Sphinx could not understand audio")
    except sr.RequestError as e:
        print("Sphinx error; {0}".format(e))

def baidu(filePath,samplerate):

    APP_ID = '1234567'
    API_KEY = 'a8hBD6w0Dh1oXBSlAn5natYe'
    SECRET_KEY = '9b76b95ade9e7063088dfa52c208748e'

    aipSpeech = AipSpeech(APP_ID, API_KEY, SECRET_KEY)


    def get_file_content(filePath):
        with open(filePath, 'rb') as fp:
            return fp.read()

    response=aipSpeech.asr(get_file_content(filePath), 'wav', samplerate, {
        'lan': 'en',
    })
    print(response)
    # recognize a file fetched from a URL (Baidu SDK example)
    # aipSpeech.asr('', 'wav', 44100, {
    #     'url': 'http://121.40.195.233/res/16k_test.pcm',
    #     'callback': 'http://xxx.com/receive',
    # })

#listen_and_recognize()
--------------------------------------------------------------------------------
/src/speechrecognize.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Sun Aug 06 05:26:49 2017
@author: alex
@contact wj3235@126.com
"""
import os
from aip import AipSpeech
from utility import write_txt_to_file

APP_ID = '1234567'
API_KEY = 'natYea8hBD6w0Dh1oXBSlAn5'
SECRET_KEY = '8e9b76b95ade9e7063088dfa52c20874'

def baidu(filePath,samplerate,language):
    global APP_ID,API_KEY,SECRET_KEY
    aipSpeech =
AipSpeech(APP_ID, API_KEY, SECRET_KEY) 18 | 19 | def get_file_content(filePath): 20 | with open(filePath, 'rb') as fp: 21 | return fp.read() 22 | 23 | response=aipSpeech.asr(get_file_content(filePath), 'wav', samplerate, { 24 | 'lan': language, 25 | }) 26 | return response 27 | 28 | def speech_recognizai_baidu(filepath,samplerate,language='zh'): 29 | return baidu2(filepath,samplerate,language) 30 | 31 | def baidu2(filestream,samplerate,language): 32 | global APP_ID,API_KEY,SECRET_KEY 33 | aipSpeech = AipSpeech(APP_ID, API_KEY, SECRET_KEY) 34 | 35 | response=aipSpeech.asr(filestream, 'wav', samplerate, { 36 | 'lan': language, 37 | }) 38 | return response -------------------------------------------------------------------------------- /src/temp/ted80001/ted80001.srt: -------------------------------------------------------------------------------- 1 | 1 2 | 00:00:01.739-->00:00:09.668 3 | subtitle(autocreated) wj3235@126.com) => the,the, 4 | 5 | 2 6 | 00:00:09.668-->00:00:17.786 7 | subtitle(autocreated) wj3235@126.com) => i am in search of another planet in the universe where life exists, 8 | 9 | 3 10 | 00:00:17.786-->00:00:25.325 11 | subtitle(autocreated) wj3235@126.com) => ican'tsee this planet with my naked eyes,or even with the most powerful telescopes we currently possess, 12 | 13 | 4 14 | 00:00:25.325-->00:00:27.406 15 | subtitle(autocreated) wj3235@126.com) => but i know thatit'sthere, 16 | 17 | 5 18 | 00:00:27.406-->00:00:32.676 19 | subtitle(autocreated) wj3235@126.com) => and understanding contradictions that occur in nature will help find, 20 | 21 | 6 22 | 00:00:32.676-->00:00:36.456 23 | subtitle(autocreated) wj3235@126.com) => on our planet where there is water there is life, 24 | 25 | 7 26 | 00:00:36.456-->00:00:41.564 27 | subtitle(autocreated) wj3235@126.com) => so we look for planets that orbit at just the right distance from their stars, 28 | 29 | 8 30 | 00:00:41.564-->00:00:54.209 31 | subtitle(autocreated) wj3235@126.com) => at this distance shown in blue on this diagram for stars of different temperatures planets could be warm enough for water to flow on their surfaces has lakes and oceans where life might reside, 32 | 33 | 9 34 | 00:00:54.209-->00:01:00.609 35 | subtitle(autocreated) wj3235@126.com) => some astronomers focus their time and energy on finding planets that these distances from their stars, 36 | 37 | 10 38 | 00:01:00.609-->00:01:03.490 39 | subtitle(autocreated) wj3235@126.com) => what i do takes up where their job ends, 40 | 41 | 11 42 | 00:01:03.490-->00:01:06.921 43 | subtitle(autocreated) wj3235@126.com) => i model the possible climates affect the planet, 44 | 45 | 12 46 | 00:01:06.921-->00:01:15.641 47 | subtitle(autocreated) wj3235@126.com) => andhere'swhythat'simportant there are many factors besides distance from its star that control whether a planet can support life, 48 | 49 | 13 50 | 00:01:15.641-->00:01:17.987 51 | subtitle(autocreated) wj3235@126.com) => take the planet venus, 52 | 53 | 14 54 | 00:01:17.987-->00:01:26.071 55 | subtitle(autocreated) wj3235@126.com) => IT'snamed after the roman goddess of love and beauty because of its benign if serial appearance in the sky, 56 | 57 | 15 58 | 00:01:26.071-->00:01:29.681 59 | subtitle(autocreated) wj3235@126.com) => but spacecraft measurements revealed a different story, 60 | 61 | 16 62 | 00:01:29.681-->00:01:35.580 63 | subtitle(autocreated) wj3235@126.com) => the surface temperature is close to nine hundred degrees fahrenheit five hundred celsius, 64 | 65 | 17 66 | 00:01:35.580-->00:01:38.658 
67 | subtitle(autocreated) wj3235@126.com) => that'shot enough to melt lead, 68 | 69 | 18 70 | 00:01:38.658-->00:01:49.731 71 | subtitle(autocreated) wj3235@126.com) => its thick atmosphere not its distance from the sun is the reason causes the greenhouse effect on steroids trapping heat from the sun is scorching the planet surface, 72 | 73 | 19 74 | 00:01:49.731-->00:01:55.124 75 | subtitle(autocreated) wj3235@126.com) => the reality totally contradicted initial perceptions on this planet, 76 | 77 | 20 78 | 00:01:55.124-->00:02:04.303 79 | subtitle(autocreated) wj3235@126.com) => from these lessons from our own solar systemwe'velearned that aplanet'satmosphere is crucial to its climate and potential to host life, 80 | 81 | 21 82 | 00:02:04.303-->00:02:14.090 83 | subtitle(autocreated) wj3235@126.com) => wedon'tknow what the atmospheres of these planets are like because the planets are so small in game compared to their stars,and so far away from us, 84 | 85 | 22 86 | 00:02:14.090-->00:02:28.739 87 | subtitle(autocreated) wj3235@126.com) => for example one of the closest planet that could support surface water is called least six six seven cc touch a glamorous name write, my phone number for a nameit'stwenty three light years away, 88 | 89 | 23 90 | 00:02:28.739-->00:02:32.122 91 | subtitle(autocreated) wj3235@126.com) => sothat'smore than one hundred trillion miles, 92 | 93 | 24 94 | 00:02:32.122-->00:02:39.088 95 | subtitle(autocreated) wj3235@126.com) => trying to measure the atmospheric composition of expo planet passing in front of its host star is hard, 96 | 97 | 25 98 | 00:02:39.088-->00:02:43.601 99 | subtitle(autocreated) wj3235@126.com) => IT'slike trying to see a fruit fly passing in front of cars headlight, 100 | 101 | 26 102 | 00:02:43.601-->00:02:51.870 103 | subtitle(autocreated) wj3235@126.com) => they now imagine that cause one hundred trillion miles away and you want to know the precise color of the fly, 104 | 105 | 27 106 | 00:02:51.870-->00:03:00.597 107 | subtitle(autocreated) wj3235@126.com) => so i use computer models to calculate the kind of atmosphere a planet would need you have suitable climate for water and life, 108 | 109 | 28 110 | 00:03:00.597-->00:03:06.964 111 | subtitle(autocreated) wj3235@126.com) => here'sanartist'sconcept of the planet kepler sixty to ask with your for reference, 112 | 113 | 29 114 | 00:03:06.964-->00:03:11.925 115 | subtitle(autocreated) wj3235@126.com) => IT'stwelve hundred light years away and just forty percent larger than earth, 116 | 117 | 30 118 | 00:03:11.925-->00:03:25.692 119 | subtitle(autocreated) wj3235@126.com) => or in excess funded work found that IT could be warm enough for open water for many types of atmospheres and orientations of its orbit to live like feature telescope to follow up on this planet to look for signs of life, 120 | 121 | 31 122 | 00:03:25.692-->00:03:35.357 123 | subtitle(autocreated) wj3235@126.com) => i am on a planet surface is also important for climate, absorb longer redder wavelengths of light and reflects shorter bluer light, 124 | 125 | 32 126 | 00:03:35.357-->00:03:45.607 127 | subtitle(autocreated) wj3235@126.com) => that'swhy the iceberg in the photo looks so blue the redder light from the sun is absorbed on its way through the ice only the blue light makes IT all the way to the bottom, 128 | 129 | 33 130 | 00:03:45.607-->00:03:50.111 131 | subtitle(autocreated) wj3235@126.com) => then a gets reflected back up to our eyes when we see blue wife, 132 | 133 | 34 134 | 
00:03:50.111-->00:04:03.836 135 | subtitle(autocreated) wj3235@126.com) => my model show that planets orbiting cooler stars could actually be warmer than planets orbiting hotter stars she does another contradiction that ice absorbs the longer wavelength light from cooler stars that light that energy, 136 | 137 | 35 138 | 00:04:03.836-->00:04:05.789 139 | subtitle(autocreated) wj3235@126.com) => heats the ice, 140 | 141 | 36 142 | 00:04:05.789-->00:04:15.437 143 | subtitle(autocreated) wj3235@126.com) => using climate models,to explore how these contradictions can affect planetary climate is vital to the search for life elsewhere, 144 | 145 | 37 146 | 00:04:15.437-->00:04:17.644 147 | subtitle(autocreated) wj3235@126.com) => andit'sno surprise that this, 148 | 149 | 38 150 | 00:04:17.644-->00:04:19.380 151 | subtitle(autocreated) wj3235@126.com) => my specialty, 152 | 153 | 39 154 | 00:04:19.380-->00:04:33.401 155 | subtitle(autocreated) wj3235@126.com) => i'mafrican american female astronomer,and a classically trained actor who love to wear make up and read fashion magazines so i am uniquely positioned to appreciate contradictions in nature, 156 | 157 | 40 158 | 00:04:33.401-->00:04:41.939 159 | subtitle(autocreated) wj3235@126.com) => the,and how they can inform our search for the next planet where life exists, 160 | 161 | 41 162 | 00:04:41.939-->00:04:48.399 163 | subtitle(autocreated) wj3235@126.com) => my organization rising star girls teaches astronomy to middle school girls of color, 164 | 165 | 42 166 | 00:04:48.399-->00:04:52.272 167 | subtitle(autocreated) wj3235@126.com) => using theatre writing and visual arts, 168 | 169 | 43 170 | 00:04:52.272-->00:05:03.278 171 | subtitle(autocreated) wj3235@126.com) => that'sanother contradiction science and artdon'toften go together but interweaving them can help these girls bring their whole selves to what a large and maybe one day, 172 | 173 | 44 174 | 00:05:03.278-->00:05:13.415 175 | subtitle(autocreated) wj3235@126.com) => join the ranks of astronomers were full of contradictions can use a background to discover once and for all that we are truly not alone in the universe, 176 | 177 | 45 178 | 00:05:13.415-->00:05:26.992 179 | subtitle(autocreated) wj3235@126.com) => thank you,the, 180 | 181 | -------------------------------------------------------------------------------- /src/translate.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sun Aug 06 05:26:49 2017 4 | @author: alex 5 | @contact wj3235@126.com 6 | """ 7 | 8 | def get_name(pinyin): 9 | try: 10 | youdao = Youdao(pinyin) 11 | jsonresult=youdao.executor() 12 | return jsonresult['translation'][0] 13 | except Exception as e: 14 | return ' ' 15 | 16 | if __name__ =='__main__': 17 | pass -------------------------------------------------------------------------------- /src/utility.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Sun Aug 06 05:26:49 2017 4 | @author: alex 5 | @contact wj3235@126.com 6 | """ 7 | 8 | def write_txt_to_file(path,txt): 9 | with open(path,'ab+') as f: 10 | f.write(txt) 11 | 12 | def seconds_to_timestamp_str(seconds): 13 | m, s = divmod(seconds, 60) 14 | h, m = divmod(m, 60) 15 | 16 | timestamp= ("%02d:%02d:%06.3f" % (h, m, s)) 17 | return timestamp 18 | 19 | def seconds_to_timestamp_ass(seconds): 20 | m, s = divmod(seconds, 60) 21 | h, m = divmod(m, 60) 22 | 23 | timestamp= 
("%01d:%02d:%05.2f" % (h, m, s)) 24 | return timestamp 25 | #print seconds_to_timestamp_ass(1.737625) --------------------------------------------------------------------------------