├── README.md ├── AWS_transcribe.ipynb ├── AWS_transcribe_with_Speaker_diarization.ipynb ├── Google_Longaudio_API_without_speaker_diarization.ipynb └── Google_Longaudio_API_speaker_diarization.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # Speech-to-text 2 | 3 | Convert speech to text using different APIs (AWS Transcribe and Google Cloud Speech-to-Text). Works well for long audio files (asynchronous transcription). 4 | -------------------------------------------------------------------------------- /AWS_transcribe.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "filepath = \"~/audio_wav/\"\n", 12 | "output_filepath = \"~/Transcripts/\"" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": { 19 | "collapsed": true 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "from __future__ import print_function\n", 24 | "import time\n", 25 | "import boto3\n", 26 | "import json\n", 27 | "import os\n", 28 | "import botocore\n", 29 | "\n", 30 | "bucketName = \"audiofiles\"" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 3, 36 | "metadata": { 37 | "collapsed": true 38 | }, 39 | "outputs": [], 40 | "source": [ 41 | "def upload_file_to_s3(audio_file_name):\n", 42 | " \n", 43 | " Key = filepath + audio_file_name\n", 44 | " outPutname = audio_file_name\n", 45 | "\n", 46 | " s3 = boto3.client('s3')\n", 47 | " s3.upload_file(Key,bucketName,outPutname)" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 4, 53 | "metadata": { 54 | "collapsed": true 55 | }, 56 | "outputs": [], 57 | "source": [ 58 | "def download_file_from_s3(audio_file_name):\n", 59 | " \n", 60 | " s3 = boto3.resource('s3')\n", 61 | " \n", 62 | " Key = outPutname = audio_file_name.split('.')[0] + '.json'\n", 63 | " \n", 64 | " try:\n", 65 | " 
s3.Bucket(bucketName).download_file(Key, outPutname)\n", 66 | " except botocore.exceptions.ClientError as e:\n", 67 | " if e.response['Error']['Code'] == \"404\":\n", 68 | " print(\"The object does not exist.\")\n", 69 | " else:\n", 70 | " raise" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 5, 76 | "metadata": { 77 | "collapsed": true 78 | }, 79 | "outputs": [], 80 | "source": [ 81 | "def delete_file_from_s3(audio_file_name):\n", 82 | " \n", 83 | " s3 = boto3.resource('s3')\n", 84 | " s3.Object(bucketName, audio_file_name).delete()\n", 85 | " s3.Object(bucketName, audio_file_name.split('.')[0] + '.json').delete()" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 6, 91 | "metadata": { 92 | "collapsed": true 93 | }, 94 | "outputs": [], 95 | "source": [ 96 | "def transcribe(audio_file_name):\n", 97 | " \n", 98 | " transcripts = ''\n", 99 | " \n", 100 | " upload_file_to_s3(audio_file_name)\n", 101 | " \n", 102 | " transcribe = boto3.client('transcribe', region_name='us-east-2')\n", 103 | " job_name = audio_file_name.split('.')[0]\n", 104 | " job_uri = \"https://s3.us-east-2.amazonaws.com/\" + bucketName + \"/\" + audio_file_name\n", 105 | " transcribe.start_transcription_job(\n", 106 | " TranscriptionJobName=job_name,\n", 107 | " Media={'MediaFileUri': job_uri},\n", 108 | " MediaFormat='wav',\n", 109 | " LanguageCode='en-US',\n", 110 | " OutputBucketName=bucketName\n", 111 | " )\n", 112 | " while True:\n", 113 | " status = transcribe.get_transcription_job(TranscriptionJobName=job_name)\n", 114 | " if status['TranscriptionJob']['TranscriptionJobStatus'] in ['COMPLETED', 'FAILED']:\n", 115 | " break\n", 116 | " time.sleep(5)\n", 117 | " \n", 118 | " download_file_from_s3(audio_file_name)\n", 119 | " \n", 120 | " transcribe.delete_transcription_job(TranscriptionJobName=job_name)\n", 121 | " \n", 122 | " delete_file_from_s3(audio_file_name)\n", 123 | " \n", 124 | " with open(audio_file_name.split('.')[0] + '.json') as 
f:\n", 125 | " text = json.load(f)\n", 126 | " \n", 127 | " for i in text['results']['transcripts']:\n", 128 | " transcripts += i['transcript']\n", 129 | " \n", 130 | " os.remove(audio_file_name.split('.')[0] + '.json')\n", 131 | " \n", 132 | " return transcripts" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 7, 138 | "metadata": { 139 | "collapsed": true 140 | }, 141 | "outputs": [], 142 | "source": [ 143 | "def write_transcripts(transcript_filename,transcript):\n", 144 | " f= open(output_filepath + transcript_filename,\"w+\")\n", 145 | " f.write(transcript)\n", 146 | " f.close() " 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": null, 152 | "metadata": { 153 | "collapsed": true 154 | }, 155 | "outputs": [], 156 | "source": [ 157 | "if __name__ == \"__main__\":\n", 158 | " files = [f for f in os.listdir(filepath) if f.endswith(\".wav\")]\n", 159 | " for audio_file_name in files:\n", 160 | " exists = os.path.isfile(output_filepath + audio_file_name.split('.')[0] + '.txt')\n", 161 | " if exists:\n", 162 | " pass\n", 163 | " else:\n", 164 | " print(audio_file_name)\n", 165 | " transcript = transcribe(audio_file_name)\n", 166 | " transcript_filename = audio_file_name.split('.')[0] + '.txt'\n", 167 | " write_transcripts(transcript_filename,transcript)" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": null, 173 | "metadata": { 174 | "collapsed": true 175 | }, 176 | "outputs": [], 177 | "source": [] 178 | } 179 | ], 180 | "metadata": { 181 | "kernelspec": { 182 | "display_name": "Python 3", 183 | "language": "python", 184 | "name": "python3" 185 | }, 186 | "language_info": { 187 | "codemirror_mode": { 188 | "name": "ipython", 189 | "version": 3 190 | }, 191 | "file_extension": ".py", 192 | "mimetype": "text/x-python", 193 | "name": "python", 194 | "nbconvert_exporter": "python", 195 | "pygments_lexer": "ipython3", 196 | "version": "3.5.2" 197 | } 198 | }, 199 | "nbformat": 4, 200 | 
"nbformat_minor": 2 201 | } 202 | -------------------------------------------------------------------------------- /AWS_transcribe_with_Speaker_diarization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 6, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "filepath = \"~/audio_wav/\"\n", 12 | "output_filepath = \"~/Transcripts/\"" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 7, 18 | "metadata": { 19 | "collapsed": true 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "from __future__ import print_function\n", 24 | "import time\n", 25 | "import boto3\n", 26 | "import json\n", 27 | "import os\n", 28 | "import botocore\n", 29 | "\n", 30 | "bucketName = \"audiofiles\"" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 8, 36 | "metadata": { 37 | "collapsed": true 38 | }, 39 | "outputs": [], 40 | "source": [ 41 | "def upload_file_to_s3(audio_file_name):\n", 42 | " \n", 43 | " Key = filepath + audio_file_name\n", 44 | " outPutname = audio_file_name\n", 45 | "\n", 46 | " s3 = boto3.client('s3')\n", 47 | " s3.upload_file(Key,bucketName,outPutname)" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 9, 53 | "metadata": { 54 | "collapsed": true 55 | }, 56 | "outputs": [], 57 | "source": [ 58 | "def download_file_from_s3(audio_file_name):\n", 59 | " \n", 60 | " s3 = boto3.resource('s3')\n", 61 | " \n", 62 | " Key = outPutname = audio_file_name.split('.')[0] + '.json'\n", 63 | " \n", 64 | " try:\n", 65 | " s3.Bucket(bucketName).download_file(Key, outPutname)\n", 66 | " except botocore.exceptions.ClientError as e:\n", 67 | " if e.response['Error']['Code'] == \"404\":\n", 68 | " print(\"The object does not exist.\")\n", 69 | " else:\n", 70 | " raise" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 10, 76 | "metadata": { 77 | 
"collapsed": true 78 | }, 79 | "outputs": [], 80 | "source": [ 81 | "def delete_file_from_s3(audio_file_name):\n", 82 | " \n", 83 | " s3 = boto3.resource('s3')\n", 84 | " s3.Object(bucketName, audio_file_name).delete()\n", 85 | " s3.Object(bucketName, audio_file_name.split('.')[0] + '.json').delete()" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 15, 91 | "metadata": { 92 | "collapsed": true 93 | }, 94 | "outputs": [], 95 | "source": [ 96 | "def transcribe(audio_file_name):\n", 97 | " \n", 98 | " transcripts = ''\n", 99 | " \n", 100 | " upload_file_to_s3(audio_file_name)\n", 101 | " \n", 102 | " transcribe = boto3.client('transcribe', region_name='us-east-2')\n", 103 | " job_name = audio_file_name.split('.')[0]\n", 104 | " job_uri = \"https://s3.us-east-2.amazonaws.com/\" + bucketName + \"/\" + audio_file_name\n", 105 | " transcribe.start_transcription_job(\n", 106 | " TranscriptionJobName=job_name,\n", 107 | " Media={'MediaFileUri': job_uri},\n", 108 | " MediaFormat='wav',\n", 109 | " LanguageCode='en-US',\n", 110 | " Settings={'MaxSpeakerLabels':2,'ShowSpeakerLabels':True},\n", 111 | " OutputBucketName=bucketName\n", 112 | " )\n", 113 | " while True:\n", 114 | " status = transcribe.get_transcription_job(TranscriptionJobName=job_name)\n", 115 | " if status['TranscriptionJob']['TranscriptionJobStatus'] in ['COMPLETED', 'FAILED']:\n", 116 | " break\n", 117 | " time.sleep(5)\n", 118 | " \n", 119 | " download_file_from_s3(audio_file_name)\n", 120 | " \n", 121 | " transcribe.delete_transcription_job(TranscriptionJobName=job_name)\n", 122 | " \n", 123 | " delete_file_from_s3(audio_file_name)\n", 124 | " \n", 125 | " with open(audio_file_name.split('.')[0] + '.json') as f:\n", 126 | " text = json.load(f)\n", 127 | " \n", 128 | " for i in text['results']['transcripts']:\n", 129 | " transcripts += i['transcript']\n", 130 | " \n", 131 | " #os.remove(audio_file_name.split('.')[0] + '.json')\n", 132 | " \n", 133 | " return transcripts" 134 | ] 135 | 
}, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 16, 139 | "metadata": { 140 | "collapsed": true 141 | }, 142 | "outputs": [], 143 | "source": [ 144 | "def write_transcripts(transcript_filename,transcript):\n", 145 | " f= open(output_filepath + transcript_filename,\"w+\")\n", 146 | " f.write(transcript)\n", 147 | " f.close() " 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 17, 153 | "metadata": {}, 154 | "outputs": [ 155 | { 156 | "name": "stdout", 157 | "output_type": "stream", 158 | "text": [ 159 | "path.wav\n", 160 | "Test.wav\n" 161 | ] 162 | } 163 | ], 164 | "source": [ 165 | "if __name__ == \"__main__\":\n", 166 | " files = [f for f in os.listdir(filepath) if f.endswith(\".wav\")]\n", 167 | " for audio_file_name in files:\n", 168 | " exists = os.path.isfile(output_filepath + audio_file_name.split('.')[0] + '.txt')\n", 169 | " if exists:\n", 170 | " pass\n", 171 | " else:\n", 172 | " print(audio_file_name)\n", 173 | " transcript = transcribe(audio_file_name)\n", 174 | " transcript_filename = audio_file_name.split('.')[0] + '.txt'\n", 175 | " write_transcripts(transcript_filename,transcript)" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": { 182 | "collapsed": true 183 | }, 184 | "outputs": [], 185 | "source": [] 186 | } 187 | ], 188 | "metadata": { 189 | "kernelspec": { 190 | "display_name": "Python 3", 191 | "language": "python", 192 | "name": "python3" 193 | }, 194 | "language_info": { 195 | "codemirror_mode": { 196 | "name": "ipython", 197 | "version": 3 198 | }, 199 | "file_extension": ".py", 200 | "mimetype": "text/x-python", 201 | "name": "python", 202 | "nbconvert_exporter": "python", 203 | "pygments_lexer": "ipython3", 204 | "version": "3.5.2" 205 | } 206 | }, 207 | "nbformat": 4, 208 | "nbformat_minor": 2 209 | } 210 | -------------------------------------------------------------------------------- /Google_Longaudio_API_without_speaker_diarization.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "filepath = \"~/audio_wav/\"\n", 12 | "output_filepath = \"~/Transcripts/\"" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": { 19 | "collapsed": true 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "from pydub import AudioSegment\n", 24 | "import io\n", 25 | "import os\n", 26 | "from google.cloud import speech\n", 27 | "from google.cloud.speech import enums\n", 28 | "from google.cloud.speech import types\n", 29 | "import wave\n", 30 | "from google.cloud import storage" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 3, 36 | "metadata": { 37 | "collapsed": true 38 | }, 39 | "outputs": [], 40 | "source": [ 41 | "def mp3_to_wav(audio_file_name):\n", 42 | " if audio_file_name.split('.')[1] == 'mp3': \n", 43 | " sound = AudioSegment.from_mp3(audio_file_name)\n", 44 | " audio_file_name = audio_file_name.split('.')[0] + '.wav'\n", 45 | " sound.export(audio_file_name, format=\"wav\")" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 4, 51 | "metadata": { 52 | "collapsed": true 53 | }, 54 | "outputs": [], 55 | "source": [ 56 | "def frame_rate_channel(audio_file_name):\n", 57 | " with wave.open(audio_file_name, \"rb\") as wave_file:\n", 58 | " frame_rate = wave_file.getframerate()\n", 59 | " channels = wave_file.getnchannels()\n", 60 | " return frame_rate,channels" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 5, 66 | "metadata": { 67 | "collapsed": true 68 | }, 69 | "outputs": [], 70 | "source": [ 71 | "def stereo_to_mono(audio_file_name):\n", 72 | " sound = AudioSegment.from_wav(audio_file_name)\n", 73 | " sound = sound.set_channels(1)\n", 74 | " sound.export(audio_file_name, format=\"wav\")" 75 | ] 76 | }, 77 | { 
78 | "cell_type": "code", 79 | "execution_count": 6, 80 | "metadata": { 81 | "collapsed": true 82 | }, 83 | "outputs": [], 84 | "source": [ 85 | "def upload_blob(bucket_name, source_file_name, destination_blob_name):\n", 86 | " \"\"\"Uploads a file to the bucket.\"\"\"\n", 87 | " storage_client = storage.Client()\n", 88 | " bucket = storage_client.get_bucket(bucket_name)\n", 89 | " blob = bucket.blob(destination_blob_name)\n", 90 | "\n", 91 | " blob.upload_from_filename(source_file_name)" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 7, 97 | "metadata": { 98 | "collapsed": true 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "def delete_blob(bucket_name, blob_name):\n", 103 | " \"\"\"Deletes a blob from the bucket.\"\"\"\n", 104 | " storage_client = storage.Client()\n", 105 | " bucket = storage_client.get_bucket(bucket_name)\n", 106 | " blob = bucket.blob(blob_name)\n", 107 | "\n", 108 | " blob.delete()" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 8, 114 | "metadata": { 115 | "collapsed": true 116 | }, 117 | "outputs": [], 118 | "source": [ 119 | "def google_transcribe(audio_file_name):\n", 120 | " \n", 121 | " file_name = filepath + audio_file_name\n", 122 | " mp3_to_wav(file_name)\n", 123 | "\n", 124 | " # The name of the audio file to transcribe\n", 125 | " \n", 126 | " frame_rate, channels = frame_rate_channel(file_name)\n", 127 | " \n", 128 | " if channels > 1:\n", 129 | " stereo_to_mono(file_name)\n", 130 | " \n", 131 | " bucket_name = 'callsaudiofiles'\n", 132 | " source_file_name = filepath + audio_file_name\n", 133 | " destination_blob_name = audio_file_name\n", 134 | " \n", 135 | " upload_blob(bucket_name, source_file_name, destination_blob_name)\n", 136 | " \n", 137 | " gcs_uri = 'gs://callsaudiofiles/' + audio_file_name\n", 138 | " transcript = ''\n", 139 | " \n", 140 | " client = speech.SpeechClient()\n", 141 | " audio = types.RecognitionAudio(uri=gcs_uri)\n", 142 | "\n", 143 | " config = 
types.RecognitionConfig(\n", 144 | " encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,\n", 145 | " sample_rate_hertz=frame_rate,\n", 146 | " language_code='en-US')\n", 147 | "\n", 148 | " # Detects speech in the audio file\n", 149 | " operation = client.long_running_recognize(config, audio)\n", 150 | " response = operation.result(timeout=10000)\n", 151 | "\n", 152 | " for result in response.results:\n", 153 | " transcript += result.alternatives[0].transcript\n", 154 | " \n", 155 | " delete_blob(bucket_name, destination_blob_name)\n", 156 | " return transcript" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": 9, 162 | "metadata": { 163 | "collapsed": true 164 | }, 165 | "outputs": [], 166 | "source": [ 167 | "def write_transcripts(transcript_filename,transcript):\n", 168 | " f= open(output_filepath + transcript_filename,\"w+\")\n", 169 | " f.write(transcript)\n", 170 | " f.close() " 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": 10, 176 | "metadata": { 177 | "collapsed": true 178 | }, 179 | "outputs": [], 180 | "source": [ 181 | "if __name__ == \"__main__\":\n", 182 | " for audio_file_name in os.listdir(filepath):\n", 183 | " transcript = google_transcribe(audio_file_name)\n", 184 | " transcript_filename = audio_file_name.split('.')[0] + '.txt'\n", 185 | " write_transcripts(transcript_filename,transcript)" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": null, 191 | "metadata": { 192 | "collapsed": true 193 | }, 194 | "outputs": [], 195 | "source": [] 196 | } 197 | ], 198 | "metadata": { 199 | "kernelspec": { 200 | "display_name": "Python 3", 201 | "language": "python", 202 | "name": "python3" 203 | }, 204 | "language_info": { 205 | "codemirror_mode": { 206 | "name": "ipython", 207 | "version": 3 208 | }, 209 | "file_extension": ".py", 210 | "mimetype": "text/x-python", 211 | "name": "python", 212 | "nbconvert_exporter": "python", 213 | "pygments_lexer": "ipython3", 214 | 
"version": "3.5.2" 215 | } 216 | }, 217 | "nbformat": 4, 218 | "nbformat_minor": 2 219 | } 220 | -------------------------------------------------------------------------------- /Google_Longaudio_API_speaker_diarization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "filepath = \"~/audio_wav/\"\n", 12 | "output_filepath = \"~/Transcripts/\"" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": { 19 | "collapsed": true 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "from pydub import AudioSegment\n", 24 | "import io\n", 25 | "import os\n", 26 | "from google.cloud import speech_v1p1beta1 as speech\n", 27 | "from google.cloud.speech_v1p1beta1 import enums\n", 28 | "from google.cloud.speech_v1p1beta1 import types\n", 29 | "import wave\n", 30 | "from google.cloud import storage" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 3, 36 | "metadata": { 37 | "collapsed": true 38 | }, 39 | "outputs": [], 40 | "source": [ 41 | "def mp3_to_wav(audio_file_name):\n", 42 | " if audio_file_name.split('.')[1] == 'mp3': \n", 43 | " sound = AudioSegment.from_mp3(audio_file_name)\n", 44 | " audio_file_name = audio_file_name.split('.')[0] + '.wav'\n", 45 | " sound.export(audio_file_name, format=\"wav\")" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 4, 51 | "metadata": { 52 | "collapsed": true 53 | }, 54 | "outputs": [], 55 | "source": [ 56 | "def frame_rate_channel(audio_file_name):\n", 57 | " with wave.open(audio_file_name, \"rb\") as wave_file:\n", 58 | " frame_rate = wave_file.getframerate()\n", 59 | " channels = wave_file.getnchannels()\n", 60 | " return frame_rate,channels" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 5, 66 | "metadata": { 67 | "collapsed": true 
68 | }, 69 | "outputs": [], 70 | "source": [ 71 | "def stereo_to_mono(audio_file_name):\n", 72 | " sound = AudioSegment.from_wav(audio_file_name)\n", 73 | " sound = sound.set_channels(1)\n", 74 | " sound.export(audio_file_name, format=\"wav\")" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 6, 80 | "metadata": { 81 | "collapsed": true 82 | }, 83 | "outputs": [], 84 | "source": [ 85 | "def upload_blob(bucket_name, source_file_name, destination_blob_name):\n", 86 | " \"\"\"Uploads a file to the bucket.\"\"\"\n", 87 | " storage_client = storage.Client()\n", 88 | " bucket = storage_client.get_bucket(bucket_name)\n", 89 | " blob = bucket.blob(destination_blob_name)\n", 90 | "\n", 91 | " blob.upload_from_filename(source_file_name)" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 7, 97 | "metadata": { 98 | "collapsed": true 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "def delete_blob(bucket_name, blob_name):\n", 103 | " \"\"\"Deletes a blob from the bucket.\"\"\"\n", 104 | " storage_client = storage.Client()\n", 105 | " bucket = storage_client.get_bucket(bucket_name)\n", 106 | " blob = bucket.blob(blob_name)\n", 107 | "\n", 108 | " blob.delete()" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 8, 114 | "metadata": { 115 | "collapsed": true 116 | }, 117 | "outputs": [], 118 | "source": [ 119 | "def google_transcribe(audio_file_name):\n", 120 | " \n", 121 | " file_name = filepath + audio_file_name\n", 122 | " mp3_to_wav(file_name)\n", 123 | "\n", 124 | " # The name of the audio file to transcribe\n", 125 | " \n", 126 | " frame_rate, channels = frame_rate_channel(file_name)\n", 127 | " \n", 128 | " if channels > 1:\n", 129 | " stereo_to_mono(file_name)\n", 130 | " \n", 131 | " bucket_name = 'callsaudiofiles'\n", 132 | " source_file_name = filepath + audio_file_name\n", 133 | " destination_blob_name = audio_file_name\n", 134 | " \n", 135 | " upload_blob(bucket_name, source_file_name, 
destination_blob_name)\n", 136 | " \n", 137 | " gcs_uri = 'gs://callsaudiofiles/' + audio_file_name\n", 138 | " transcript = ''\n", 139 | " \n", 140 | " client = speech.SpeechClient()\n", 141 | " audio = types.RecognitionAudio(uri=gcs_uri)\n", 142 | "\n", 143 | " config = types.RecognitionConfig(\n", 144 | " encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,\n", 145 | " sample_rate_hertz=frame_rate,\n", 146 | " language_code='en-US',\n", 147 | " enable_speaker_diarization=True,\n", 148 | " diarization_speaker_count=2)\n", 149 | "\n", 150 | " # Detects speech in the audio file\n", 151 | " operation = client.long_running_recognize(config, audio)\n", 152 | " response = operation.result(timeout=10000)\n", 153 | " result = response.results[-1]\n", 154 | " words_info = result.alternatives[0].words\n", 155 | " \n", 156 | " tag=1\n", 157 | " speaker=\"\"\n", 158 | "\n", 159 | " for word_info in words_info:\n", 160 | " if word_info.speaker_tag==tag:\n", 161 | " speaker=speaker+\" \"+word_info.word\n", 162 | " else:\n", 163 | " transcript += \"speaker {}: {}\".format(tag,speaker) + '\\n'\n", 164 | " tag=word_info.speaker_tag\n", 165 | " speaker=\"\"+word_info.word\n", 166 | "\n", 167 | " transcript += \"speaker {}: {}\".format(tag,speaker)\n", 168 | " \n", 169 | " delete_blob(bucket_name, destination_blob_name)\n", 170 | " return transcript" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": 9, 176 | "metadata": { 177 | "collapsed": true 178 | }, 179 | "outputs": [], 180 | "source": [ 181 | "def write_transcripts(transcript_filename,transcript):\n", 182 | " f= open(output_filepath + transcript_filename,\"w+\")\n", 183 | " f.write(transcript)\n", 184 | " f.close() " 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": 10, 190 | "metadata": { 191 | "collapsed": true 192 | }, 193 | "outputs": [], 194 | "source": [ 195 | "if __name__ == \"__main__\":\n", 196 | " for audio_file_name in os.listdir(filepath):\n", 197 | " 
exists = os.path.isfile(output_filepath + audio_file_name.split('.')[0] + '.txt')\n", 198 | " if exists:\n", 199 | " pass\n", 200 | " else:\n", 201 | " transcript = google_transcribe(audio_file_name)\n", 202 | " transcript_filename = audio_file_name.split('.')[0] + '.txt'\n", 203 | " write_transcripts(transcript_filename,transcript)" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": null, 209 | "metadata": { 210 | "collapsed": true 211 | }, 212 | "outputs": [], 213 | "source": [] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "execution_count": null, 218 | "metadata": { 219 | "collapsed": true 220 | }, 221 | "outputs": [], 222 | "source": [] 223 | } 224 | ], 225 | "metadata": { 226 | "kernelspec": { 227 | "display_name": "Python 3", 228 | "language": "python", 229 | "name": "python3" 230 | }, 231 | "language_info": { 232 | "codemirror_mode": { 233 | "name": "ipython", 234 | "version": 3 235 | }, 236 | "file_extension": ".py", 237 | "mimetype": "text/x-python", 238 | "name": "python", 239 | "nbconvert_exporter": "python", 240 | "pygments_lexer": "ipython3", 241 | "version": "3.5.2" 242 | } 243 | }, 244 | "nbformat": 4, 245 | "nbformat_minor": 2 246 | } 247 | --------------------------------------------------------------------------------