├── .gitignore ├── .gitmodules ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── audio-intelligence ├── README.md ├── auto_chapters.ipynb ├── content_moderation.ipynb ├── entity_redaction.ipynb ├── key_phrases.ipynb ├── summarization.ipynb └── topic_detection.ipynb ├── core-transcription ├── README.md ├── SDK-Node-batch.md ├── SDK_transcribe_batch_of_files │ ├── audio │ │ ├── audio.mp3 │ │ └── audio_2.mp3 │ ├── batch_transcription.ipynb │ └── transcripts │ │ ├── audio.mp3.txt │ │ └── audio_2.mp3.txt ├── Use_AssemblyAI_with_Pyannote_to_generate_custom_Speaker_Labels.ipynb ├── audio-duration-fix.ipynb ├── automatic-language-detection-route-default-language-js.md ├── automatic-language-detection-route-default-language-python.ipynb ├── automatic-language-detection-route-nano-model.ipynb ├── automatic-language-detection-separate.ipynb ├── automatic-language-detection.ipynb ├── common_errors_and_solutions.md ├── delete_transcript.ipynb ├── detecting-low-confidence-words.md ├── do-more-with-sdk-js.md ├── do-more-with-sdk-python.ipynb ├── gradio-frontend.ipynb ├── how_to_use_the_eu_endpoint.ipynb ├── identify_duplicate_channels.ipynb ├── make.com-speaker-labels.md ├── migration_guides │ ├── aws_to_aai.ipynb │ ├── dg_to_aai.ipynb │ ├── google_to_aai.ipynb │ └── oai_to_aai.ipynb ├── retry-server-error.ipynb ├── retry-upload-error.ipynb ├── schedule_delete.ipynb ├── speaker-diarization-with-async-chunking.ipynb ├── speaker_labelled_subtitles.ipynb ├── speaker_labels.ipynb ├── speaker_timeline.ipynb ├── specify-language.ipynb ├── split_audio_file │ ├── README.md │ ├── audio.mp3 │ ├── output │ │ ├── part_1.mp3 │ │ ├── part_2.mp3 │ │ ├── part_3.mp3 │ │ ├── part_4.mp3 │ │ └── part_5.mp3 │ ├── requirements.txt │ └── split.py ├── subtitle_creation_by_word_count.ipynb ├── subtitles.ipynb ├── talk-listen-ratio.ipynb ├── titanet-speaker-identification.ipynb ├── transcribe.ipynb ├── transcribe_batch_of_files │ ├── README.md │ ├── audio │ │ └── audio.mp3 │ ├── receiver.py │ ├── requirements.txt │ ├── submitter.py │ └── transcripts │ │ └── audio.mp3.txt ├── transcribe_from_s3.ipynb ├── transcribe_youtube_videos.ipynb ├── transcribing-github-files.md ├── transcribing-google-drive-file.md ├── translate_subtitles.ipynb └── translate_transcripts.ipynb ├── guide-images ├── make-create-doc.png ├── make-final-transcript.png ├── make-get-id.png ├── make-get-transcript.png ├── make-insert-paragraph.png ├── make-iterator.png ├── make-run.png ├── make-scenario.png ├── make-transcribe-audio.png ├── make-wait-for-completion.png └── view-raw.png ├── lemur ├── README.md ├── call-sentiment-analysis.ipynb ├── counting-tokens.ipynb ├── custom-topic-tags.ipynb ├── custom-vocab-lemur.ipynb ├── dialogue-data.ipynb ├── input-text-chapters.ipynb ├── input-text-speaker-labels.ipynb ├── lemur-transcript-citations.ipynb ├── meeting-action-items.ipynb ├── past-response-prompts.ipynb ├── phone-call-segmentation.ipynb ├── sales-playbook.ipynb ├── soap-note-generation.ipynb ├── speaker-identification.ipynb ├── specialized-endpoints.ipynb ├── task-endpoint-action-items.ipynb ├── task-endpoint-ai-coach.ipynb ├── task-endpoint-custom-summary.ipynb ├── task-endpoint-structured-QA.ipynb ├── timestamped-transcripts.ipynb ├── transcript-citations.ipynb └── using-lemur.ipynb ├── registry.yaml └── streaming-stt ├── README.md ├── file-transcription-nodejs ├── README.md ├── example.wav ├── package.json └── stream_api.js ├── noise_reduction_streaming.ipynb ├── partial_transcripts.ipynb ├── real-time-best-practices.ipynb ├── real-time.ipynb ├── 
real_time_lemur.ipynb ├── real_time_translation.ipynb ├── terminate_realtime_programmatically.ipynb └── transcribe_system_audio.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "real-time/realtime-react-example"] 2 | path = real-time/realtime-react-example 3 | url = git@github.com:AssemblyAI-Examples/realtime-react-example.git 4 | [submodule "streaming-stt/twilio-realtime-tutorial"] 5 | path = streaming-stt/twilio-realtime-tutorial 6 | url = https://github.com/AssemblyAI/twilio-realtime-tutorial 7 | [submodule "core-transcription/near-realtime-python-stt-app"] 8 | path = core-transcription/near-realtime-python-stt-app 9 | url = https://github.com/AssemblyAI-Solutions/async-chunk-py 10 | [submodule "core-transcription/near-realtime-js-stt-app"] 11 | path = core-transcription/near-realtime-js-stt-app 12 | url = https://github.com/AssemblyAI-Solutions/async-chunk-js 13 | [submodule "streaming-stt/dual-channel-transcriber"] 14 | path = streaming-stt/dual-channel-transcriber 15 | url = https://github.com/AssemblyAI-Solutions/twilio-dual-channel-transcriber-py 16 | [submodule "streaming-stt/twilio-dual-channel-transcriber"] 17 | path = streaming-stt/twilio-dual-channel-transcriber 18 | url = https://github.com/AssemblyAI-Solutions/twilio-dual-channel-transcriber-py 19 | [submodule "streaming-stt/streaming-api-nextjs"] 20 | path = streaming-stt/streaming-api-nextjs 21 | url = https://github.com/MAsuamah/streaming-api-nextjs 22 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to the AssemblyAI Cookbook 2 | 3 | ## Introduction: What is the Cookbook for? 4 | 5 | The AssemblyAI Cookbook is a practical resource for developers working with AssemblyAI's API. It offers code examples, guides, and tutorials to help understand and utilize the API more effectively. It's a go-to guide for tackling specific problems or broadening your skills. 6 | 7 | Contributions are essential for keeping the Cookbook relevant and useful. Whether fixing a bug or adding a new example, your input makes a difference. 8 | 9 | ## Getting Started 10 | - Create an [AssemblyAI account and obtain your API key](https://www.assemblyai.com/dashboard/signup) to start building and testing your cookbook. 11 | - Set up your development environment with the necessary tools for Python or JavaScript. 12 | - Use [Google Colabs](https://colab.research.google.com/) for developing and testing Jupyter notebooks. 13 | - Consider using an existing cookbook as a template for your submission. 14 | 15 | ## Contribution Guidelines 16 | **Types of Contributions**: 17 | - Bug fixes 18 | - Cookbook contributions 19 | 20 | ### Making a Bug Fix: 21 | - Create a new branch (`git checkout -b /fix/`). 22 | - Commit your changes (`git commit -am 'fix: '`). 23 | - Test your changes to ensure they fix the issue. 24 | - Push to the branch (`git push origin /fix/`). 25 | - Create a new Pull Request detailing the bug and how your change fixes it. 26 | 27 | ### Contributing a Cookbook: 28 | - Replace your API key with `YOUR_API_KEY` and audio URLs with `YOUR_AUDIO_URL` in your cookbook. 
29 | - Create your `.ipynb` file, then upload it to the desired directory in the repo. 30 | - Create a new branch (`/`) and start a new pull request. 31 | 32 | #### Cookbook Guidelines: 33 | - Cookbooks vary in complexity and length, ranging from straightforward ones like [this transcription example](core-transcription/transcribe.ipynb) to more intricate projects such as [Extracting Quotes with Timestamps Using LeMUR + Semantic Search](lemur/transcript-citations.ipynb). 34 | - [Try to use our SDKs.](https://www.assemblyai.com/docs/getting-started/transcribe-an-audio-file#step-1-install-the-sdk) 35 | - Generally, stick to one `.ipynb` file. For longer tutorials requiring multiple files, create a folder with a README. 36 | 37 | ## Review Process 38 | Contributions are reviewed by AssemblyAI team members for quality and relevance. 39 | 40 | ## Contact and Support 41 | For help or questions, contact us at support@assemblyai.com. 42 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 AssemblyAI 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | > [!IMPORTANT] 2 | > This cookbook repository will no longer be updated. 3 | > Any new cookbooks, as well as all the guides included in this repo, can be found in the [cookbook section of our documentation](https://www.assemblyai.com/docs/guides). 4 | > 5 | # AssemblyAI Cookbook 🧑‍🍳 6 | 7 | The AssemblyAI Cookbook is a resource of code examples, guides, and tutorials for using the AssemblyAI API. Want to learn more about AssemblyAI? Check out this [product overview video](https://youtu.be/UT1sBCuSJxE)! 8 | 9 | You will need an AssemblyAI account and API key to use these code examples. [Click here](https://www.assemblyai.com/dashboard/signup) to create and account for free. 10 | 11 | Most code examples are written in Python or Javascript, but the concepts contained in these examples can be applied to any language. You can learn more about our various models in features in our [official documentation](https://www.assemblyai.com/docs/). 
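Every guide in this repo builds on the same basic request pattern, sketched below with the Python SDK. This is only a minimal illustration: it assumes the `assemblyai` package is installed, and `YOUR_API_KEY` and `YOUR_AUDIO_URL` are placeholders for your own key and file.

```python
# Minimal sketch of the workflow most cookbooks start from (placeholder values).
import assemblyai as aai

aai.settings.api_key = "YOUR_API_KEY"

# Accepts a local file path or a publicly accessible URL.
transcript = aai.Transcriber().transcribe("YOUR_AUDIO_URL")

if transcript.status == aai.TranscriptStatus.error:
    print(transcript.error)
else:
    print(transcript.text)
```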
12 | 13 | ## Cookbook Categories 14 | 15 | ### Speech-To-Text 16 | 17 | 🗂️ Speech-To-Text enables you to transcribe spoken words into written text and is the foundation of all AssemblyAI products. 18 |
19 | Speech-To-Text Cookbooks 🎙️ 20 | 21 | 🆕 [Speech-To-Text: How to Use the EU Endpoint](core-transcription/how_to_use_the_eu_endpoint.ipynb)\ 22 | [Speech-To-Text: Identify Speakers in Audio Recordings](core-transcription/speaker_labels.ipynb)\ 23 | [Speech-To-Text: Specify a Language](core-transcription/specify-language.ipynb)\ 24 | [Speech-To-Text: Transcribe YouTube Videos](core-transcription/transcribe_youtube_videos.ipynb)\ 25 | [Speech-To-Text: Delete transcripts After 24 Hours of Creation](core-transcription/schedule_delete.ipynb)\ 26 | 📖 👀 [Click here to see all Core Transcription cookbooks](core-transcription/README.md)❗ 27 | 28 |
29 |
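If you just want the shape of a basic request before opening a notebook, here is a rough Speech-To-Text sketch using the Python SDK. It assumes the `assemblyai` package is installed and that you already know the language of your file; the linked cookbooks cover automatic language detection and the other variations in detail.

```python
# Sketch: transcribe a file and pin the language (see the "Specify a Language" cookbook).
import assemblyai as aai

aai.settings.api_key = "YOUR_API_KEY"

config = aai.TranscriptionConfig(language_code="es")  # e.g. Spanish audio
transcript = aai.Transcriber().transcribe("YOUR_AUDIO_URL", config=config)

print(transcript.text)
```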
30 | 31 | ### Audio Intelligence 32 | 33 | 🗂️ Our Audio Intelligence models analyze audio and provide additional insights beyond speech to text. 34 |
35 | Audio Intelligence Cookbooks 🤖 36 | 37 | [Audio Intelligence: Create Summarized Chapters from Podcasts](audio-intelligence/auto_chapters.ipynb)\ 38 | [Audio Intelligence: Identify Hate Speech in Audio and Video Files](audio-intelligence/content_moderation.ipynb)\ 39 | [Audio Intelligence: Identify Highlights in Audio and Video Files](audio-intelligence/key_phrases.ipynb)\ 40 | [Audio Intelligence: Create a Redacted Transcript with Entity Detection](audio-intelligence/entity_redaction.ipynb)\ 41 | [Audio Intelligence: Summarize Virtual Meetings](audio-intelligence/summarization.ipynb)\ 42 | 📖 👀 [Click here to see all Audio Intelligence cookbooks](audio-intelligence/README.md)❗ 43 | 44 |
45 |
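As a rough illustration of how these models are enabled, the sketch below adds Auto Chapters and Entity Detection to a single transcription request with the Python SDK. Treat it as a starting point rather than a canonical example: the key and audio URL are placeholders, and the attribute names follow the SDK at the time of writing.

```python
# Sketch: enable Audio Intelligence models alongside the transcription itself.
import assemblyai as aai

aai.settings.api_key = "YOUR_API_KEY"

config = aai.TranscriptionConfig(
    auto_chapters=True,     # time-coded chapter summaries
    entity_detection=True,  # people, organizations, locations, ...
)
transcript = aai.Transcriber().transcribe("YOUR_AUDIO_URL", config=config)

for chapter in transcript.chapters:
    print(f"{chapter.start}-{chapter.end}ms: {chapter.headline}")

for entity in transcript.entities:
    print(entity.entity_type, "->", entity.text)
```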
46 | 47 | ### Streaming STT 48 | 49 | 🗂️ Transcribe live audio streams with high accuracy and low latency. 50 |
51 | Streaming STT Cookbooks 🕒 52 | 53 | [Streaming: Transcribe Files in Real-Time with Node.js](streaming-stt/file-transcription-nodejs)\ 54 | [Streaming: Use Streaming STT](streaming-stt/real-time.ipynb)\ 55 | [Streaming: Use LeMUR with Streaming STT](streaming-stt/real_time_lemur.ipynb)\ 56 | [Streaming: Use LeMUR for Real-Time Translation](streaming-stt/real_time_translation.ipynb)\ 57 | [Streaming: Use Twilio with Node SDK](https://github.com/AssemblyAI/twilio-realtime-tutorial)\ 58 | 📖 👀 [Click here to see all Streaming cookbooks](streaming-stt/README.md)❗ 59 | 60 |
61 |
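The streaming guides generally reduce to the pattern sketched below: open a real-time session, stream microphone audio, and handle transcripts in a callback. The class names (`RealtimeTranscriber`, `MicrophoneStream`) reflect the Python SDK version these cookbooks were written against, so treat this as an outline to adapt rather than a drop-in snippet; it also assumes the SDK extras are installed (`pip install "assemblyai[extras]"`).

```python
# Sketch: stream microphone audio and print finalized transcripts as they arrive.
import assemblyai as aai

aai.settings.api_key = "YOUR_API_KEY"

def on_data(transcript: aai.RealtimeTranscript):
    # Partial transcripts stream in continuously; only print the finalized ones.
    if isinstance(transcript, aai.RealtimeFinalTranscript):
        print(transcript.text)

def on_error(error: aai.RealtimeError):
    print("Streaming error:", error)

transcriber = aai.RealtimeTranscriber(
    sample_rate=16_000,
    on_data=on_data,
    on_error=on_error,
)

transcriber.connect()
microphone = aai.extras.MicrophoneStream(sample_rate=16_000)
transcriber.stream(microphone)  # blocks until the stream is stopped
transcriber.close()
```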
62 | 63 | ### LeMUR 64 | 65 | 🗂️ Apply Large Language Models to spoken data. 66 |
67 | LeMUR Cookbooks 🐾 68 | 69 | [LeMUR: Process Audio Files with LLMs](lemur/using-lemur.ipynb)\ 70 | [LeMUR: Extract Dialogue Data](lemur/dialogue-data.ipynb)\ 71 | [LeMUR: Boost Transcription Accuracy](lemur/custom-vocab-lemur.ipynb)\ 72 | [LeMUR: Extract Citations from a Transcript with Semantic Search](lemur/transcript-citations.ipynb)\ 73 | [LeMUR: Processing Speaker Labels with the Custom Text Input Parameter](lemur/input-text-speaker-labels.ipynb)\ 74 | 📖 👀 [Click here to see all LeMUR cookbooks](lemur/README.md)❗ 75 | 76 |
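All of these guides build on one call: transcribe the audio, then prompt LeMUR over the finished transcript. A minimal sketch with the Python SDK (the prompt is an arbitrary example, and the key and URL are placeholders):

```python
# Sketch: run an LLM task over a completed transcript with LeMUR.
import assemblyai as aai

aai.settings.api_key = "YOUR_API_KEY"

transcript = aai.Transcriber().transcribe("YOUR_AUDIO_URL")

prompt = "Summarize the key decisions and action items from this conversation."
result = transcript.lemur.task(prompt)

print(result.response)
```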
77 | 78 | ## SDKs and Other Resources 📚 79 | 80 | Beyond the code examples here, you can learn about the AssemblyAI API from the following resources: 81 | 82 | - [Python SDK](https://github.com/AssemblyAI/assemblyai-python-sdk) 83 | - [JavaScript SDK](https://github.com/AssemblyAI/assemblyai-node-sdk) 84 | - [Java SDK](https://github.com/AssemblyAI/assemblyai-java-sdk) 85 | - [Golang SDK](https://github.com/AssemblyAI/assemblyai-go-sdk) 86 | - [Ruby SDK](https://github.com/AssemblyAI/assemblyai-ruby-sdk) 87 | - [AssemblyAI API Spec](https://github.com/AssemblyAI/assemblyai-api-spec) 88 | - [Command Line Interface (CLI)](https://github.com/AssemblyAI/assemblyai-cli) 89 | - [Discuss the API in the AssemblyAI Discord](https://www.assemblyai.com/discord) 90 | - [Check out our YouTube Channel](https://www.youtube.com/c/assemblyai) 91 | - [Follow us on X](https://twitter.com/AssemblyAI) 92 | 93 | *** 94 | If you have any questions, please feel free to reach out to our Support team - support@assemblyai.com! 95 | -------------------------------------------------------------------------------- /audio-intelligence/README.md: -------------------------------------------------------------------------------- 1 | # Audio Intelligence 🤖 2 | 3 | Use our Audio Intelligence models to analyze audio and gain additional insights beyond speech to text. 4 | 5 | ## All Audio Intelligence Cookbooks 6 | 7 | | Model | Cookbooks | 8 | |----------------|-----------------------------------| 9 | | **Content Moderation** | [Identify Hate Speech in Audio and Video Files](content_moderation.ipynb) | 10 | | **Entity Detection** | [Create a Redacted Transcript with Entity Detection](entity_redaction.ipynb) | 11 | | **Auto Chapters** | [Create Summarized Chapters from Podcasts](auto_chapters.ipynb) | 12 | | **Summarization** | [Summarize Virtual Meetings](summarization.ipynb) | 13 | | **Topic Detection** | [Label Content with Topic Tags](topic_detection.ipynb) | 14 | | **Key Phrases** | [Identify Highlights in Audio and Video Files](key_phrases.ipynb) | -------------------------------------------------------------------------------- /core-transcription/README.md: -------------------------------------------------------------------------------- 1 | # Speech-To-Text 🎙️ 2 | 3 | The Speech Recognition model enables you to transcribe spoken words into written text and is the foundation of all AssemblyAI products. 4 | On top of the core transcription, you can enable other features and models, such as [Speaker Diarization](https://www.assemblyai.com/docs/speech-to-text/speaker-diarization), by adding additional parameters to the same transcription request. 
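For example, Speaker Diarization is a single extra parameter on the request. The sketch below uses the Python SDK with placeholder values; the cookbooks in this folder show the same idea in more depth.

```python
# Sketch: the same transcription request with Speaker Diarization turned on.
import assemblyai as aai

aai.settings.api_key = "YOUR_API_KEY"

config = aai.TranscriptionConfig(speaker_labels=True)  # the extra parameter
transcript = aai.Transcriber().transcribe("YOUR_AUDIO_URL", config=config)

for utterance in transcript.utterances:
    print(f"Speaker {utterance.speaker}: {utterance.text}")
```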
5 | 6 | ## Table of Contents 7 | 8 | - [Speech-To-Text 🎙️](#speech-to-text-️) 9 | - [Table of Contents](#table-of-contents) 10 | - [All Speech-To-Text Cookbooks](#all-speech-to-text-cookbooks) 11 | - [Basic Transcription Workflows](#basic-transcription-workflows) 12 | - [Batch Transcription](#batch-transcription) 13 | - [Hosting Audio Files](#hosting-audio-files) 14 | - [Speaker Labels](#speaker-labels) 15 | - [Automatic Language Detection](#automatic-language-detection) 16 | - [Subtitles](#subtitles) 17 | - [Delete Transcripts](#delete-transcripts) 18 | - [Error Handling and Audio File Fixes](#error-handling-and-audio-file-fixes) 19 | - [Translation](#translation) 20 | - [Async Chunking for Near-Realtime Transcription](#async-chunking-for-near-realtime-transcription) 21 | - [Migration Guides](#migration-guides) 22 | - [Do More with our SDKS](#do-more-with-our-sdks) 23 | 24 | ## All Speech-To-Text Cookbooks 25 | 26 | 27 | 28 | ### Basic Transcription Workflows 29 | 30 | [Transcribe an Audio File](transcribe.ipynb)\ 31 | [Specify a Language](specify-language.ipynb) \ 32 | [Transcribe YouTube Videos](transcribe_youtube_videos.ipynb)\ 33 | [Build a UI for Transcription with Gradio](gradio-frontend.ipynb)\ 34 | [Detect Low Confidence Words in a Transcript](detecting-low-confidence-words.md)\ 35 | 🆕 [How to Use the EU Endpoint](how_to_use_the_eu_endpoint.ipynb) 36 | 37 | 38 | 39 | ### Batch Transcription 40 | 41 | [Transcribe a Batch of Files](transcribe_batch_of_files)\ 42 | [Transcribe Multiple Files Simultaneously - Python SDK](SDK_transcribe_batch_of_files/batch_transcription.ipynb)\ 43 | [Transcribe Multiple Files Simultaneously - Node SDK](SDK-Node-batch.md) 44 | 45 | 46 | 47 | ### Hosting Audio Files 48 | 49 | [Transcribe from an AWS S3 Bucket](transcribe_from_s3.ipynb)\ 50 | [Transcribe Google Drive Links](transcribing-google-drive-file.md)\ 51 | [Transcribe GitHub Files](transcribing-github-files.md) 52 | 53 | 54 | 55 | ### Speaker Labels 56 | 57 | [Identify Speakers in Audio Recordings](speaker_labels.ipynb)\ 58 | [Generate Speaker Labels with Make.com](make.com-speaker-labels.md)\ 59 | [Calculate Talk/Listen Ratio of Speakers](talk-listen-ratio.ipynb)\ 60 | [Create a Speaker Timeline with Speaker Labels](speaker_timeline.ipynb)\ 61 | [Use Pyannote to Generate Custom Speaker Labels](Use_AssemblyAI_with_Pyannote_to_generate_custom_Speaker_Labels.ipynb)\ 62 | [Speaker Diarization with Async Chunking](speaker-diarization-with-async-chunking.ipynb)\ 63 | [Speaker Identification Across Files using Pinecone and Nvidia's TitaNet Model](titanet-speaker-identification.ipynb) 64 | 65 | 66 | 67 | ### Automatic Language Detection 68 | 69 | [Use Automatic Language Detection](automatic-language-detection.ipynb)\ 70 | [Automatic Language Detection as Separate Step from Transcription](automatic-language-detection-separate.ipynb)\ 71 | [Route to Default Language if Language Detection Confidence is Low - Node SDK](automatic-language-detection-route-default-language-js.md)\ 72 | [Route to Default Language if Language Detection Confidence is Low - Python SDK](automatic-language-detection-route-default-language-python.ipynb)\ 73 | [Route to Nano Speech Model if Language Confidence is Low](automatic-language-detection-route-nano-model.ipynb) 74 | 75 | 76 | 77 | ### Subtitles 78 | 79 | [Generate Subtitles for Videos](subtitles.ipynb)\ 80 | [Create Subtitles with Speaker Labels](speaker_labelled_subtitles.ipynb)\ 81 | [Create Custom-Length Subtitles](subtitle_creation_by_word_count.ipynb) 82 | 83 | 84 | 85 | 
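As a quick taste of the subtitle guides above, a completed transcript can be exported straight to SRT or VTT. This is a minimal sketch with the Python SDK; `chars_per_caption` is optional, and the key and URL are placeholders.

```python
# Sketch: export subtitles from a completed transcript.
import assemblyai as aai

aai.settings.api_key = "YOUR_API_KEY"

transcript = aai.Transcriber().transcribe("YOUR_AUDIO_URL")

srt = transcript.export_subtitles_srt(chars_per_caption=32)  # cap characters per caption
with open("subtitles.srt", "w") as f:
    f.write(srt)
```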
### Delete Transcripts 86 | 87 | [Delete a Transcript](delete_transcript.ipynb)\ 88 | [Delete Transcripts After 24 Hours of Creation](schedule_delete.ipynb) 89 | 90 | 91 | 92 | ### Error Handling and Audio File Fixes 93 | 94 | [Troubleshoot Common Errors When Starting to Use Our API](common_errors_and_solutions.md)\ 95 | [Automatically Retry Server Errors](retry-server-error.ipynb)\ 96 | [Automatically Retry Upload Errors](retry-upload-error.ipynb)\ 97 | [Identify Duplicate Channels in Stereo Files](identify_duplicate_channels.ipynb)\ 98 | [Correct Audio Duration Discrepancies with Multi-Tool Validation and Transcoding](audio-duration-fix.ipynb) 99 | 100 | 101 | 102 | ### Translation 103 | 104 | [Translate an AssemblyAI Transcript](translate_transcripts.ipynb)\ 105 | [Translate an AssemblyAI Subtitle Transcript](translate_subtitles.ipynb) 106 | 107 | 108 | 109 | ### Async Chunking for Near-Realtime Transcription 110 | 111 | 🆕 [Near-Realtime Python Speech-to-Text App](https://github.com/AssemblyAI-Solutions/async-chunk-py)\ 112 | 🆕 [Near-Realtime Node.js Speech-to-Text App](https://github.com/AssemblyAI-Solutions/async-chunk-js)\ 113 | [Split Audio File into Shorter Files](split_audio_file) 114 | 115 | 116 | 117 | ### Migration Guides 118 | 119 | [AWS Transcribe to AssemblyAI](migration_guides/aws_to_aai.ipynb)\ 120 | [Deepgram to AssemblyAI](migration_guides/dg_to_aai.ipynb)\ 121 | [OpenAI to AssemblyAI](migration_guides/oai_to_aai.ipynb)\ 122 | [Google to AssemblyAI](migration_guides/google_to_aai.ipynb) 123 | 124 | 125 | 126 | ### Do More with our SDKS 127 | 128 | [Do More with the Node SDK](do-more-with-sdk-js.md)\ 129 | [Do More with the Python SDK](do-more-with-sdk-python.ipynb) 130 | -------------------------------------------------------------------------------- /core-transcription/SDK-Node-batch.md: -------------------------------------------------------------------------------- 1 | # Transcribe Multiple Files Simultaneously Using the Node SDK 2 | 3 | In this guide, we'll show you how to transcribe multiple files simultaneously using the Node SDK. 4 | 5 | ## Getting Started 6 | 7 | Before we begin, make sure you have an AssemblyAI account and an API key. You can sign up for an account and get your API key from your [dashboard](https://www.assemblyai.com/app/account). This guide will use AssemblyAI's [node SDK](https://github.com/AssemblyAI/assemblyai-node-sdk). If you haven't already, install the SDK in your project by following these [instructions](https://github.com/AssemblyAI/assemblyai-node-sdk#installation). 8 | 9 | ## Step-by-Step Instructions 10 | 11 | Set up your application folder structure by adding an audio folder which will house the files you would like to transcribe, a transcripts folder which will house your completed transcriptions, and a new `.js` file in the root of the project. Your file structure should look like this: 12 | ``` 13 | BatchApp 14 | ├───audio 15 | │ ├───audio-1.mp3 16 | │ └───audio-2.mp3 17 | ├───transcripts 18 | ├───batch.js 19 | ``` 20 | 21 | In the `batch.js` file import the AssemblyAI package, as well as the node fs and node path packages. Create an AssemblyAI object with your API key: 22 | 23 | ``` 24 | import { AssemblyAI } from "assemblyai"; 25 | import * as path from 'path'; 26 | import * as fs from 'fs'; 27 | 28 | const client = new AssemblyAI({ 29 | apiKey: , 30 | }); 31 | ``` 32 | 33 | Declare the variables `audioFolder`, `files`, `filePathArr`, and `transcriptsFolder`. 
34 | * `audioFolder` will be the relative path to the folder containing your audio files. 35 | * `files` will read the files in the audio folder, and return them in an array. 36 | * `filePathArr` will join the file names with the audio folder name to create the relative path to each individual file. 37 | * `transcriptsFolder` will be the relative path to the folder containing your transcription files. 38 | 39 | ``` 40 | const audioFolder = './audio'; 41 | const files = await fs.promises.readdir(audioFolder); 42 | const filePathArr = files.map(file => path.join(audioFolder, file)); 43 | const transcriptsFolder = './transcripts'; 44 | ``` 45 | 46 | Next, we'll create a promise that will submit the file path for transcription. Make sure to add the parameters for the models you would like to run. 47 | 48 | ``` 49 | const getTranscript = (filePath) => new Promise((resolve, reject) => { 50 | client.transcripts.transcribe({ 51 | audio: filePath, 52 | language_detection: true 53 | }) 54 | .then(result => resolve(result)) 55 | .catch(error => reject(error)); 56 | }); 57 | ``` 58 | 59 | Next, we will create an async function that will call the `getTranscript` function and write the transcription text from each audio file to an individual text file in the transcripts folder. 60 | 61 | ``` 62 | const processFile = async (file) => { 63 | const getFileName = file.split('audio/'); //Separate the folder name and file name into substrings 64 | const fileName = getFileName[1]; //Grab the 2nd substring which is the file name 65 | const filePath = path.join(transcriptsFolder, `${fileName}.txt`); //Relative path for transcription text files. 66 | 67 | const transcript = await getTranscript(file); //Request the transcript 68 | const text = transcript.text; //Grab transcription text from the JSON response 69 | 70 | //Write the transcription text to a text file 71 | return new Promise((resolve, reject) => { 72 | fs.writeFile(filePath, text, err => { 73 | if (err) { 74 | reject(err); 75 | return; 76 | } 77 | 78 | resolve({ 79 | ok: true, 80 | message: 'Text File created!' 81 | }); 82 | }); 83 | }); 84 | } 85 | ``` 86 | 87 | Next, we will create the run function. This function will: 88 | * Create an array of unresolved promises with each promise requesting a transcript. 89 | * Use `Promise.all` to iterate over the array of unresolved promises. 
90 | 91 | Then we'll call the run function 92 | ``` 93 | const run = async () => { 94 | const unresolvedPromises = filePathArr.map(processFile); 95 | await Promise.all(unresolvedPromises); 96 | } 97 | 98 | run() 99 | ``` 100 | 101 | Your final file will look like this: 102 | 103 | ``` 104 | import { AssemblyAI } from "assemblyai"; 105 | import * as path from 'path'; 106 | import * as fs from 'fs'; 107 | 108 | const client = new AssemblyAI({ 109 | apiKey: , 110 | }); 111 | 112 | const audioFolder = './audio'; 113 | const files = await fs.promises.readdir(audioFolder); 114 | const filePathArr = files.map(file => path.join(audioFolder, file)); 115 | const transcriptsFolder = './transcripts' 116 | 117 | const getTranscript = (filePath) => new Promise((resolve, reject) => { 118 | client.transcripts.transcribe({ 119 | audio: filePath, 120 | language_detection: true, 121 | }) 122 | .then(result => resolve(result)) 123 | .catch(error => reject(error)); 124 | }); 125 | 126 | const processFile = async (file) => { 127 | const getFileName = file.split('audio/') 128 | const fileName = getFileName[1] 129 | const filePath = path.join(transcriptsFolder, `${fileName}.txt`); 130 | 131 | const transcript = await getTranscript(file); 132 | const text = transcript.text 133 | 134 | return new Promise((resolve, reject) => { 135 | fs.writeFile(filePath, text, err => { 136 | if (err) { 137 | reject(err); 138 | return; 139 | } 140 | 141 | resolve({ 142 | ok: true, 143 | message: 'Text File created!' 144 | }); 145 | }); 146 | }); 147 | } 148 | 149 | const run = async () => { 150 | const unresolvedPromises = filePathArr.map(processFile); 151 | await Promise.all(unresolvedPromises); 152 | } 153 | 154 | run() 155 | ``` 156 | 157 | If you have any questions, please feel free to reach out to our Support team - support@assemblyai.com or in our Community Discord! 158 | -------------------------------------------------------------------------------- /core-transcription/SDK_transcribe_batch_of_files/audio/audio.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AssemblyAI/cookbook/c06fa0906e39c415953c102c338856812071e6ef/core-transcription/SDK_transcribe_batch_of_files/audio/audio.mp3 -------------------------------------------------------------------------------- /core-transcription/SDK_transcribe_batch_of_files/audio/audio_2.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AssemblyAI/cookbook/c06fa0906e39c415953c102c338856812071e6ef/core-transcription/SDK_transcribe_batch_of_files/audio/audio_2.mp3 -------------------------------------------------------------------------------- /core-transcription/SDK_transcribe_batch_of_files/transcripts/audio.mp3.txt: -------------------------------------------------------------------------------- 1 | AssemblyAI is a deep learning company that builds powerful APIs to help you transcribe and understand audio. The most common use case for the API is to automatically convert prerecorded audio and video files, as well as real time audio streams into text transcriptions. Our APIs convert audio and video into text using powerful deep learning models that we research and develop end to end in house. Millions of podcasts, zoom recordings, phone calls, or video files are being transcribed with AssemblyAI every single day. But where AssemblyAI really excels is with helping you understand your data. 
So let's say we transcribe Joe Biden's State of the Union using AssemblyAI's API, with our Auto chapter feature, you can generate time coded summaries of the key moments of your audio file. For example, with the State of the Union address, we get chapter summaries like this auto Chapters automatically segments your audio or video files into chapters and provides a summary for each of these chapters. With sentiment analysis, we can classify what's being spoken in your audio files as either positive, negative, or neutral. So, for example, in the State of the Union address, we see that this sentence was classified as positive, whereas this sentence was classified as negative. Content Safety Detection can flag sensitive content as it is spoken, like hate speech, profanity, violence, or weapons. For example, in Biden's State of the Union address, content safety detection flagged parts of his speech as being about weapons. This feature is especially useful for automatic content moderation and brand safety use cases. With Autohighlights, you can automatically identify important words and phrases that are being spoken in your data. Owned by the State of the Union address, AssemblyAI's API detected these words and phrases as being important. Lastly, with entity detection, you can identify entities that are spoken in your audio, like organization names or person names. In Biden's speech, these were the entities that were detected. This is just a preview of the most popular features of AssemblyAI API. If you want a full list of features, go check out our documentation linked in the description below. And if you ever need some support, our team of developers is here to help. Every day, developers are using these features to build really exciting applications. From meeting summarizers, to brand safety or contextual targeting platforms, to full blown conversational intelligence tools, we can't wait to see what you build with AssemblyAI. -------------------------------------------------------------------------------- /core-transcription/SDK_transcribe_batch_of_files/transcripts/audio_2.mp3.txt: -------------------------------------------------------------------------------- 1 | Smoke from hundreds of wildfires in Canada is triggering air quality alerts throughout the US. Skylines from Maine to Maryland to Minnesota are gray and smoggy. And in some places, the air quality warnings include the warning to stay inside. We wanted to better understand what's happening here and why, so we called Peter DiCarlo, an associate professor in the Department of Environmental Health and Engineering at Johns Hopkins University. Good morning, professor. Good morning. So what is it about the conditions right now that have caused this round of wildfires to affect so many people so far away? Well, there's a couple of things. The season has been pretty dry already, and then the fact that we're getting hit in the US. Is because there's a couple of weather systems that are essentially channeling the smoke from those Canadian wildfires through Pennsylvania into the Mid Atlantic and the Northeast and kind of just dropping the smoke there. So what is it in this haze that makes it harmful? And I'm assuming it is is it is the levels outside right now in Baltimore are considered unhealthy. 
And most of that is due to what's called particulate matter, which are tiny particles, microscopic smaller than the width of your hair, that can get into your lungs and impact your respiratory system, your cardiovascular system, and even your neurological your brain. What makes this particularly harmful? Is it the volume of particulate? Is it something in particular? What is it exactly? Can you just drill down on that a little bit more? Yeah. So the concentration of particulate matter I was looking at some of the monitors that we have was reaching levels of what are, in science speak, 150 micrograms per meter cubed, which is more than ten times what the annual average should be, and about four times higher than what you're supposed to have on a 24 hours average. And so the concentrations of these particles in the air are just much, much higher than we typically see. And exposure to those high levels can lead to a host of health problems. And who is most vulnerable? I noticed that in New York City, for example, they're canceling outdoor activities, and so here it is in the early days of summer, and they have to keep all the kids inside. So who tends to be vulnerable in a situation like this? It's the youngest. So children, obviously, whose bodies are still developing. The elderly who know their bodies are more in decline, and they're more susceptible to the health impacts of breathing, the poor air quality. And then people who have preexisting health conditions, people with respiratory conditions or heart conditions can be triggered by high levels of air pollution. Could this get worse? That's a good in some areas, it's much worse than others. And it just depends on kind of where the smoke is concentrated. I think New York has some of the higher concentrations right now, but that's going to change as that air moves away from the New York area. But over the course of the next few days, we will see different areas being hit at different times with the highest concentrations. I was going to ask you, more fires start burning, I don't expect the concentrations to go up too much higher. I was going to ask you and you started to answer this, but how much longer could this last? Or forgive me if I'm asking you to speculate, but what do you think? Well, I think the fires are going to burn for a little bit longer, but the key for us in the US. Is the weather system changing. And so right now, it's kind of the weather systems that are pulling that air into our mid Atlantic and Northeast region. As those weather systems change and shift, we'll see that smoke going elsewhere and not impact us in this region as much. And so I think that's going to be the defining factor. And I think the next couple of days we're going to see a shift in that weather pattern and start to push the smoke away from where we are. And finally, with the impacts of climate change, we are seeing more wildfires. Will we be seeing more of these kinds of wide ranging air quality consequences or circumstances? I mean, that is one of the predictions for climate change. Looking into the future, the fire season is starting earlier and lasting longer and we're seeing more frequent fires. So, yeah, this is probably something that we'll be seeing more frequently. This tends to be much more of an issue in the Western US. So the Eastern US getting hit right now is a little bit new. But yeah, I think with climate change moving forward, this is something that is going to happen more frequently. 
That's Peter DiCarlo, associate professor in the Department of Environmental Health and Engineering at Johns Hopkins University. Sergeant Carlo, thanks so much for joining us and sharing this expertise with us. Thank you for having me. -------------------------------------------------------------------------------- /core-transcription/automatic-language-detection-separate.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Automatic Language Detection as separate step from Transcription\n", 8 | "\n", 9 | "In this guide, we'll show you how to perform automatic language detection separately from the transcription process. For the transcription, the file then gets then routed to either our [*Best* or *Nano*](https://www.assemblyai.com/docs/speech-to-text/speech-recognition#select-the-speech-model-with-best-and-nano) model class, depending on the supported language.\n", 10 | "\n", 11 | "This workflow is designed to be cost-effective, slicing the first 60 seconds of audio and running it through Nano ALD, which detects 99 languages, at a cost of $0.002 per transcript for this language detection workflow (not including the total transcription cost).\n", 12 | "\n", 13 | "## Get started\n", 14 | "\n", 15 | "Before we begin, make sure you have an AssemblyAI account and an API key. You can [sign up](https://assemblyai.com/dashboard/signup) for a free account and get your API key from your dashboard.\n", 16 | "\n", 17 | "## Step-by-step instructions\n", 18 | "\n", 19 | "Install the SDK:" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "%pip install assemblyai" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "Import the `assemblyai` package and set your API key:" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 3, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "import assemblyai as aai\n", 45 | "aai.settings.api_key = \"YOUR_API_KEY\"" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": [ 52 | "Create a set with all supported languages for *Best*. You can find them in our [documentation here](https://www.assemblyai.com/docs/concepts/supported-languages#supported-languages-for-best)." 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 5, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "supported_languages_for_best = {\n", 62 | " \"en\",\n", 63 | " \"en_au\",\n", 64 | " \"en_uk\",\n", 65 | " \"en_us\",\n", 66 | " \"es\",\n", 67 | " \"fr\",\n", 68 | " \"de\",\n", 69 | " \"it\",\n", 70 | " \"pt\",\n", 71 | " \"nl\",\n", 72 | " \"hi\",\n", 73 | " \"ja\",\n", 74 | " \"zh\",\n", 75 | " \"fi\",\n", 76 | " \"ko\",\n", 77 | " \"pl\",\n", 78 | " \"ru\",\n", 79 | " \"tr\",\n", 80 | " \"uk\",\n", 81 | " \"vi\",\n", 82 | "}" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "Define a `Transcriber`. Note that here we don't pass in a global `TranscriptionConfig`, but later apply different ones during the `transcribe()` call." 
90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 6, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "transcriber = aai.Transcriber()" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "Define two helper functions:\n", 106 | "- `detect_language()` performs language detection on the [first 60 seconds](https://www.assemblyai.com/docs/api-reference/transcripts/submit#request.body.audio_end_at) of the audio using *Nano* and returns the language code.\n", 107 | "- `transcribe_file()` performs the transcription. For this, the identified language is applied and either *Best* or *Nano* is used depending on the supported language." 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 7, 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [ 116 | "def detect_language(audio_url):\n", 117 | " config = aai.TranscriptionConfig(\n", 118 | " audio_end_at=60000, # first 60 seconds (in milliseconds)\n", 119 | " language_detection=True,\n", 120 | " speech_model=aai.SpeechModel.nano,\n", 121 | " )\n", 122 | " transcript = transcriber.transcribe(audio_url, config=config)\n", 123 | " return transcript.json_response[\"language_code\"]\n", 124 | "\n", 125 | "def transcribe_file(audio_url, language_code):\n", 126 | " config = aai.TranscriptionConfig(\n", 127 | " language_code=language_code,\n", 128 | " speech_model=(\n", 129 | " aai.SpeechModel.best\n", 130 | " if language_code in supported_languages_for_best\n", 131 | " else aai.SpeechModel.nano\n", 132 | " ),\n", 133 | " )\n", 134 | " transcript = transcriber.transcribe(audio_url, config=config)\n", 135 | " return transcript" 136 | ] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "metadata": {}, 141 | "source": [ 142 | "Test the code with different audio files. For each file, we apply both helper functions sequentially to first identify the language and then transcribe the file." 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 8, 148 | "metadata": {}, 149 | "outputs": [ 150 | { 151 | "name": "stdout", 152 | "output_type": "stream", 153 | "text": [ 154 | "Identified language: pt\n", 155 | "Transcript: e aí Olá pessoal, sejam bem-vindos a mais um vídeo e hoje eu vou ensinar-vos como fazer esta espada ...\n", 156 | "Identified language: es\n", 157 | "Transcript: Precisamente sobre este caso, el diario estadounidense New York Times reveló este sábado un conjunto ...\n", 158 | "Identified language: sl\n", 159 | "Transcript: Ni lepška, kaj videl tega otroka v mrekoj svojga okolja, da mu je uspil in to v takimi miri, da pač ...\n", 160 | "Identified language: en\n", 161 | "Transcript: Runner's knee runner's knee is a condition characterized by pain behind or around the kneecap. 
It is ...\n" 162 | ] 163 | } 164 | ], 165 | "source": [ 166 | "audio_urls = [\n", 167 | " \"https://storage.googleapis.com/aai-web-samples/public_benchmarking_portugese.mp3\",\n", 168 | " \"https://storage.googleapis.com/aai-web-samples/public_benchmarking_spanish.mp3\",\n", 169 | " \"https://storage.googleapis.com/aai-web-samples/slovenian_luka_doncic_interview.mp3\",\n", 170 | " \"https://storage.googleapis.com/aai-web-samples/5_common_sports_injuries.mp3\",\n", 171 | "]\n", 172 | "\n", 173 | "for audio_url in audio_urls:\n", 174 | " language_code = detect_language(audio_url)\n", 175 | " print(\"Identified language:\", language_code)\n", 176 | "\n", 177 | " transcript = transcribe_file(audio_url, language_code)\n", 178 | " print(\"Transcript:\", transcript.text[:100], \"...\")" 179 | ] 180 | } 181 | ], 182 | "metadata": { 183 | "kernelspec": { 184 | "display_name": "venv", 185 | "language": "python", 186 | "name": "python3" 187 | }, 188 | "language_info": { 189 | "codemirror_mode": { 190 | "name": "ipython", 191 | "version": 3 192 | }, 193 | "file_extension": ".py", 194 | "mimetype": "text/x-python", 195 | "name": "python", 196 | "nbconvert_exporter": "python", 197 | "pygments_lexer": "ipython3", 198 | "version": "3.11.7" 199 | } 200 | }, 201 | "nbformat": 4, 202 | "nbformat_minor": 2 203 | } 204 | -------------------------------------------------------------------------------- /core-transcription/common_errors_and_solutions.md: -------------------------------------------------------------------------------- 1 | # Guide to troubleshoot Common Errors 2 | 3 | AssemblyAI's API always returns a response even when there is an error. This guide is designed to help you navigate and resolve common issues when implementing AssemblyAI. 4 | 5 | ## Understanding Errors with AssemblyAI 6 | 7 | There are primarily two types of errors you might encounter when working with AssemblyAI: 8 | - errors that occur when requesting a transcription 9 | - errors that occur while the transcription is processing 10 | 11 | The first category includes issues such as authentication errors, invalid request parameters, or server-related errors, which are typically flagged immediately when you attempt to initiate a request. 12 | 13 | The second category, failed transcription jobs, pertains to errors that occur during the transcription process itself. These might be due to issues with the audio file, unsupported languages, or internal errors on our end. 14 | 15 | ## Handling Errors with AssemblyAI 16 | 17 | ### Error handling with AssemblyAI's SDKs (Recommended) 18 | When using any of our SDKs, both types of errors are surfaced via the error key in a transcript object. For example in Python: 19 | 20 | ``` 21 | if transcript.status == aai.TranscriptStatus.error: 22 | print(transcript.error) 23 | ``` 24 | 25 | ### Error handling with HTTPS requests 26 | For errors when making a request for a transcription, you will have to check the status code that we respond with. For errors that occur during the transcription process, you will need to access the "error" key in the JSON response. For other HTTP errors you can print the information from the response object. Here is an example you can use: 27 | ``` 28 | if response.status_code != 200: 29 | try: 30 | print(response.json()['error']) 31 | except Exception: 32 | print(response.status_code, response.text, response.url) 33 | ``` 34 | 35 | ## Common errors when making a request for a transcription 36 | 37 | #### 1. 
Models Are Not Supported for a Particular Language 38 | 39 | ``` 40 | # Status code: 400 41 | { 42 | "error": "The following models are not available in this language: speaker_labels" 43 | } 44 | ``` 45 | 46 | - **Solution**: Before you start, make sure to check our [Supported Languages page](https://www.assemblyai.com/docs/concepts/supported-languages). This page provides detailed information on what features are available for each language, helping you to choose the right options for your transcription or analysis needs. 47 | #### 2. Insufficient Funds 48 | ``` 49 | # Status code: 400 50 | { 51 | "error": "Your current account balance is negative. Please top up to continue using the API." 52 | } 53 | ``` 54 | 55 | **Solution**: 56 | - **Auto-pay**: Enable auto-pay in your account settings to automatically add funds when your balance runs low. 57 | - **Check Funds**: Regularly check your account balance to ensure you have sufficient funds for your transcription requests. 58 | - **Add Funds**: If needed, add funds to your account to continue using our services without interruption. 59 | 60 | #### 3. Invalid API Token 61 | ``` 62 | # Status code: 401 63 | { 64 | "error": "Authentication error, API token missing/invalid." 65 | } 66 | ``` 67 | An invalid API token will prevent you from making successful requests. 68 | 69 | **Solution**: 70 | - Double-check that the API token you're using matches exactly with the token shown in your dashboard. Even a small typo can lead to authentication errors. 71 | 72 | #### 4. Unsupported Characters in Custom Vocabulary 73 | ``` 74 | # Status code: 400 75 | { 76 | "error": "'🇸🇬' was included in the word boost list but contains unsupported characters" 77 | } 78 | ``` 79 | 80 | Custom Vocabulary only supports ASCII characters, but it attempts to normalize text before throwing an error. This error is usually caused by words or symbols that do not have an ASCII equivalent. 81 | 82 | **Solution**: 83 | - Check that the word or phrase can be normalized prior to submitting it to Custom Vocabulary. Here is a code snippet that does this using Python's [unicodedata package](https://docs.python.org/3/library/unicodedata.html#unicodedata.normalize): 84 | ``` 85 | import unicodedata 86 | def filter_unsupported_characters(phrase): 87 | cleaned_phrase = unicodedata.normalize('NFKD', phrase).encode('ascii', 'ignore').decode('ascii') 88 | if len(cleaned_phrase) != len(phrase): 89 | raise ValueError("Unsupported characters in phrase") 90 | return cleaned_phrase 91 | ``` 92 | #### 5. Transcript ID Not Found 93 | ``` 94 | # Status code: 400 95 | { 96 | "error": "Transcript lookup error, transcript id not found" 97 | } 98 | ``` 99 | 100 | **Solution**: 101 | - **Verify the endpoint and method that you are using**: Check that you are making a `POST` request to `https://api.assemblyai.com/v2/transcript` or a `GET` request to `https://api.assemblyai.com/v2/transcript/{transcript_id}`, and not `https://api.assemblyai.com/v2/transcript/` 102 | - **Token Verification**: Double-check that the API token you're using matches exactly with the token used to make the original request. 103 | - If you're using Postman, ensure that `Encode URL automatically` under Settings is **disabled**. 104 | #### 6. Server Errors 105 | ``` 106 | # Status code: 500 107 | { 108 | "error": "Server error, developers have been alerted." 109 | } 110 | ``` 111 | 112 | Server errors rarely happen but can occasionally occur on our side.
113 | 114 | **Solution**: 115 | - **Retries**: Implement retries in your code for when our server returns a 500 code response. 116 | - **Automatic Retries**: Enable automatic retries for your transcription jobs under [Account > Settings](https://www.assemblyai.com/app/account) on your dashboard. This ensures that if a job fails due to a temporary server issue, it will automatically be retried. 117 | - **Check our Status page** to verify that we are not currently undergoing an incident 118 | - **Reach out to Support**: Remember to provide the transcript ID, audio file used, and parameters used in your request or the full JSON response in your message. You can also email support@assemblyai.com for help! 119 | 120 | ## Common transcription processing errors 121 | #### 1. Audio File URL Errors 122 | 123 | ##### Attempting to transcribe webpages 124 | ``` 125 | { 126 | "status": "error", 127 | "audio_url": "https://www.youtube.com/watch?v=r8KTOBOMm0A", 128 | "error": "File does not appear to contain audio. File type is text/html (HTML document, ASCII text, with very long lines (56754)).", 129 | ... 130 | } 131 | ``` 132 | 133 | Our API requires a publicly accessible URL that points to an audio file to retrieve your file for transcription. To transcribe a YouTube video, [check out this Cookbook](https://github.com/AssemblyAI/cookbook/blob/master/core-transcription/transcribe_youtube_videos.ipynb). 134 | 135 | ##### Attempting to transcribe audio files that are not accessible 136 | ``` 137 | { 138 | "status": "error", 139 | "audio_url": "https://foo.bar", 140 | "error": "Download error, unable to download https://foo.bar. Please make sure the file exists and is accessible from the internet.", 141 | } 142 | ``` 143 | 144 | **Solution**: 145 | - **Public Access**: Verify that the audio file URL is publicly accessible. Our servers cannot transcribe audio from private or restricted URLs. 146 | - **Google Drive URLs**: For audio stored on Google Drive, consult our [Google Drive Transcription Cookbook](https://github.com/AssemblyAI/cookbook/blob/master/core-transcription/transcribing-google-drive-file.md) to correctly format your URLs for access. 147 | - **Direct Upload**: Utilize the [AssemblyAI Upload endpoint](https://www.assemblyai.com/docs/api-reference/upload) to upload files directly from your device, eliminating the need for a public URL. 148 | - **AWS S3 Pre-signed URLs**: [This Cookbook](https://github.com/AssemblyAI/cookbook/blob/master/core-transcription/transcribe_from_s3.ipynb) shows you how to use pre-signed URLs for AWS S3 storage to provide secure, temporary access for transcription without making your files public. 149 | 150 | #### 2. Audio File Errors 151 | 152 | ##### Attempting to transcribe audio files that are too short 153 | ``` 154 | { 155 | "status": "error", 156 | "audio_url": "https://foo.bar", 157 | "error": "Audio duration is too short.", 158 | } 159 | ``` 160 | 161 | The minimum audio duration for a file submitted to our API is 160ms. 162 | 163 | **Solution**: 164 | - **Add error handling for this error message**: When this error occurs, handle it safely by checking the error string and returning the error. 
165 | - **Add pre-submit checks for the duration of the audio file**: Prior to submitting a file for transcription, check the duration using a tool like soxi (part of the SoX package): `soxi -D audio.mp3` 166 | -------------------------------------------------------------------------------- /core-transcription/detecting-low-confidence-words.md: -------------------------------------------------------------------------------- 1 | # Detecting Low Confidence Words in a Transcript 2 | 3 | In this guide, we'll show you how to detect sentences that contain words with low confidence scores. Confidence scores represent how confident the model was in predicting the transcribed word. Detecting words with low confidence scores can be important for manually editing transcripts. 4 | Each transcribed word will contain a corresponding confidence score between 0.0 (low confidence) and 1.0 (high confidence). 5 | You can decide what your confidence threshold will be when implementing this logic in your application. For this guide, we will use a threshold of 0.4. 6 | 7 | ## Getting Started 8 | 9 | Before we begin, make sure you have an AssemblyAI account and an API key. You can sign up for an account and get your API key from your [dashboard](https://www.assemblyai.com/app/account). This guide will use AssemblyAI's [node SDK](https://github.com/AssemblyAI/assemblyai-node-sdk). If you haven't already, install the SDK by following these [instructions](https://github.com/AssemblyAI/assemblyai-node-sdk#installation). 10 | 11 | ## Step-by-Step Instructions 12 | 13 | Import the AssemblyAI package and create an AssemblyAI object with your API key: 14 | 15 | ```javascript 16 | import { AssemblyAI } from "assemblyai"; 17 | 18 | const client = new AssemblyAI({ 19 | apiKey: process.env.ASSEMBLYAI_API_KEY, 20 | }); 21 | ``` 22 | 23 | Next create the transcript with your audio file, either via local audio file or URL (AssemblyAI's servers need to be able to access the URL, make sure the URL links to a downloadable file). 24 | 25 | ```javascript 26 | const transcript = await client.transcripts.transcribe({ 27 | audio_url: './sample.mp4', 28 | }) 29 | ``` 30 | 31 | From there use the `id` from the transcript to request the transcript broken down into sentences. 32 | 33 | ```javascript 34 | let { id } = transcript 35 | let { sentences } = await client.transcripts.sentences(id) 36 | ``` 37 | 38 | Set the confidence score threshold to a value of you choice (0.5 or less is a good start). In this guide, we'll use 0.4. 39 | 40 | ```javascript 41 | let confidenceThreshold = 0.4 42 | ``` 43 | 44 | Next, we will filter the sentences array down to just sentences that contain words with confidence scores of under 0.4. 45 | 46 | 47 | ```javascript 48 | const sentencesWithLowConfidenceWords = (sentences, confidenceThreshold) => { 49 | return sentences.filter(sentence => { 50 | const hasLowConfidenceWord = sentence.words.some(word => word.confidence < confidenceThreshold); 51 | return hasLowConfidenceWord; 52 | }); 53 | } 54 | 55 | const filteredSentences = sentencesWithLowConfidenceWords(sentences, confidenceThreshold); 56 | ``` 57 | 58 | Next we'll alter the `filteredSentences` array so that the `words` array for each sentence only contains the words with confidence scores under of 0.4. 59 | 60 | ```javascript 61 | const filterScores = filteredSentences.map(item => {return {...item, words: item.words.filter(word => word.confidence < confidenceThreshold)}}) 62 | ``` 63 | 64 | Finally, we'll display the final results. 
The final results will include the timestamp of the sentence that contains low confidence words, the sentence, the words that scored poorly, and their scores. 65 | 66 | ```javascript 67 | //This function is optional but can be used to format the timestamps from milleseconds to HH:MM:SS 68 | const formatMilliseconds = (milliseconds) => { 69 | // Calculate hours, minutes, and seconds 70 | const hours = Math.floor(milliseconds / 3600000); 71 | const minutes = Math.floor((milliseconds % 3600000) / 60000); 72 | const seconds = Math.floor((milliseconds % 60000) / 1000); 73 | 74 | // Ensure the values are displayed with leading zeros if needed 75 | const formattedHours = hours.toString().padStart(2, '0'); 76 | const formattedMinutes = minutes.toString().padStart(2, '0'); 77 | const formattedSeconds = seconds.toString().padStart(2, '0'); 78 | 79 | return `${formattedHours}:${formattedMinutes}:${formattedSeconds}`; 80 | } 81 | 82 | //Format the final results to contain the sentence, low confidence words, timestamps, and confidence scores. 83 | const finalResults = filterScores.map(res => { 84 | return `The following sentence at timestamp ${formatMilliseconds(res.start)} contained low confidence words: ${res.text} \n Low confidence word(s) from this sentence: ${res.words.map(res => {return `${res.text}[score: ${res.confidence}]`}).join(', ')}}` 85 | }) 86 | 87 | console.log(finalResults) 88 | ``` 89 | The output will look something like this: 90 | 91 | ``` 92 | [ 93 | 'The following sentence at timestamp 00:04:34 contained low confidence words: I am contacting you first when I could just have phoned my bank and marked you as fraud in an instant. \n' + 94 | ' Low confidence word(s) from this sentence: marked[score: 0.33049]}', 95 | 'The following sentence at timestamp 00:06:40 contained low confidence words: Sabitha, as much as I would like to help you, this is the best I can do for you. \n' + 96 | ' Low confidence word(s) from this sentence: Sabitha,[score: 0.22706]}', 97 | 'The following sentence at timestamp 00:07:37 contained low confidence words: Thank you for calling Queston. \n' + 98 | ' Low confidence word(s) from this sentence: Queston.[score: 0.16557]}' 99 | ] 100 | ``` 101 | -------------------------------------------------------------------------------- /core-transcription/do-more-with-sdk-js.md: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | 7 | # Do More With The JavaScript SDK 8 | 9 | This guide will show you additional ways to make use of AssemblyAI's JavaScript SDK. 10 | 11 | ## Get Started 12 | 13 | Before we begin, make sure you have an AssemblyAI account and an API key. You can [sign up for an AssemblyAI account](https://www.assemblyai.com/app) and get your API key from your dashboard. 14 | 15 | ## Some Useful How-Tos 16 | 17 | ### How to Check and Update Your Version of the SDK 18 | 19 | Sometimes errors are encountered because the version of the SDK you are using is not up to date. 
To see which version you are currently running, type this code in your terminal: 20 | 21 | ```js 22 | npm info assemblyai version 23 | ``` 24 | 25 | If this version is not the same as the current version of the [JavaScript SDK](https://github.com/AssemblyAI/assemblyai-node-sdk) then you can update your version by typing this code in your terminal: 26 | 27 | ```js 28 | npm update assemblyai --save 29 | ``` 30 | 31 | ### How to Catch and Log Errors 32 | 33 | Catching and logging errors to the console is an easy way help you understand what is going wrong if the code does not run correctly. 34 | 35 | Underneath the line of code where the transcript is created, `const transcript = await client.transcripts.transcribe(params)`, add the following code to catch and log any errors to the terminal: 36 | 37 | ```js 38 | if (transcript.status === 'error') { 39 | console.log(transcript.error) 40 | } 41 | ``` 42 | 43 | ### How to Log the Transcript JSON and Save it in a File 44 | 45 | In order to write data to a file, first import the [fs](https://nodejs.org/api/fs.html) package: 46 | 47 | ```js 48 | import fs from 'fs'; 49 | ``` 50 | 51 | If using the error handling code above then add this below it, otherwise add it after the transcript is created, `const transcript = await client.transcripts.transcribe(params)`: 52 | 53 | ```js 54 | const transcriptJSON = JSON.stringify(transcript, null, "\t"); 55 | 56 | fs.writeFile('transcript.json', transcriptJSON, (err) => { 57 | if (err) throw err; 58 | }) 59 | 60 | console.log(transcriptJSON) 61 | ``` 62 | 63 | ### How to Log the Transcript ID and Retrieve a Previously Created Transcript 64 | 65 | To log the transcript ID for a transcription, after the transcript is created and below any error handling, add the following code: 66 | 67 | ```js 68 | console.log("Transcript ID: ", transcript.id) 69 | ``` 70 | 71 | To see a list of all previous transcriptions, use the following code: 72 | 73 | ```js 74 | const allTranscriptsResponse = await fetch("https://api.assemblyai.com/v2/transcript?limit=4", { 75 | method: "GET", 76 | headers: { 77 | "Authorization": "" 78 | }, 79 | }); 80 | const allTranscripts = await allTranscriptsResponse.json(); 81 | console.log(allTranscripts); 82 | ``` 83 | 84 | There are additional [query parameters](https://www.assemblyai.com/docs/api-reference/transcripts/list) that can be added to this request to limit the transcripts that are returned. The above example shows how to limit the number of returned transcripts to 4. These will be the four most recently created transcripts. 85 | 86 | To get a specific transcript, use the following code: 87 | 88 | ```js 89 | const transcriptResponse = await fetch("https://api.assemblyai.com/v2/transcript/transcript_id", { 90 | method: "GET", 91 | headers: { 92 | "Authorization": "" 93 | }, 94 | }); 95 | const previousTranscript = await transcriptResponse.json(); 96 | console.log(previousTranscript); 97 | ``` 98 | 99 | Make sure when using the above code that you replace `transcript_id` in the url with the ID of the transcript you are looking to fetch. 100 | -------------------------------------------------------------------------------- /core-transcription/make.com-speaker-labels.md: -------------------------------------------------------------------------------- 1 | # Iterate over Speaker Labels with Make.com 2 | 3 | ## Introduction 4 | 5 | This is a quick guide on how to iterate over speaker labels in Make.com. This guide will return speaker labels as a readable format to a Google Doc. 
The end result will look like the two images below. 6 | 7 | #### Make.com Scenario: 8 | 9 | make.com scenario on how to transcribe an audio with speaker labels using AssemblyAI 10 | 11 | #### Google Doc Transcript: 12 | 13 | A Google Doc with a transcript divided into speaker labels 14 | 15 | ## Instructions 16 | 17 | ### Step 1: Transcribe the Audio 18 | Create a scenario in Make.com. Add a new module. Search for and select the AssemblyAI app and select the "Transcribe an Audio File" module. Add an audio URL. Select speaker labels and other models you’d like to run. 19 | 20 | image of Transcribe an Audio File module setup 21 | 22 | ### Click Run once to retrieve data. 23 | 24 | image of make.com run button 25 | 26 | 27 | ### Step 2: Wait for Completion 28 | Next, add the “Wait until Transcript is Ready” module. 29 | 30 | image of “Wait until Transcript is Ready” module 31 | 32 | ### Select "ID" from the “Transcribe an Audio” module as input for the Transcript ID field. 33 | select ID for transcription 34 | 35 | ### Step 3: Get a Transcript 36 | Next, add the “Get a Transcript” module and select "ID" from the “Transcribe an Audio” module as input for the Transcript ID field. 37 | 38 | image Get a Transcript module 39 | 40 | ### Step 4: Create a Document 41 | Search for and add the Google Docs app. From there choose the “Create a Document” module. Connect your Google account and name the Doc what you’d like. Add some filler content as well. Additionally, choose where you’d like the Doc to be located. 42 | 43 | image of the“Create a Document” module 44 | 45 | ### Step 5: Iterator Tool 46 | Add the Iterator tool next. The speaker label data is in the utterances array. Select that array as input from the “Transcribe an Audio File" module. This tool will be used to perform a task for each utterance in the array. The next module will repeat its action for each utterance. 47 | 48 | image of iterator module 49 | 50 | ### Step 6: Write Speaker Labels Data to Google Doc 51 | Add a module and choose the “Insert a Paragraph” module from Google Docs. Connect your Google account if it’s not already connected (you may have to reconnect it if you get a failed to load error). In the “Select a Document” drop-down, choose "by mapping". In the Document ID input field select document ID from the “Create a document” module. For appended text, you can follow the format below for a readable format. 52 | 53 | image of insert a paragraph module 54 | 55 | ### Step 7: Run the Scenario. 56 | Run the scenario and you should get a Google Doc in your Drive with the speaker labels included. 57 | 58 | A Google Doc with a transcript divided into speaker labels -------------------------------------------------------------------------------- /core-transcription/retry-server-error.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# How to Implement Retry Server Error Logic\n", 8 | "\n", 9 | "In this guide, we'll show you how to setup automatic server error retry logic in your transcription process.\n", 10 | "\n", 11 | "Server Errors indicate an server-side issue during the transcription process. These rarely happen but can occasionally occur on our side. If a transcription fails due to a server error, we recommend that you resubmit the file for transcription to allow another server to process the audio. 
If the issue persists, please reach out to our Support team!: support@assemblyai.com\n", 12 | "\n", 13 | "This workflow is designed to automatically retry these transcripts if a server error is encountered.\n", 14 | "\n", 15 | "> **If your transcription fails due to a server error on our side, we will automatically retry the request up to three times. You can find this option in your [Account Settings](https://assemblyai.com/app/account).**\n", 16 | "\n", 17 | "## Get started\n", 18 | "\n", 19 | "Before we begin, make sure you have an AssemblyAI account and an API key. You can [sign up](https://assemblyai.com/dashboard/signup) for a free account and get your API key from your dashboard.\n", 20 | "\n", 21 | "## Step-by-step instructions\n", 22 | "\n", 23 | "Install the SDK:" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "%pip install assemblyai" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "Import the `assemblyai` and `time` package and set your API key:" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 2, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "import assemblyai as aai\n", 49 | "import time\n", 50 | "\n", 51 | "aai.settings.api_key = \"YOUR_API_KEY\"" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "Create a function that handles errors that may occur during the transcription process. The default number of retires is 1. The default wait time before retranscribing is 5 seconds." 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 3, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "def handle_error_transcription(audio_url, transcriber, config, retries=1, wait_time=5):\n", 68 | " for attempt in range(retries + 1):\n", 69 | " transcript = transcriber.transcribe(audio_url, config)\n", 70 | " if transcript.error == \"Server error, developers have been alerted\":\n", 71 | " if attempt < retries:\n", 72 | " print(f\"Encountered a server error. Retrying in {wait_time} second(s)...\")\n", 73 | " time.sleep(wait_time)\n", 74 | " else:\n", 75 | " print(\"Retry failed with a server error. Please contact AssemblyAI Support: support@assemblyai.com\")\n", 76 | " return None\n", 77 | " elif transcript.status == aai.TranscriptStatus.error:\n", 78 | " print(f\"Transcription failed: {transcript.error}\")\n", 79 | " return None\n", 80 | " else:\n", 81 | " return transcript" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "Define the audio file which you want to transcribe." 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 4, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "audio_url = \"https://storage.googleapis.com/aai-web-samples/5_common_sports_injuries.mp3\"" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": {}, 103 | "source": [ 104 | "Create a `Transcriber` object and specify features in `TranscriptionConfig`." 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 5, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "transcriber = aai.Transcriber()\n", 114 | "config = aai.TranscriptionConfig()" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "Call the function to handle transcription with error handling. 
Specify number of retries and wait time. Return the transcribed text if transcription is successful." 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "transcript = handle_error_transcription(audio_url, transcriber, config, retries=1, wait_time=5)\n", 131 | "if transcript:\n", 132 | " print(transcript.text)" 133 | ] 134 | } 135 | ], 136 | "metadata": { 137 | "kernelspec": { 138 | "display_name": "venv", 139 | "language": "python", 140 | "name": "python3" 141 | }, 142 | "language_info": { 143 | "codemirror_mode": { 144 | "name": "ipython", 145 | "version": 3 146 | }, 147 | "file_extension": ".py", 148 | "mimetype": "text/x-python", 149 | "name": "python", 150 | "nbconvert_exporter": "python", 151 | "pygments_lexer": "ipython3", 152 | "version": "3.10.13" 153 | } 154 | }, 155 | "nbformat": 4, 156 | "nbformat_minor": 2 157 | } 158 | -------------------------------------------------------------------------------- /core-transcription/retry-upload-error.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# How to Implement Retry Upload Error Logic\n", 8 | "\n", 9 | "In this guide, we'll show you how to set up automatic upload error retry logic in your transcription process.\n", 10 | "\n", 11 | "Upload errors could be a result of a transient issue with our servers or they could be related to an issue with the file itself. Most likely the issue would be that the file is empty. Because the cause can be unclear at first, we recommend adding some retry logic to handle the rare occasions in which our upload service is experiencing performance issues. If the upload failure persists, you'll want to check whether the file is empty. If you're unclear on why the file is failing, please reach out to our support team at support@assemblyai.com.\n", 12 | "\n", 13 | "This workflow is designed to automatically retry file uploads if an upload error is encountered.\n", 14 | "\n", 15 | "\n", 16 | "## Get started\n", 17 | "\n", 18 | "Before we begin, make sure you have an AssemblyAI account and an API key. You can [sign up](https://assemblyai.com/dashboard/signup) for a free account and get your API key from your dashboard.\n", 19 | "\n", 20 | "## Step-by-step instructions\n", 21 | "\n", 22 | "Install the SDK:" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "%pip install assemblyai" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "metadata": {}, 37 | "source": [ 38 | "Import the `assemblyai` package and assemblyai's `TranscriptError`. Additionally import the and `time` package and set your API key:" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 4, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "import assemblyai as aai\n", 48 | "import time\n", 49 | "from assemblyai.types import TranscriptError\n", 50 | "\n", 51 | "# Set API key\n", 52 | "aai.settings.api_key = \"YOUR_API_KEY\"" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "Create a function that retries upload failures. This example retries up to 3 times with a delay of 5 seconds each time." 
60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "def transcribe_with_upload_retry(file_path, retries=3, delay=5):\n", 69 | " transcriber = aai.Transcriber()\n", 70 | "\n", 71 | " for attempt in range(retries):\n", 72 | " try:\n", 73 | " # Attempt to transcribe the file\n", 74 | " config = aai.TranscriptionConfig(speaker_labels=True)\n", 75 | " transcript = transcriber.transcribe(file_path, config)\n", 76 | " return transcript\n", 77 | " \n", 78 | " except TranscriptError as e:\n", 79 | " # Handle specific error if upload fails\n", 80 | " print(f\"Attempt {attempt + 1} failed. {e}\")\n", 81 | " \n", 82 | " # Retry if a TranscriptError occurs, \n", 83 | " if attempt + 1 < retries:\n", 84 | " print(f\"Retrying in {delay} seconds...\")\n", 85 | " time.sleep(delay)\n", 86 | " else:\n", 87 | " raise e # Raise the error after max retries\n", 88 | "\n", 89 | " print(\"Max retries reached. Transcription failed.\")\n", 90 | " return None\n", 91 | "\n", 92 | "# Call the function\n", 93 | "transcribe_with_upload_retry(\"audio.mp3\")" 94 | ] 95 | } 96 | ], 97 | "metadata": { 98 | "kernelspec": { 99 | "display_name": "base", 100 | "language": "python", 101 | "name": "python3" 102 | }, 103 | "language_info": { 104 | "codemirror_mode": { 105 | "name": "ipython", 106 | "version": 3 107 | }, 108 | "file_extension": ".py", 109 | "mimetype": "text/x-python", 110 | "name": "python", 111 | "nbconvert_exporter": "python", 112 | "pygments_lexer": "ipython3", 113 | "version": "3.12.4" 114 | } 115 | }, 116 | "nbformat": 4, 117 | "nbformat_minor": 2 118 | } 119 | -------------------------------------------------------------------------------- /core-transcription/speaker_labelled_subtitles.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [] 7 | }, 8 | "kernelspec": { 9 | "name": "python3", 10 | "display_name": "Python 3" 11 | }, 12 | "language_info": { 13 | "name": "python" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "markdown", 19 | "source": [ 20 | "# Creating Subtitles with Speaker Labels" 21 | ], 22 | "metadata": { 23 | "id": "9ie0NWRRzFwp" 24 | } 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "source": [ 29 | "This Colab will demonstrate how to use AssemblyAI's [Speaker Diarization](https://www.assemblyai.com/docs/speech-to-text/speaker-diarization) model together to format subtitles according to their respective speaker.\n", 30 | "\n", 31 | "# Getting Started\n", 32 | "Before we begin, make sure you have an AssemblyAI account and an API key. You can [sign up](https://www.assemblyai.com/dashboard/signup) for an AssemblyAI account and get your API key from your [dashboard](https://www.assemblyai.com/app/account).\n", 33 | "\n" 34 | ], 35 | "metadata": { 36 | "id": "Dwd9Zxmo4olH" 37 | } 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": { 43 | "id": "Pc6nDAMqzBDE" 44 | }, 45 | "outputs": [], 46 | "source": [ 47 | "!pip install assemblyai" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "source": [ 53 | "First, we will configure our API key as well as our file to be transcribed. Then, we decide on a number of words we want to have per subtitle.\n", 54 | "\n", 55 | "Lastly, we transcribe our file." 
56 | ], 57 | "metadata": { 58 | "id": "zFGsz9YlGIlQ" 59 | } 60 | }, 61 | { 62 | "cell_type": "code", 63 | "source": [ 64 | "import assemblyai as aai\n", 65 | "\n", 66 | "# SETTINGS\n", 67 | "aai.settings.api_key = \"YOUR-API-KEY\"\n", 68 | "filename = \"YOUR-FILE-NAME\"\n", 69 | "transcriber = aai.Transcriber(config=aai.TranscriptionConfig(speaker_labels=True))\n", 70 | "transcript = transcriber.transcribe(filename)\n", 71 | "\n", 72 | "# Maximum number of words per subtitle\n", 73 | "max_words_per_subtitle = 6" 74 | ], 75 | "metadata": { 76 | "id": "FnJfmd174yyo" 77 | }, 78 | "execution_count": null, 79 | "outputs": [] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "source": [ 84 | "## How the code works\n", 85 | "`speaker_colors` is a dictionary that maps speaker identifiers (like \"A\", \"B\", \"C\", etc.) to specific colors. Each speaker in the transcription will be associated with a unique color in the subtitles.\n", 86 | "\n", 87 | "When Speaker Diarization is enabled, sentences in our API response have a speaker code under the `speaker` key. We use the speaker code to determine the color of the subtitle text." 88 | ], 89 | "metadata": { 90 | "id": "YEQu3RJgImp3" 91 | } 92 | }, 93 | { 94 | "cell_type": "code", 95 | "source": [ 96 | "# Color assignments for speakers\n", 97 | "speaker_colors = {\n", 98 | " \"A\": \"red\",\n", 99 | " \"B\": \"orange\",\n", 100 | " \"C\": \"yellow\",\n", 101 | " \"D\": \"yellowgreen\",\n", 102 | " \"E\": \"green\",\n", 103 | " \"F\": \"lightskyblue\",\n", 104 | " \"G\": \"purple\",\n", 105 | " \"H\": \"mediumpurple\",\n", 106 | " \"I\": \"pink\",\n", 107 | " \"J\": \"brown\",\n", 108 | "}\n", 109 | "\n", 110 | "# Process transcription segments\n", 111 | "def process_segments(segments):\n", 112 | " srt_content = \"\"\n", 113 | " subtitle_index = 1\n", 114 | " for segment in segments:\n", 115 | " speaker = segment.speaker\n", 116 | " color = speaker_colors.get(speaker, \"black\") # Default color is black\n", 117 | "\n", 118 | " # Split text into words and group into chunks\n", 119 | " words = segment.words\n", 120 | " for i in range(0, len(words), max_words_per_subtitle):\n", 121 | " chunk = words[i:i + max_words_per_subtitle]\n", 122 | " start_time = chunk[0].start # -1 indicates continuation\n", 123 | " end_time = chunk[-1].end\n", 124 | " srt_content += create_subtitle(subtitle_index, start_time, end_time, chunk, color)\n", 125 | " subtitle_index += 1\n", 126 | "\n", 127 | " return srt_content\n", 128 | "\n", 129 | "\n", 130 | "\n", 131 | "\n", 132 | "# Create a single subtitle\n", 133 | "def create_subtitle(index, start_time, end_time, words, color):\n", 134 | " \"\"\"\n", 135 | " If you're previewing this code on GitHub, take note that this function generates HTML for color styling,\n", 136 | " and this HTML is not rendered in the GitHub preview. 
Run the notebook locally to see the actual code.\n", 137 | " Visit this link to learn more about formatting SRT files: https://docs.fileformat.com/video/srt/#formatting-of-srt-files\n", 138 | " \"\"\"\n", 139 | " text = \"\"\n", 140 | " for word in words:\n", 141 | " text += word.text + ' '\n", 142 | " start_srt = format_time(start_time)\n", 143 | " end_srt = format_time(end_time)\n", 144 | " return f\"{index}\\n{start_srt} --> {end_srt}\\n{text}\\n\\n\"\n", 145 | "\n", 146 | "# Format time in SRT style\n", 147 | "def format_time(milliseconds):\n", 148 | " hours, remainder = divmod(milliseconds, 3600000)\n", 149 | " minutes, remainder = divmod(remainder, 60000)\n", 150 | " seconds, milliseconds = divmod(remainder, 1000)\n", 151 | " return f\"{int(hours):02}:{int(minutes):02}:{int(seconds):02},{int(milliseconds):03}\"" 152 | ], 153 | "metadata": { 154 | "id": "gkOsP0KAFmEY" 155 | }, 156 | "execution_count": 2, 157 | "outputs": [] 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "source": [ 162 | "Our last step is to generate and save our subtitle file!" 163 | ], 164 | "metadata": { 165 | "id": "4guym9y3KZZ3" 166 | } 167 | }, 168 | { 169 | "cell_type": "code", 170 | "source": [ 171 | "# Generate SRT content\n", 172 | "sentences = transcript.get_sentences()\n", 173 | "srt_content = process_segments(sentences)\n", 174 | "\n", 175 | "# Save to SRT file\n", 176 | "with open(filename + '.srt', 'w') as file:\n", 177 | " file.write(srt_content)\n", 178 | "\n", 179 | "print(f\"SRT file generated: {filename}.srt\")" 180 | ], 181 | "metadata": { 182 | "id": "0rXnMeJWFsF1" 183 | }, 184 | "execution_count": null, 185 | "outputs": [] 186 | } 187 | ] 188 | } -------------------------------------------------------------------------------- /core-transcription/split_audio_file/README.md: -------------------------------------------------------------------------------- 1 | # Splitting audio file into shorter files 2 | In this code snippet, we split an audio file into shorter files. You can specify the increments of each split file (in seconds) in the `duration` variable. This can be used for mutliple use cases which can include: asynchronous batch processing, splitting files that exceed the audio length limit (10 hours for the /v2/transcript endpoint) and more. 3 | 4 | Once the script has processed, the split files will be stored in the `output` folder which can be changed to any desired folder. 5 | 6 | ## How To Run the Project 7 | 8 | ### Instructions 9 | 10 | 1. Clone the repo to your local machine. 11 | 2. Open a terminal in the main directory housing the project. 12 | 3. Add your audio files to the `/split_audio_file` folder 13 | 4. Run `pip install -r requirements.txt` to ensure all dependencies are installed. 14 | 5. Specify variables `input_file`, `output_folder` and `duration` 15 | 6. Run `split.py` 16 | 17 | ## File Size Limits 18 | There is both a maximum file size and a maximum audio duration for files that can be submitted to the API. The maximum file size that can be submitted to the /v2/transcript endpoint for transcription is 5GB. The maximum file size for a local file uploaded to the API via the /v2/upload endpoint is 2.2GB. 19 | 20 | The maximum audio duration for a file submitted to the /v2/transcript endpoint for transcription is 10 hours. 21 | 22 | 23 | ## Contact Us 24 | 25 | If you have any questions, please feel free to reach out to our Support team - [support@assemblyai.com](mailto:support@assemblyai.com) or in our Community Discord! 
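## Transcribing the Split Files

Once `split.py` has written the parts to the `output` folder, each part can be submitted to AssemblyAI. The snippet below is a minimal sketch (it is not part of this project's scripts) that follows the Python SDK pattern used elsewhere in this cookbook; it assumes the default `output` folder, MP3 parts, and a placeholder `YOUR_API_KEY`, so adapt the paths and error handling to your setup.

```python
# Minimal sketch: submit each split part for transcription and save its text.
# Assumes split.py wrote MP3 parts to the "output" folder and that the
# `assemblyai` package is installed. Replace YOUR_API_KEY with your API key.
import os
import assemblyai as aai

aai.settings.api_key = "YOUR_API_KEY"
transcriber = aai.Transcriber()

for part in sorted(os.listdir("output")):
    if not part.endswith(".mp3"):
        continue  # Skip anything that is not a split audio part
    transcript = transcriber.transcribe(os.path.join("output", part))
    with open(os.path.join("output", f"{part}.txt"), "w") as f:
        f.write(transcript.text or "")
    print(f"Transcribed {part}")
```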
26 | -------------------------------------------------------------------------------- /core-transcription/split_audio_file/audio.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AssemblyAI/cookbook/c06fa0906e39c415953c102c338856812071e6ef/core-transcription/split_audio_file/audio.mp3 -------------------------------------------------------------------------------- /core-transcription/split_audio_file/output/part_1.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AssemblyAI/cookbook/c06fa0906e39c415953c102c338856812071e6ef/core-transcription/split_audio_file/output/part_1.mp3 -------------------------------------------------------------------------------- /core-transcription/split_audio_file/output/part_2.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AssemblyAI/cookbook/c06fa0906e39c415953c102c338856812071e6ef/core-transcription/split_audio_file/output/part_2.mp3 -------------------------------------------------------------------------------- /core-transcription/split_audio_file/output/part_3.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AssemblyAI/cookbook/c06fa0906e39c415953c102c338856812071e6ef/core-transcription/split_audio_file/output/part_3.mp3 -------------------------------------------------------------------------------- /core-transcription/split_audio_file/output/part_4.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AssemblyAI/cookbook/c06fa0906e39c415953c102c338856812071e6ef/core-transcription/split_audio_file/output/part_4.mp3 -------------------------------------------------------------------------------- /core-transcription/split_audio_file/output/part_5.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AssemblyAI/cookbook/c06fa0906e39c415953c102c338856812071e6ef/core-transcription/split_audio_file/output/part_5.mp3 -------------------------------------------------------------------------------- /core-transcription/split_audio_file/requirements.txt: -------------------------------------------------------------------------------- 1 | pydub==0.25.1 2 | -------------------------------------------------------------------------------- /core-transcription/split_audio_file/split.py: -------------------------------------------------------------------------------- 1 | from pydub import AudioSegment 2 | import os 3 | import math 4 | 5 | def split_audio(input_file, output_folder, duration): 6 | audio = AudioSegment.from_mp3(input_file) 7 | total_length = len(audio) 8 | num_parts = math.ceil(total_length / (duration * 1000)) 9 | 10 | for i in range(num_parts): 11 | start = i * duration * 1000 12 | end = (i + 1) * duration * 1000 13 | split_audio = audio[start:end] 14 | output_path = os.path.join(output_folder, f"part_{i+1}.mp3") 15 | split_audio.export(output_path, format="mp3") 16 | print(f"Exported {output_path}") 17 | 18 | total_length = len(audio) 19 | num_parts = math.ceil(total_length / (duration * 1000)) 20 | 21 | input_file = "audio.mp3" # Replace with your input mp3 file 22 | output_folder = "output" # Output folder for the split audio files 23 | duration = 30 # Duration in seconds for each split audio file 24 | 25 | split_audio(input_file, output_folder, 
duration) -------------------------------------------------------------------------------- /core-transcription/subtitle_creation_by_word_count.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [] 7 | }, 8 | "kernelspec": { 9 | "name": "python3", 10 | "display_name": "Python 3" 11 | }, 12 | "language_info": { 13 | "name": "python" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "markdown", 19 | "source": [ 20 | "# Create custom length subtitles with AssemblyAI\n", 21 | "\n" 22 | ], 23 | "metadata": { 24 | "id": "6jpYYU6Eh14M" 25 | } 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "source": [ 30 | "While our SRT/VTT endpoints do allow you to customize the maximum number of characters per caption using the chars_per_caption URL parameter in your API requests, there are some use-cases that require a custom number of words in each subtitle.\n", 31 | "\n", 32 | "In this Colab, we will demonstrate how to construct these subtitles yourself in Python!" 33 | ], 34 | "metadata": { 35 | "id": "lOkGwZOeit5E" 36 | } 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "source": [ 41 | "## Step-by-Step Instructions" 42 | ], 43 | "metadata": { 44 | "id": "ejk9-USqjVpF" 45 | } 46 | }, 47 | { 48 | "cell_type": "code", 49 | "source": [ 50 | "pip install -U assemblyai\n" 51 | ], 52 | "metadata": { 53 | "id": "8K4rTtDAjefo" 54 | }, 55 | "execution_count": null, 56 | "outputs": [] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "source": [ 61 | "Import the assemblyai package and set the API key.\n", 62 | "\n" 63 | ], 64 | "metadata": { 65 | "id": "MwwCHI1AjYdJ" 66 | } 67 | }, 68 | { 69 | "cell_type": "code", 70 | "source": [ 71 | "import assemblyai as aai\n", 72 | "\n", 73 | "aai.settings.api_key = \"YOUR-API-KEY\"" 74 | ], 75 | "metadata": { 76 | "id": "AyE7BUBnjfkt" 77 | }, 78 | "execution_count": null, 79 | "outputs": [] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "source": [ 84 | "Create a Transcriber object.\n" 85 | ], 86 | "metadata": { 87 | "id": "vyDXhkqkjhXh" 88 | } 89 | }, 90 | { 91 | "cell_type": "code", 92 | "source": [ 93 | "transcriber = aai.Transcriber()" 94 | ], 95 | "metadata": { 96 | "id": "xrx_D4cxjjfh" 97 | }, 98 | "execution_count": null, 99 | "outputs": [] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "source": [ 104 | "Use the Transcriber object's transcribe method and pass in the audio file's path as a parameter. The transcribe method saves the results of the transcription to the Transcriber object's transcript attribute." 
105 | ], 106 | "metadata": { 107 | "id": "0Tnxu229jpDE" 108 | } 109 | }, 110 | { 111 | "cell_type": "code", 112 | "source": [ 113 | "transcript = transcriber.transcribe(\"./my-audio.mp3\")" 114 | ], 115 | "metadata": { 116 | "id": "yghza_7tjrgQ" 117 | }, 118 | "execution_count": null, 119 | "outputs": [] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "source": [ 124 | "Alternatively, you can pass in the URL of the publicly accessible audio file on the internet.\n", 125 | "\n" 126 | ], 127 | "metadata": { 128 | "id": "dkOKPKBSjtA8" 129 | } 130 | }, 131 | { 132 | "cell_type": "code", 133 | "source": [ 134 | "transcript = transcriber.transcribe(\"https://storage.googleapis.com/aai-docs-samples/espn.m4a\")" 135 | ], 136 | "metadata": { 137 | "id": "dhZvEAl2joyF" 138 | }, 139 | "execution_count": null, 140 | "outputs": [] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "source": [ 145 | "Define a function that converts seconds to timecodes" 146 | ], 147 | "metadata": { 148 | "id": "4a-Dbw8fj5x9" 149 | } 150 | }, 151 | { 152 | "cell_type": "code", 153 | "source": [ 154 | "def second_to_timecode(x: float) -> str:\n", 155 | " hour, x = divmod(x, 3600)\n", 156 | " minute, x = divmod(x, 60)\n", 157 | " second, x = divmod(x, 1)\n", 158 | " millisecond = int(x * 1000.)\n", 159 | "\n", 160 | " return '%.2d:%.2d:%.2d,%.3d' % (hour, minute, second, millisecond)" 161 | ], 162 | "metadata": { 163 | "id": "BdXWruwNkAZH" 164 | }, 165 | "execution_count": null, 166 | "outputs": [] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "source": [ 171 | "Define a function that iterates through the transcripts object to construct a list according to the number of words per subtitle" 172 | ], 173 | "metadata": { 174 | "id": "N6sZzPD7kE26" 175 | } 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": { 181 | "id": "BHtOhJqkh0KI" 182 | }, 183 | "outputs": [], 184 | "source": [ 185 | "def generate_subtitles_by_word_count(transcript, words_per_line):\n", 186 | " output = []\n", 187 | " subtitle_index = 1 # Start subtitle index at 1\n", 188 | " word_count = 0\n", 189 | " current_words = []\n", 190 | "\n", 191 | " for sentence in transcript.get_sentences():\n", 192 | " for word in sentence.words:\n", 193 | " current_words.append(word)\n", 194 | " word_count += 1\n", 195 | " if word_count >= words_per_line or word == sentence.words[-1]:\n", 196 | " start_time = second_to_timecode(current_words[0].start / 1000)\n", 197 | " end_time = second_to_timecode(current_words[-1].end / 1000)\n", 198 | " subtitle_text = \" \".join([word.text for word in current_words])\n", 199 | " output.append(str(subtitle_index))\n", 200 | " output.append(\"%s --> %s\" % (start_time, end_time))\n", 201 | " output.append(subtitle_text)\n", 202 | " output.append(\"\")\n", 203 | " current_words = [] # Reset for the next subtitle\n", 204 | " word_count = 0 # Reset word count\n", 205 | " subtitle_index += 1\n", 206 | "\n", 207 | " return output" 208 | ] 209 | }, 210 | { 211 | "cell_type": "markdown", 212 | "source": [ 213 | "Generate your subtitle file" 214 | ], 215 | "metadata": { 216 | "id": "n-3pqh9ClGUn" 217 | } 218 | }, 219 | { 220 | "cell_type": "code", 221 | "source": [ 222 | "subs = generate_subtitles_by_word_count(transcript, 6)\n", 223 | "with open(f\"{transcript.id}.srt\", 'w') as o:\n", 224 | " final = '\\n'.join(subs)\n", 225 | " o.write(final)\n", 226 | "\n", 227 | "print(\"SRT file generated.\")" 228 | ], 229 | "metadata": { 230 | "id": "T8pOkDjQlOUi" 231 | }, 232 | "execution_count": 
null, 233 | "outputs": [] 234 | }, 235 | { 236 | "cell_type": "markdown", 237 | "source": [ 238 | "# Conclusion\n", 239 | "This is a useful script for those looking to customise the number of words in their captions instead of the number of characters!" 240 | ], 241 | "metadata": { 242 | "id": "Bo2TwNyOnd7F" 243 | } 244 | } 245 | ] 246 | } -------------------------------------------------------------------------------- /core-transcription/transcribe_batch_of_files/README.md: -------------------------------------------------------------------------------- 1 | # Transcribe a batch of audio files with AssemblyAI 2 | In this app, we submit a folder of files from the user's computer and then submit them to AssemblyAI for asynchronous transcription. Once a transcript completes, a Webhook message from AssemblyAI triggers a server function that saves the transcript into a text file. This is accomplished using FastAPI for our server, ngrok to expose our development server to the public Internet, and AssemblyAI's Python SDK. 3 | 4 | ## How To Install and Run the Project 5 | 6 | ### Prerequisites 7 | [An AssemblyAI account](https://www.assemblyai.com/dashboard/signup). We recommend upgrading to a Pro account which unlocks a [concurrency limit](https://www.assemblyai.com/docs/concepts/concurrency-limit) of 32 for faster transcriptions. 8 | [A free ngrok account](https://dashboard.ngrok.com/signup). 9 | 10 | ### Instructions 11 | 12 | 1. Clone the repo to your local machine. 13 | 2. Open a terminal in the main directory housing the project. 14 | 3. Add your audio files to be transcribed the `audio` directory 15 | 4. Run `pip install -r requirements.txt` to ensure all dependencies are installed. 16 | 5. Add your AssemblyAI key to line 5 of `receiver.py` and line 12 of `submitter.py`. 17 | 6. Export your authtoken from the ngrok dashboard as `NGROK_AUTHTOKEN` in your terminal using `export NGROK_AUTHTOKEN=YOUR_NGROK_TOKEN_HERE` 18 | 7. Start the server with the command `uvicorn receiver:app` (will run on port 8000). 19 | 8. Open a second terminal in the main directory of the project and start the submitter script with `python submitter.py` 20 | 9. Do not close or exit `submitter.py` until all of your transcripts are complete, as it keeps the public URL created by ngrok for your server alive. 21 | 22 | ## Further Documentation 23 | 24 | - [Using Webhooks with AssemblyAI](https://www.assemblyai.com/docs/concepts/webhooks) 25 | - [FastAPI](https://fastapi.tiangolo.com/) 26 | 27 | ## Contact Us 28 | 29 | If you have any questions, please feel free to reach out to our Support team - [support@assemblyai.com](mailto:support@assemblyai.com) or in our Community Discord! 
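## Testing the Webhook Locally (Optional)

Before submitting a large batch, you can check that `receiver.py` behaves as expected by simulating the webhook request AssemblyAI sends when a transcript completes. The snippet below is a minimal sketch, not part of this project: it assumes the server is already running on port 8000, that the `requests` package is installed (it is not in `requirements.txt`), and that you replace `YOUR_TRANSCRIPT_ID` with the ID of a completed transcript from your account.

```python
# Minimal sketch: simulate the webhook POST that AssemblyAI sends on completion.
# The JSON fields match the Result model in receiver.py (status, transcript_id),
# and the filename query parameter mirrors the webhook_url built in submitter.py.
import requests

response = requests.post(
    "http://localhost:8000/?filename=audio.mp3",
    json={"status": "completed", "transcript_id": "YOUR_TRANSCRIPT_ID"},
)
print(response.status_code)  # Expect 200; the text is written to transcripts/audio.mp3.txt
```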
30 | -------------------------------------------------------------------------------- /core-transcription/transcribe_batch_of_files/audio/audio.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AssemblyAI/cookbook/c06fa0906e39c415953c102c338856812071e6ef/core-transcription/transcribe_batch_of_files/audio/audio.mp3 -------------------------------------------------------------------------------- /core-transcription/transcribe_batch_of_files/receiver.py: -------------------------------------------------------------------------------- 1 | from fastapi import FastAPI 2 | import assemblyai as aai 3 | from pydantic import BaseModel 4 | 5 | aai.settings.api_key = "YOUR-API-KEY" 6 | 7 | app = FastAPI() 8 | 9 | class Result(BaseModel): 10 | status: str 11 | transcript_id: str 12 | 13 | @app.post("/") 14 | async def retrieve(filename: str, result: Result): 15 | if result.status == "completed": 16 | transcription_result = aai.Transcript.get_by_id(result.transcript_id) 17 | with open(f"transcripts/{filename}.txt", "w") as f: 18 | f.write(transcription_result.text) 19 | elif result.status == "error": 20 | transcription_result = aai.Transcript.get_by_id(result.transcript_id) 21 | print("Error: ", transcription_result.error) -------------------------------------------------------------------------------- /core-transcription/transcribe_batch_of_files/requirements.txt: -------------------------------------------------------------------------------- 1 | fastapi[all] 2 | assemblyai 3 | ngrok -------------------------------------------------------------------------------- /core-transcription/transcribe_batch_of_files/submitter.py: -------------------------------------------------------------------------------- 1 | import ngrok 2 | import os 3 | import assemblyai as aai 4 | 5 | listener = ngrok.connect(8000, authtoken_from_env=True) 6 | public_url = listener.url() 7 | print("Public url: ", public_url) 8 | 9 | aai.settings.api_key = "YOUR-API-KEY" 10 | transcriber = aai.Transcriber() 11 | 12 | for file in os.listdir("audio"): 13 | config = aai.TranscriptionConfig( 14 | webhook_url=f"{public_url}/?filename={file}" 15 | ) 16 | transcriber.submit(f"audio/{file}", config=config) 17 | 18 | while True: 19 | user_input = input("Press enter once your transcriptions are complete") 20 | if user_input: 21 | break 22 | -------------------------------------------------------------------------------- /core-transcription/transcribe_batch_of_files/transcripts/audio.mp3.txt: -------------------------------------------------------------------------------- 1 | AssemblyAI is a deep learning company that builds powerful APIs to help you transcribe and understand audio. The most common use case for the API is to automatically convert pre recorded audio and video files, as well as real time audio streams into text transcriptions. Our APIs convert audio and video into text using powerful deep learning models that we research and develop end to end in house. Millions of podcasts, zoom recordings, phone calls, or video files are being transcribed with AssemblyAI every single day. But where AssemblyAI really excels is with helping you understand your data. So let's say we transcribe Joe Biden's State of the Union using AssemblyAI's API, with our Auto Chapter feature, you can generate time coded summaries of the key moments of your audio file. 
For example, with the State of the Union address, we get chapter summaries like this auto Chapters automatically segments your audio or video files into chapters and provides a summary for each of these chapters. With sentiment analysis, we can classify what's being spoken in your audio files as either positive, negative, or neutral. So, for example, in the State of the Union address, we see that this sentence was classified as positive, whereas this sentence was classified as negative. Content Safety detection can flag sensitive content as it is spoken, like hate speech, profanity, violence, or weapons. For example, in Biden's State of the Union address, content safety detection flags parts of his speech as being about weapons. This feature is especially useful for automatic content moderation and brand safety use cases. With Auto highlights, you can automatically identify important words and phrases that are being spoken in your data. Owned by the State of the Union address, AssemblyAI's API detected these words and phrases as being important. Lastly, with entity detection, you can identify entities that are spoken in your audio, like organization names or person names. In Biden's speech, these were the entities that were detected. This is just a preview of the most popular features of AssemblyAI API. If you want a full list of features, go check out our documentation linked in the description below. And if you ever need some support, our team of developers is here to help. Every day, developers are using these features to build really exciting applications. From meeting summarizers, to brand safety or contextual targeting platforms, to full blown conversational intelligence tools, we can't wait to see what you build with AssemblyAI. -------------------------------------------------------------------------------- /core-transcription/transcribe_youtube_videos.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "5YYNSNwi1Qju" 7 | }, 8 | "source": [ 9 | "# How to Get YouTube Video Transcripts with yt-dlp\n", 10 | "\n", 11 | "In this guide, we'll show you how to get YouTube transcripts.\n", 12 | "\n", 13 | "For this, we use the [yt-dlp](https://github.com/yt-dlp/yt-dlp) library to download YouTube videos and then transcribe it with the AssemblyAI API.\n", 14 | "\n", 15 | "`yt-dlp` is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork with additional features and fixes. It is better maintained and preferred over `youtube-dl` nowadays.\n", 16 | "\n", 17 | "In this guide we'll show 2 different approaches:\n", 18 | "\n", 19 | "- Option 1: Download video via CLI\n", 20 | "- Option 2: Download video via code\n", 21 | "\n", 22 | "Let's get started!" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": { 28 | "id": "UjDmHj8j1Kb8" 29 | }, 30 | "source": [ 31 | "## Install Dependencies\n", 32 | "\n", 33 | "Install [yt-dlp](https://github.com/yt-dlp/yt-dlp) and the [AssemblyAI Python SDK](https://github.com/AssemblyAI/assemblyai-python-sdk) via pip." 
34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": { 40 | "id": "cTj3fv9JNfec" 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "!pip install -U yt-dlp" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": { 51 | "id": "bOwNZqwNRXeH" 52 | }, 53 | "outputs": [], 54 | "source": [ 55 | "!pip install assemblyai" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": { 61 | "id": "6Odi9C4PC4Q_" 62 | }, 63 | "source": [ 64 | "## Option 1: Download video via CLI" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": { 70 | "id": "izfLx--WaO9k" 71 | }, 72 | "source": [ 73 | "In this approach we download the YouTube video via the command line and then transcribe it via the AssemblyAI API. We use the following video here:\n", 74 | "\n", 75 | "- https://www.youtube.com/watch?v=wtolixa9XTg\n", 76 | "\n", 77 | "To download it, use the `yt-dlp` command with the following options:\n", 78 | "\n", 79 | "- `-f m4a/bestaudio`: The format should be the best audio version in m4a format.\n", 80 | "- `-o \"%(id)s.%(ext)s\"`: The output name should be the id followed by the extension. In this example, the video gets saved to \"wtolixa9XTg.m4a\".\n", 81 | "- `wtolixa9XTg`: the id of the video." 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 4, 87 | "metadata": { 88 | "colab": { 89 | "base_uri": "https://localhost:8080/" 90 | }, 91 | "id": "TmCGVktCF_DF", 92 | "outputId": "aa0948e0-4d2d-4b05-924e-699257a3618e" 93 | }, 94 | "outputs": [ 95 | { 96 | "name": "stdout", 97 | "output_type": "stream", 98 | "text": [ 99 | "[youtube] Extracting URL: wtolixa9XTg\n", 100 | "[youtube] wtolixa9XTg: Downloading webpage\n", 101 | "[youtube] wtolixa9XTg: Downloading ios player API JSON\n", 102 | "[youtube] wtolixa9XTg: Downloading android player API JSON\n", 103 | "[youtube] wtolixa9XTg: Downloading m3u8 information\n", 104 | "[info] wtolixa9XTg: Downloading 1 format(s): 140\n", 105 | "[download] Destination: wtolixa9XTg.m4a\n", 106 | "\u001b[K[download] 100% of 7.14MiB in \u001b[1;37m00:00:00\u001b[0m at \u001b[0;32m16.32MiB/s\u001b[0m\n", 107 | "[FixupM4a] Correcting container of \"wtolixa9XTg.m4a\"\n" 108 | ] 109 | } 110 | ], 111 | "source": [ 112 | "!yt-dlp -f m4a/bestaudio -o \"%(id)s.%(ext)s\" wtolixa9XTg" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": { 118 | "id": "NxcsSZus7gru" 119 | }, 120 | "source": [ 121 | "Next, set up the AssemblyAI SDK and trancribe the file. Replace `YOUR_API_KEY` with your own key. If you don't have one, you can [sign up here](https://assemblyai.com/dashboard/signup) for free.\n", 122 | "\n", 123 | "Make sure that the path you pass to the `transcribe()` function corresponds to the saved filename." 
124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 5, 129 | "metadata": { 130 | "id": "nvoaFsS9al0U" 131 | }, 132 | "outputs": [], 133 | "source": [ 134 | "import assemblyai as aai\n", 135 | "\n", 136 | "aai.settings.api_key = \"YOUR_API_KEY\"\n", 137 | "\n", 138 | "transcriber = aai.Transcriber()\n", 139 | "transcript = transcriber.transcribe(\"wtolixa9XTg.m4a\")" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 14, 145 | "metadata": { 146 | "colab": { 147 | "base_uri": "https://localhost:8080/", 148 | "height": 143 149 | }, 150 | "id": "NCYWChOz1453", 151 | "outputId": "d14b2dce-94c2-4e0f-f8a3-0892960829a0" 152 | }, 153 | "outputs": [ 154 | { 155 | "data": { 156 | "application/vnd.google.colaboratory.intrinsic+json": { 157 | "type": "string" 158 | }, 159 | "text/plain": [ 160 | "\"Hi everyone, I'm Patrick and in this video I show you how I would learn machine learning if I could start over. For context, I'm a machine learning developer advocate at Assemblyai and before that I worked several years as software developer and ML Engineer, and I also teach Python and machine learning on my own YouTube channel. So I would say I'm pretty experienced in the field, but I know that the available courses out there can be overwhelming for beginners, so I hope to give you some guide ends with this video. The demand for machine learning engineers is still increasing every year, so it's a great skill to have. I divided this learning path into seven steps that should take you about three months to finish. Of course, this can vary depending on how much time and effort you want to put into this, and I know that everyone learns differently or might have different goals. So this is just my personal take on how to learn machine learning. You can use this guide if you just want to explore machine learning as a hobby, but also if you plan to find a job in the field, I will mention a few more tips about the job search in the end. So let's jump into the study plan. The first thing I recommend is to lay the foundation with some math basics. Now you might say math is not really necessary anymore, and this is partly true. The available machine learning frameworks abstract the math away, and I know many machine learning engineers who don't need it in their day job at all. However, in my opinion, knowing the underlying math provides you with a better foundation and better understanding of how the algorithms work, and it makes your life easier when you run into problems. Also, I think there is beauty in the underlying math that makes the machines learn. So for me, knowing the math sparked my excitement even more. Now, you don't need to get too deep into this. A great website with free resources is Khan Academy. So my recommendation is just to take some basic courses and then move on. And then later when you do the actual machine learning course and don't understand everything, then come back here and learn the missing topics. Oh, and by the way, you find all the resources and recommended courses in the description below. The next step is to learn Python. It is the number one programming language for machine learning and there is no way around it. All major machine learning frameworks are built with it, and all major courses use Python for their exercises. So having decent Python skills is essential to build machine learning projects. Now you don't need to become an advanced software developer, but a little bit more than the beginner level would be great. 
One great thing about Python is that it is very beginner friendly, and in my opinion, it's the best first programming language you can learn. I recommend two free courses on YouTube, one four hour beginner course and one six hour intermediate course, and then you should have a solid base. This step is what I call the Machine learning tech stack and consists of the most important Python libraries for machine learning, data science, and data visualization. This step is a bit optional because you can also pick up these skills later when you do the actual machine learning course, but I think it's great to build the foundation first, and then it will be easier later. The three libraries I recommend at this point are numpy, which is the base for everything, Pandas, which is important for data handling, and Matplotlib, which is needed for visualization. These libraries are used in almost every machine learning project. That's why I would include them in your learning path at this point. Again, you don't need to learn too much here. I recommend just following one free crash course for each library, and then later pick up more advanced concepts if you need them. At this point, you don't have to learn the machine learning courses like ScikitLearn, TensorFlow, or Pytorch. You could, of course, if you want, but these are included in the machine learning course I show you in a moment, so you can pick these up later. Now that we've covered the coding skills, it is finally time for the actual machine learning course. There are many great ones available, but the most popular, and in my opinion, also one of the best ones, is the machine learning specialization by Andrew Ng on Coursera. This specialization includes three courses. It got revamped just a few months ago and now includes Python with Numpy, ScikitLearn and TensorFlow for the code. So you not only learn all the essential machine learning concepts, but also get your first hands on experience with the ML libraries. It is extensive and it takes several weeks to finish, but it's worth it after these courses. I have one more recommendation for you. I suggest to implement a few algorithms from scratch in Python using only pure Python and Numpy, for example, by following my ML from scratch playlist here on YouTube. This is completely optional, but it helped me to properly understand some of the concepts from Andrew's course, and a lot of students have told me the same feedback, so check it out if you want to. Also, we plan to release an updated version of the ML from Scratch course here on the Assemblyai channel, so make sure to subscribe to our channel and don't miss it. Now I recommend getting even more hands on and learning more about data preparation for this. Keckle has awesome free courses on their website. I recommend at least the intro to ML and intermediate ML courses. They are lightweight compared to the previous one and some material is just a refresher for you, but you learn more about data preprocessing and data preparation with pandas. Each lesson has a theory part and then some coding exercises. It also gives you a gentle introduction to the Kaggle platform and you learn how to make code submissions on Kaggle, which is perfect for the next point. Now it's time to practice as much as possible and apply your knowledge to real world machine learning problems. For this, the best platform is Kaggle.com. It provides thousands of different datasets and challenges where you can participate. Participating in challenges can motivate you a lot. 
Now, I wouldn't try making it to the top or even winning prize money with this, because to be honest, this requires true expertise and also a lot of GPU power. But I would still try to tweak your solutions multiple times by learning more about data pre processing and also about hyperparameter tuning. You can then use Kaggle competitions to build your portfolio and put them on your CV. So in my opinion, Kaggle is an awesome platform and you should practice here as much as possible. Of course, you can also tackle other machine learning problems outside of Kaggle. It just makes your life a little bit easier because it provides you with the datasets, a platform to evaluate the projects, and there's a whole community around it. At this point, you can already be super proud of yourself. And now in this last section, I want to give you a few more tips. If your goal is to get a job. The tasks of ML engineers vary a lot and it's not possible to know everything. For example, some positions are specialized in computer vision or NLP, or they require you to have experience with a specific ML framework or even MLOPs requirements, like how to deploy and scale ML apps. ML Ops is a whole field on its own, so I may cover this in a separate video. My point is, you have to decide in which field you want to work, and then look at the requirements and some corresponding job descriptions, and then specialize in this direction. Another great tip I can give you, and this is something I wish I had done earlier in my career, is to start a blog. You can write tutorials, share what you've learned, which projects you have built, which problems you have faced along the way, and how you've solved them by writing about a topic, you can deepen your knowledge and then you can use this as a resource on your CV. Trust me, this will increase your chances to get an interview a lot. All right, that's my recommendation for a machine learning study guide. Again, this might not be suited for everyone. This is just how I would learn machine learning if I had to start over. Just one more quick addition. If you prefer learning with books, then you can check out these two books. Let me know in the comments if this was helpful, or if you have any other suggestions you would add to the plan. Don't forget to check out the resource list I put in the description below and then I hope to see you in the next video. 
Bye.\"" 161 | ] 162 | }, 163 | "execution_count": 14, 164 | "metadata": {}, 165 | "output_type": "execute_result" 166 | } 167 | ], 168 | "source": [ 169 | "transcript.text" 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": { 175 | "id": "TsNgF7XjC6p6" 176 | }, 177 | "source": [ 178 | "## Option 2: Download video via code" 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": { 184 | "id": "sAgakTyCaT_H" 185 | }, 186 | "source": [ 187 | "In this approach we download the video with a Python script instead of the command line.\n", 188 | "\n", 189 | "You can download the file with the following code:" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": 7, 195 | "metadata": { 196 | "colab": { 197 | "base_uri": "https://localhost:8080/" 198 | }, 199 | "id": "yF26d-N7vHYf", 200 | "outputId": "c251d291-c729-4063-b91e-d9c67a09804c" 201 | }, 202 | "outputs": [ 203 | { 204 | "name": "stdout", 205 | "output_type": "stream", 206 | "text": [ 207 | "[youtube] Extracting URL: https://www.youtube.com/watch?v=wtolixa9XTg\n", 208 | "[youtube] wtolixa9XTg: Downloading webpage\n", 209 | "[youtube] wtolixa9XTg: Downloading ios player API JSON\n", 210 | "[youtube] wtolixa9XTg: Downloading android player API JSON\n", 211 | "[youtube] wtolixa9XTg: Downloading m3u8 information\n", 212 | "[info] wtolixa9XTg: Downloading 1 format(s): 140\n", 213 | "[download] wtolixa9XTg.m4a has already been downloaded\n", 214 | "[download] 100% of 7.13MiB\n", 215 | "[ExtractAudio] Not converting audio wtolixa9XTg.m4a; file is already in target format m4a\n" 216 | ] 217 | } 218 | ], 219 | "source": [ 220 | "import yt_dlp\n", 221 | "\n", 222 | "URLS = ['https://www.youtube.com/watch?v=wtolixa9XTg']\n", 223 | "\n", 224 | "ydl_opts = {\n", 225 | " 'format': 'm4a/bestaudio/best', # The best audio version in m4a format\n", 226 | " 'outtmpl': '%(id)s.%(ext)s', # The output name should be the id followed by the extension\n", 227 | " 'postprocessors': [{ # Extract audio using ffmpeg\n", 228 | " 'key': 'FFmpegExtractAudio',\n", 229 | " 'preferredcodec': 'm4a',\n", 230 | " }]\n", 231 | "}\n", 232 | "\n", 233 | "\n", 234 | "with yt_dlp.YoutubeDL(ydl_opts) as ydl:\n", 235 | " error_code = ydl.download(URLS)" 236 | ] 237 | }, 238 | { 239 | "cell_type": "markdown", 240 | "metadata": { 241 | "id": "V5eKUhDs9dTX" 242 | }, 243 | "source": [ 244 | "After downloading, you can again use the code from option 1 to transcribe the file:" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": null, 250 | "metadata": { 251 | "id": "2sfnjZC_9gaD" 252 | }, 253 | "outputs": [], 254 | "source": [ 255 | "import assemblyai as aai\n", 256 | "\n", 257 | "aai.settings.api_key = \"YOUR_API_KEY\"\n", 258 | "\n", 259 | "transcriber = aai.Transcriber()\n", 260 | "transcript = transcriber.transcribe(\"wtolixa9XTg.m4a\")" 261 | ] 262 | } 263 | ], 264 | "metadata": { 265 | "colab": { 266 | "provenance": [] 267 | }, 268 | "kernelspec": { 269 | "display_name": "Python 3", 270 | "name": "python3" 271 | }, 272 | "language_info": { 273 | "name": "python" 274 | } 275 | }, 276 | "nbformat": 4, 277 | "nbformat_minor": 0 278 | } 279 | -------------------------------------------------------------------------------- /core-transcription/transcribing-github-files.md: -------------------------------------------------------------------------------- 1 | ## **Transcribing Files Hosted on GitHub with AssemblyAI** 2 | 3 | ### Step 1: Upload Your Audio Files to a Public GitHub Repository 4 | 5 | - **File 
Requirements**: GitHub has a file size limit of 100MB, so ensure your audio files are 100MB or smaller. The files must be in a public repository; otherwise, you will receive an error saying the file is not publicly accessible. For a more secure way to host files, check out our [Transcribing from an S3 Bucket Cookbook](transcribe_from_s3.ipynb). 6 | 7 | ### Step 2: Obtain the Raw Audio URL from GitHub 8 | 9 | 1. Navigate to the repository that houses the audio file. 10 | 2. Click on the audio file. On the next page, right-click the "View raw" link and select "Copy link address" from the context menu. 11 | 12 | An image of an audio file in a GitHub repository 13 | 14 | Downloadable file URLs are formatted as `https://github.com/<user>/<repository>/raw/<branch>/<filename>` 15 | 16 | ### Step 3: Add the Audio URL to your Request 17 | 18 | ``` 19 | POST v2/transcript endpoint 20 | 21 | { 22 | "audio_url":"https://github.com/user/audio-files/raw/main/audio.mp3" 23 | } 24 | ``` 25 | 26 | ``` 27 | Python SDK 28 | 29 | transcript = transcriber.transcribe("https://github.com/user/audio-files/raw/main/audio.mp3") 30 | ``` 31 | 32 | ``` 33 | Typescript SDK 34 | 35 | const transcript = await client.transcripts.transcribe("https://github.com/user/audio-files/raw/main/audio.mp3") 36 | ``` 37 | 38 | ### **Resources** 39 | [AssemblyAI's Supported File Types](https://www.assemblyai.com/docs/concepts/faq)
40 | [Transcribe an Audio File](https://www.assemblyai.com/docs/getting-started/transcribe-an-audio-file) 41 | -------------------------------------------------------------------------------- /core-transcription/transcribing-google-drive-file.md: -------------------------------------------------------------------------------- 1 | ## **Guide to Using Google Drive Links with AssemblyAI** 2 | 3 | ### **Step 1: Upload Your Audio File to Google Drive** 4 | 5 | - **File Requirements**: Ensure your audio file is smaller than 100MB, as files larger than this cannot be directly downloaded from Google Drive links. 6 | - **Uploading**: Log into your Google Drive account and upload the audio file you want to use. 7 | 8 | ### **Step 2: Make Your File Publicly Accessible** 9 | 10 | - **Right-Click** on the uploaded file in Google Drive. 11 | - Select **'Get Link'**. 12 | - Change the setting from “Restricted” to “Anyone with the link”. This makes the file publicly accessible. 13 | 14 | 15 | ### **Step 3: Obtain the Downloadable URL** 16 | 17 | - Click on `Copy link` to copy your shared link. 18 | - Initially, the shared link will look something like this: https://drive.google.com/file/d/1YvY3gX-4ZwY7K4r3J0THKNTvvolB3D-S/view?usp=sharing. 19 | - To make it a downloadable link, modify it to this format: 20 | `https://drive.google.com/u/0/uc?id=FILE_ID&export=download`. 21 | - **Example**: If your shared link is `https://drive.google.com/file/d/1YvY3gX-4ZwY7K4r3J0THKNTvvolB3D-S/view?usp=sharing`, 22 | change it to `https://drive.google.com/u/0/uc?id=1YvY3gX-4ZwY7K4r3J0THKNTvvolB3D-S&export=download`. 23 | 24 | ![Screenshot of Google Drive settings](https://cdn.discordapp.com/attachments/385968901797707783/1183781584491520082/image.png?ex=65899583&is=65772083&hm=99867e15b81e7b5e3917b2ae88367d8ab44a2de4613aa91e857938fc8f4b6120&) 25 | 26 | ### **Step 4: Use the URL with AssemblyAI** 27 | 28 | - Now, you can use the downloadable link in your AssemblyAI API request. This URL points directly to your audio file, allowing AssemblyAI to access and process it, as shown below and in the full script at the end of this guide. 29 | 30 | ``` 31 | transcriber = aai.Transcriber() 32 | 33 | audio_url = (
    "https://drive.google.com/u/0/uc?id=1YvY3gX-4ZwY7K4r3J0THKNTvvolB3D-S&export=download"
) 35 | 36 | 37 | transcript = transcriber.transcribe(audio_url) 38 | ``` 39 | ### **Notes** 40 | 41 | - **Security**: Ensure that sharing your audio file publicly complies with your privacy and security policies. 42 | - If you prefer not to share your file publicly, you can [upload your file to our servers instead.](https://www.assemblyai.com/docs/guides/transcribing-an-audio-file#step-by-step-instructions) 43 | - **File Format**: Check that your audio file is in a format supported by AssemblyAI. 
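### **Full Example Script**

Putting Steps 1–4 together, the sketch below shows one way to go from a Google Drive file ID to a finished transcript with the AssemblyAI Python SDK. `YOUR_FILE_ID` and `YOUR_API_KEY` are placeholders, and the final check assumes the SDK's usual `transcript.status`/`transcript.error` fields — treat it as a starting point rather than a drop-in script.

```
import assemblyai as aai

aai.settings.api_key = "YOUR_API_KEY"

# File ID copied from the Google Drive shared link (placeholder).
file_id = "YOUR_FILE_ID"

# Build the direct-download URL in the format described in Step 3.
audio_url = f"https://drive.google.com/u/0/uc?id={file_id}&export=download"

transcriber = aai.Transcriber()
transcript = transcriber.transcribe(audio_url)

if transcript.status == aai.TranscriptStatus.error:
    # A common cause is a file that is not publicly accessible or is too large.
    print("Transcription failed:", transcript.error)
else:
    print(transcript.text)
```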
44 | -------------------------------------------------------------------------------- /guide-images/make-create-doc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AssemblyAI/cookbook/c06fa0906e39c415953c102c338856812071e6ef/guide-images/make-create-doc.png -------------------------------------------------------------------------------- /guide-images/make-final-transcript.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AssemblyAI/cookbook/c06fa0906e39c415953c102c338856812071e6ef/guide-images/make-final-transcript.png -------------------------------------------------------------------------------- /guide-images/make-get-id.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AssemblyAI/cookbook/c06fa0906e39c415953c102c338856812071e6ef/guide-images/make-get-id.png -------------------------------------------------------------------------------- /guide-images/make-get-transcript.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AssemblyAI/cookbook/c06fa0906e39c415953c102c338856812071e6ef/guide-images/make-get-transcript.png -------------------------------------------------------------------------------- /guide-images/make-insert-paragraph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AssemblyAI/cookbook/c06fa0906e39c415953c102c338856812071e6ef/guide-images/make-insert-paragraph.png -------------------------------------------------------------------------------- /guide-images/make-iterator.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AssemblyAI/cookbook/c06fa0906e39c415953c102c338856812071e6ef/guide-images/make-iterator.png -------------------------------------------------------------------------------- /guide-images/make-run.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AssemblyAI/cookbook/c06fa0906e39c415953c102c338856812071e6ef/guide-images/make-run.png -------------------------------------------------------------------------------- /guide-images/make-scenario.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AssemblyAI/cookbook/c06fa0906e39c415953c102c338856812071e6ef/guide-images/make-scenario.png -------------------------------------------------------------------------------- /guide-images/make-transcribe-audio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AssemblyAI/cookbook/c06fa0906e39c415953c102c338856812071e6ef/guide-images/make-transcribe-audio.png -------------------------------------------------------------------------------- /guide-images/make-wait-for-completion.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AssemblyAI/cookbook/c06fa0906e39c415953c102c338856812071e6ef/guide-images/make-wait-for-completion.png -------------------------------------------------------------------------------- /guide-images/view-raw.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AssemblyAI/cookbook/c06fa0906e39c415953c102c338856812071e6ef/guide-images/view-raw.png -------------------------------------------------------------------------------- /lemur/README.md: -------------------------------------------------------------------------------- 1 | # LeMUR 🐾 2 | 3 | Apply Large Language Models to spoken data. A Large Language Model (LLM) is a machine learning model that uses natural language processing (NLP) to generate text. LeMUR is a framework that lets you apply LLMs to audio transcripts, for example to ask questions about a call, or to summarize a meeting. 4 | 5 | ## All LeMUR Cookbooks 6 | 7 | ### Basic LeMUR Workflows 8 | 9 | [Process Audio Files with LLMs](using-lemur.ipynb)\ 10 | [Use the Specialized Endpoints](specialized-endpoints.ipynb)\ 11 | 🆕 [Receive AI Coaching from the Task Endpoint](task-endpoint-ai-coach.ipynb)\ 12 | 🆕 [Generate Action Items Using the Task Endpoint](task-endpoint-action-items.ipynb)\ 13 | 🆕 [Ask Questions about a Transcript Using the Task Endpoint](task-endpoint-structured-QA.ipynb)\ 14 | [Estimating Input Token Costs](counting-tokens.ipynb) 15 | 16 | ### Analyze Speakers with LeMUR 17 | 18 | [Processing Speaker Labels with the Custom Text Input Parameter](input-text-speaker-labels.ipynb) 19 | [Speaker Identification](speaker-identification.ipynb) 20 | 21 | ### Get Quotes and Citations with LeMUR 22 | 23 | [Extract Dialogue Data](dialogue-data.ipynb)\ 24 | [Extract Citations from a Transcript with Semantic Search](transcript-citations.ipynb)\ 25 | [Extract Quotes from a Transcript with the Custom Text Input Parameter](timestamped-transcripts.ipynb)\ 26 | 🆕 [Create Transcript Citations Using OpenAI Embeddings](lemur-transcript-citations.ipynb) 27 | 28 | ### Substitute Audio Intelligence with LeMUR 29 | 30 | | Model/Feature | Use with LeMUR | 31 | |----------------|-----------------------------------| 32 | | **Sentiment Analysis** | [Customer Call Sentiment Analysis](call-sentiment-analysis.ipynb) | 33 | | **Custom Vocabulary** | [Boost Transcription Accuracy](custom-vocab-lemur.ipynb) | 34 | | **Auto Chapters** | [Creating Chapter Summaries with the Custom Text Input Parameter](input-text-chapters.ipynb) | 35 | | **Summarization** | [Create Custom Summaries using the Task Endpoint](task-endpoint-custom-summary.ipynb) | 36 | | **Topic Detection** | 🆕 [Create Custom Topic Tags](custom-topic-tags.ipynb) | 37 | 38 | ### Use Case-Specific LeMUR Workflows 39 | 40 | [Implement a Sales Playbook](sales-playbook.ipynb)\ 41 | [How to Pass Context from Previous LeMUR Requests](past-response-prompts.ipynb)\ 42 | [Generate Action Items from a Meeting](meeting-action-items.ipynb)\ 43 | [Phone Call Segmentation](phone-call-segmentation.ipynb)\ 44 | [SOAP Note Generation](soap-note-generation.ipynb) 45 | -------------------------------------------------------------------------------- /registry.yaml: -------------------------------------------------------------------------------- 1 | # yaml-language-server: $schema=./.github/registry_schema.json 2 | 3 | # This file is used to generate our docs cookbook page. It specifies which paths 4 | # each guide is under, and indicates metadata such as tags and a title for each. 
5 | 6 | - title: Audio Intelligence - Creating summarized chapters from podcasts 7 | path: audio-intelligence/auto_chapters.ipynb 8 | tags: 9 | - audio-intelligence 10 | 11 | - title: Audio Intelligence - Identify Hate Speech in Audio and Video Files 12 | path: audio-intelligence/content_moderation.ipynb 13 | tags: 14 | - audio-intelligence 15 | 16 | - title: Audio Intelligence - PII Redaction Using Entity Detection 17 | path: audio-intelligence/entity_redaction.ipynb 18 | tags: 19 | - audio-intelligence 20 | 21 | - title: Audio Intelligence - Identify Highlights in Audio and Video Files 22 | path: audio-intelligence/key_phrases.ipynb 23 | tags: 24 | - audio-intelligence 25 | 26 | - title: Speech-To-Text - Identify Speakers in Audio Recordings 27 | path: core-transcription/speaker_labels.ipynb 28 | tags: 29 | - speech-to-text 30 | 31 | - title: Audio Intelligence - Summarizing Virtual Meetings 32 | path: audio-intelligence/summarization.ipynb 33 | tags: 34 | - audio-intelligence 35 | 36 | - title: Speech-To-Text - Transcribe Multiple Files Simultaneously 37 | path: core-transcription/SDK_transcribe_batch_of_files/batch_transcription.ipynb 38 | tags: 39 | - speech-to-text 40 | 41 | - title: Speech-To-Text - Split Audio File into Shorter Files 42 | path: core-transcription/split_audio_file 43 | tags: 44 | - speech-to-text 45 | 46 | - title: Speech-To-Text - Route to Default Language if Language Detection Confidence is Low 47 | path: core-transcription/automatic-language-detection-route-default-language-python.ipynb 48 | tags: 49 | - speech-to-text 50 | 51 | - title: Speech-To-Text - Guide to Common Errors for New AssemblyAI Users 52 | path: core-transcription/common_errors_and_solutions.md 53 | tags: 54 | - speech-to-text 55 | 56 | - title: Speech-To-Text - Deleting an AssemblyAI Transcript 57 | path: core-transcription/delete_transcript.ipynb 58 | tags: 59 | - speech-to-text 60 | 61 | - title: Speech-To-Text - Detecting Low Confidence Words in a Transcript 62 | path: core-transcription/detecting-low-confidence-words.md 63 | tags: 64 | - speech-to-text 65 | 66 | - title: Speech-To-Text - Do More with the Node SDK 67 | path: core-transcription/do-more-with-sdk-js.md 68 | tags: 69 | - speech-to-text 70 | 71 | - title: Speech-To-Text - Build a UI for Transcription with Gradio and Python 72 | path: core-transcription/gradio-frontend.ipynb 73 | tags: 74 | - speech-to-text 75 | 76 | - title: Speech-To-Text - Iterate Over Speaker Labels with Make.com 77 | path: core-transcription/make.com-speaker-labels.md 78 | tags: 79 | - speech-to-text 80 | 81 | - title: Speech-To-Text - How to Implement Retry Server Error Logic 82 | path: core-transcription/retry-server-error.ipynb 83 | tags: 84 | - speech-to-text 85 | 86 | - title: Speech-To-Text - How to Implement Retry Upload Error Logic 87 | path: core-transcription/retry-upload-error.ipynb 88 | tags: 89 | - speech-to-text 90 | 91 | - title: Speech-To-Text - Schedule a DELETE Request with AssemblyAI and EasyCron 92 | path: core-transcription/schedule_delete.ipynb 93 | tags: 94 | - speech-to-text 95 | 96 | - title: Speech-To-Text - Creating Subtitles with Speaker Labels 97 | path: core-transcription/speaker_labelled_subtitles.ipynb 98 | tags: 99 | - speech-to-text 100 | 101 | - title: Speech-To-Text - Create a Speaker Timeline 102 | path: core-transcription/speaker_timeline.ipynb 103 | tags: 104 | - speech-to-text 105 | 106 | - title: Speech-To-Text - Specifying a Language 107 | path: core-transcription/specify-language.ipynb 108 | tags: 109 | - 
speech-to-text 110 | 111 | - title: Speech-To-Text - Create Custom Length Subtitles 112 | path: core-transcription/subtitle_creation_by_word_count.ipynb 113 | tags: 114 | - speech-to-text 115 | 116 | - title: Speech-To-Text - Generating Subtitles for Videos 117 | path: core-transcription/subtitles.ipynb 118 | tags: 119 | - speech-to-text 120 | 121 | - title: Speech-To-Text - Speaker Identification Across Files Pinecone and Nvidia's TitaNet Model 122 | path: core-transcription/titanet-speaker-identification.ipynb 123 | tags: 124 | - speech-to-text 125 | 126 | - title: Speech-To-Text - Transcribing an Audio File 127 | path: core-transcription/transcribe.ipynb 128 | tags: 129 | - speech-to-text 130 | 131 | - title: Speech-To-Text - Transcribing from an S3 bucket 132 | path: core-transcription/transcribe_from_s3.ipynb 133 | tags: 134 | - speech-to-text 135 | 136 | - title: Speech-To-Text - How to Transcribe YouTube Videos 137 | path: core-transcription/transcribe_youtube_videos.ipynb 138 | tags: 139 | - speech-to-text 140 | 141 | - title: Speech-To-Text - Transcribing Files Hosted on GitHub 142 | path: core-transcription/transcribing-github-files.md 143 | tags: 144 | - speech-to-text 145 | 146 | - title: Speech-To-Text - Guide to Using Google Drive Links 147 | path: core-transcription/transcribing-google-drive-file.md 148 | tags: 149 | - speech-to-text 150 | 151 | - title: Speech-To-Text - Translating an AssemblyAI Subtitle Transcript 152 | path: core-transcription/translate_subtitles.ipynb 153 | tags: 154 | - speech-to-text 155 | 156 | - title: Speech-To-Text - Translating an AssemblyAI Transcript 157 | path: core-transcription/translate_transcripts.ipynb 158 | tags: 159 | - speech-to-text 160 | 161 | - title: Speech-To-Text - Identify Duplicate Channels in Stereo Files 162 | path: core-transcription/identify_duplicate_channels.ipynb 163 | tags: 164 | - speech-to-text 165 | 166 | - title: LeMUR - Customer Call Sentiment Analysis 167 | path: lemur/call-sentiment-analysis.ipynb 168 | tags: 169 | - lemur 170 | 171 | - title: LeMUR - Estimating Input Token Costs 172 | path: lemur/counting-tokens.ipynb 173 | tags: 174 | - lemur 175 | 176 | - title: LeMUR - Boosting transcription accuracy (LeMUR custom vocab) 177 | path: lemur/custom-vocab-lemur.ipynb 178 | tags: 179 | - lemur 180 | 181 | - title: LeMUR - Extract Dialogue Data 182 | path: lemur/dialogue-data.ipynb 183 | tags: 184 | - lemur 185 | 186 | - title: LeMUR - Creating Chapter Summaries with the Custom Text Input Parameter 187 | path: lemur/input-text-chapters.ipynb 188 | tags: 189 | - lemur 190 | 191 | - title: LeMUR - Processing Speaker Labels with the Custom Text Input Parameter 192 | path: lemur/input-text-speaker-labels.ipynb 193 | tags: 194 | - lemur 195 | 196 | - title: LeMUR - Generate Action Items from a Meeting 197 | path: lemur/meeting-action-items.ipynb 198 | tags: 199 | - lemur 200 | 201 | - title: LeMUR - How to Pass Context from Previous LeMUR Requests 202 | path: lemur/past-response-prompts.ipynb 203 | tags: 204 | - lemur 205 | 206 | - title: LeMUR - Implement a Sales Playbook 207 | path: lemur/sales-playbook.ipynb 208 | tags: 209 | - lemur 210 | 211 | - title: LeMUR - Identify Speakers by Name 212 | path: lemur/speaker-identification.ipynb 213 | tags: 214 | - lemur 215 | 216 | - title: LeMUR - How to Use LeMUR's Specialized Endpoints 217 | path: lemur/specialized-endpoints.ipynb 218 | tags: 219 | - lemur 220 | 221 | - title: LeMUR - Generate Action Items Using the Task Endpoint 222 | path: 
lemur/task-endpoint-action-items.ipynb 223 | tags: 224 | - lemur 225 | 226 | - title: LeMUR - Receive AI Coaching from the Task Endpoint 227 | path: lemur/task-endpoint-ai-coach.ipynb 228 | tags: 229 | - lemur 230 | 231 | - title: LeMUR - Create Custom Summaries using the Task Endpoint 232 | path: lemur/task-endpoint-custom-summary.ipynb 233 | tags: 234 | - lemur 235 | 236 | - title: LeMUR - Perform Structured Q&A Using the Task Endpoint 237 | path: lemur/task-endpoint-structured-QA.ipynb 238 | tags: 239 | - lemur 240 | 241 | - title: LeMUR - Extract Quotes from a Transcript with the Custom Text Input Parameter 242 | path: lemur/timestamped-transcripts.ipynb 243 | tags: 244 | - lemur 245 | 246 | - title: LeMUR - Extract Citations from a Transcript with Semantic Search 247 | path: lemur/transcript-citations.ipynb 248 | tags: 249 | - lemur 250 | 251 | - title: LeMUR - Processing audio files with LLMs using LeMUR 252 | path: lemur/using-lemur.ipynb 253 | tags: 254 | - lemur 255 | 256 | - title: Streaming - Transcribe Local Files in Real-Time with Node.js 257 | path: streaming-stt/file-transcription-nodejs 258 | tags: 259 | - streaming 260 | - speech-to-text 261 | 262 | - title: Streaming - Capturing Complete Sentences as Partial Transcripts 263 | path: streaming-stt/partial_transcripts.ipynb 264 | tags: 265 | - streaming 266 | - speech-to-text 267 | 268 | - title: Streaming - Using Streaming Speech-To-Text 269 | path: streaming-stt/real-time.ipynb 270 | tags: 271 | - streaming 272 | - speech-to-text 273 | 274 | - title: Streaming - Using LeMUR with Streaming Speech-To-Text 275 | path: streaming-stt/real_time_lemur.ipynb 276 | tags: 277 | - streaming 278 | - speech-to-text 279 | - lemur 280 | 281 | - title: Streaming - Terminate Session After Inactivity 282 | path: streaming-stt/terminate_realtime_programmatically.ipynb 283 | tags: 284 | - streaming 285 | - speech-to-text 286 | 287 | - title: Speech-To-Text - Do More with the Python SDK 288 | path: core-transcription/do-more-with-sdk-python.ipynb 289 | tags: 290 | - speech-to-text 291 | 292 | - title: Streaming - Transcribe System Audio in Real-Time (macOS) 293 | path: streaming-stt/transcribe_system_audio.ipynb 294 | tags: 295 | - streaming 296 | - speech-to-text 297 | 298 | - title: Streaming - Use LeMUR for Real-Time Translation 299 | path: streaming-stt/real_time_translation.ipynb 300 | tags: 301 | - streaming 302 | - speech-to-text 303 | - lemur 304 | 305 | - title: Speech-To-Text - Use Pyannote to Generate Custom Speaker Labels 306 | path: core-transcription/Use_AssemblyAI_with_Pyannote_to_generate_custom_Speaker_Labels.ipynb 307 | tags: 308 | - speech-to-text 309 | 310 | - title: Speech-To-Text - Correct Audio Duration Discrepancies for Corrupted Files 311 | path: core-transcription/audio-duration-fix.ipynb 312 | tags: 313 | - speech-to-text 314 | 315 | - title: Streaming - Streaming Best Practices 316 | path: streaming-stt/real-time-best-practices.ipynb 317 | tags: 318 | - streaming 319 | 320 | - title: Speech-To-Text - Calculate Talk/Listen Ratio of Speakers 321 | path: core-transcription/talk-listen-ratio.ipynb 322 | tags: 323 | - speech-to-text 324 | 325 | - title: Speech-To-Text - Speaker Diarization with Async Chunking 326 | path: core-transcription/speaker-diarization-with-async-chunking.ipynb 327 | tags: 328 | - speech-to-text 329 | 330 | - title: Speech-To-Text - Near-Realtime Python Speech-to-Text App 331 | path: https://github.com/AssemblyAI-Solutions/async-chunk-py 332 | tags: 333 | - speech-to-text 334 | 335 | - title: 
Speech-To-Text - Near-Realtime Node.js Speech-to-Text App 336 | path: https://github.com/AssemblyAI-Solutions/async-chunk-js 337 | tags: 338 | - speech-to-text 339 | 340 | - title: LeMUR - Transcript Citations 341 | path: lemur/lemur-transcript-citations.ipynb 342 | tags: 343 | - lemur 344 | 345 | - title: LeMUR - Create Custom Topic Tags 346 | path: lemur/custom-topic-tags.ipynb 347 | tags: 348 | - lemur 349 | 350 | - title: Audio Intelligence - Label Content with Topic Tags 351 | path: audio-intelligence/topic_detection.ipynb 352 | tags: 353 | - audio-intelligence 354 | 355 | - title: LeMUR - Phone Call Segmentation 356 | path: lemur/phone-call-segmentation.ipynb 357 | tags: 358 | - lemur 359 | 360 | - title: AWS Transcribe to AssemblyAI 361 | path: core-transcription/migration_guides/aws_to_aai.ipynb 362 | tags: 363 | - speech-to-text 364 | 365 | - title: Deepgram to AssemblyAI 366 | path: core-transcription/migration_guides/dg_to_aai.ipynb 367 | tags: 368 | - speech-to-text 369 | 370 | - title: OpenAI to AssemblyAI 371 | path: core-transcription/migration_guides/oai_to_aai.ipynb 372 | tags: 373 | - speech-to-text 374 | 375 | - title: LeMUR - SOAP Note Generation 376 | path: lemur/soap-note-generation.ipynb 377 | tags: 378 | - lemur 379 | 380 | - title: Google to AssemblyAI 381 | path: core-transcription/migration_guides/google_to_aai.ipynb 382 | tags: 383 | - speech-to-text 384 | 385 | - title: Speech-To-Text - How to Use the EU Endpoint 386 | path: core-transcription/how_to_use_the_eu_endpoint.ipynb 387 | tags: 388 | - speech-to-text 389 | -------------------------------------------------------------------------------- /streaming-stt/README.md: -------------------------------------------------------------------------------- 1 | # Streaming STT 🕒 2 | 3 | AssemblyAI's Streaming Speech-to-Text (STT) allows you to transcribe live audio streams with high accuracy and low latency. By streaming your audio data to our secure WebSocket API, you can receive transcripts back within a few hundred milliseconds. 
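To give a feel for the basic flow before diving into the cookbooks below, here is a minimal Python sketch that mirrors the streaming notebooks in this folder; `YOUR_API_KEY` is a placeholder, and microphone streaming assumes the SDK is installed as in those notebooks (`pip install "assemblyai[all]"`).

```
import assemblyai as aai

aai.settings.api_key = "YOUR_API_KEY"

def on_data(transcript: aai.RealtimeTranscript):
    # Partial transcripts arrive first; final transcripts add punctuation and casing.
    if transcript.text:
        print(transcript.text)

def on_error(error: aai.RealtimeError):
    print("An error occurred:", error)

# Create the streaming transcriber and open the WebSocket connection.
transcriber = aai.RealtimeTranscriber(
    on_data=on_data,
    on_error=on_error,
    sample_rate=44_100,
)
transcriber.connect()

# Stream microphone audio until interrupted (CTRL+C), then close the session.
microphone_stream = aai.extras.MicrophoneStream()
transcriber.stream(microphone_stream)
transcriber.close()
```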
4 | 5 | ## All Streaming Cookbooks 6 | 7 | ### Basic Streaming Workflows 8 | 9 | [Transcribe Files in Real-Time with Node.js](file-transcription-nodejs)\ 10 | [Use Streaming STT](real-time.ipynb)\ 11 | [Terminate Session After a Fixed Duration of Inactivity](terminate_realtime_programmatically.ipynb)\ 12 | [Capturing Complete Sentences as Partial Transcripts](partial_transcripts.ipynb)\ 13 | 🆕 [Transcribe System Audio (macOS)](transcribe_system_audio.ipynb)\ 14 | 🆕 [Best Practices](real-time-best-practices.ipynb) 15 | 16 | ### Streaming for Front-End Applications 17 | 18 | [React Example Using Streaming STT](https://github.com/AssemblyAI-Examples/realtime-react-example)\ 19 | [Vanilla JavaScript Front-End Examples](https://github.com/AssemblyAI/realtime-transcription-browser-js-example/tree/master) 20 | 21 | ### Streaming with LeMUR 22 | 23 | [Use LeMUR with Streaming STT](real_time_lemur.ipynb)\ 24 | 🆕 [Use LeMUR for Real-Time Translation](real_time_translation.ipynb) 25 | 26 | ### Use Case-Specific Streaming Workflows 27 | 28 | [Use Twilio with Node SDK](https://github.com/AssemblyAI/twilio-realtime-tutorial)\ 29 | 🆕 [Apply Noise Reduction to Audio](noise_reduction_streaming.ipynb) 30 | -------------------------------------------------------------------------------- /streaming-stt/file-transcription-nodejs/README.md: -------------------------------------------------------------------------------- 1 | ## AssemblyAI Realtime Example 2 | 3 | ##### This example will transcribe any local .wav audio file in realtime and then save the final result to a text file. While streaming the file, the console will display the partial text fragments that are sent back in realtime from the Realtime API websocket connection. 4 | ##### To read more about this API, see [here](https://www.assemblyai.com/docs/walkthroughs#realtime-streaming-transcription) 5 | 6 | 7 | #### To begin, run the following commands: 8 | 9 | - npm install 10 | - npm start 11 | 12 | ##### Built with Node v18.8.0 13 | ###### Note: This demonstration is designed to exemplify the most basic use case of the AssemblyAI realtime API and is not appropriate for a production environment. 
14 | -------------------------------------------------------------------------------- /streaming-stt/file-transcription-nodejs/example.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AssemblyAI/cookbook/c06fa0906e39c415953c102c338856812071e6ef/streaming-stt/file-transcription-nodejs/example.wav -------------------------------------------------------------------------------- /streaming-stt/file-transcription-nodejs/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "realtime-example", 3 | "version": "1.0.0", 4 | "description": "A simple example of how to use the AssemblyAI realtime stream API in node.js", 5 | "main": "stream_api.js", 6 | "dependencies": { 7 | "ws": "^8.8.1" 8 | }, 9 | "devDependencies": {}, 10 | "scripts": { 11 | "test": "echo \"Error: no test specified\" && exit 1", 12 | "start": "node stream_api.js" 13 | }, 14 | "author": "Garvan Doyle", 15 | "license": "ISC" 16 | } 17 | -------------------------------------------------------------------------------- /streaming-stt/file-transcription-nodejs/stream_api.js: -------------------------------------------------------------------------------- 1 | const WebSocket = require("ws"); 2 | const fs = require("fs"); 3 | 4 | const ASSEMBLY_API_KEY = 'YOUR_API_KEY'; 5 | 6 | if(ASSEMBLY_API_KEY === 'YOUR_API_KEY') { 7 | throw console.error("Please set your AssemblyAI API key in the ASSEMBLY_API_KEY variable."); 8 | } 9 | 10 | //How to transcribe a local file using the AssemblyAI stream API. 11 | function transcribeFile(filePath) { 12 | 13 | //Initialize the websocket connection. 14 | const url = "wss://api.assemblyai.com/v2/realtime/ws?sample_rate=8000"; 15 | const socket = new WebSocket(url, { 16 | headers: { 17 | Authorization: ASSEMBLY_API_KEY 18 | } 19 | }); 20 | 21 | 22 | let transcriptText = ""; 23 | 24 | //Declare socket callbacks. 25 | socket.onmessage = async (message) => { 26 | const res = JSON.parse(message.data.toString()); 27 | 28 | if (res.message_type == "PartialTranscript") { 29 | //The partial transcript is the current best guess of the transcription. 30 | console.log("Partial Text:", res.text); 31 | } 32 | if (res.message_type == "FinalTranscript") { 33 | //The final transcript is the final transcription of the audio. 34 | console.log("Final Text:", res.text); 35 | transcriptText += res.text + " "; 36 | } 37 | 38 | switch (res.message_type) { 39 | case "SessionBegins": 40 | console.log("Session Begins"); 41 | const data = fs.readFileSync(filePath); 42 | // Loop through data sending 2000 bytes at a time 43 | for (let i = 0; i < data.length; i += 2000) { 44 | const chunk = data.slice(i, i + 2000) 45 | 46 | if (chunk.length < 2000) { 47 | continue; 48 | } 49 | 50 | const audioData = chunk.toString("base64"); 51 | 52 | socket.send(JSON.stringify({ audio_data: audioData })); 53 | 54 | await new Promise(resolve => setTimeout(resolve, 50)); 55 | } 56 | //When all audio data has been chunked and sent, send the terminate_session message to close the stream. 
57 | socket.send(JSON.stringify({terminate_session: true})); 58 | } 59 | }; 60 | 61 | socket.onerror = (event) => { 62 | console.error(event); 63 | } 64 | 65 | socket.onclose = event => { 66 | // Write final transcript text to file 67 | fs.writeFile("./example.wav_transcript.txt", transcriptText, (err) => { 68 | if (err) throw err; 69 | }) 70 | 71 | console.log(`Got socket close event type=${event.type} code=${event.code} reason="${event.reason}" wasClean=${event.wasClean}`); 72 | } 73 | 74 | socket.onopen = () => { 75 | this.state = "started"; 76 | console.log("socket open"); 77 | }; 78 | 79 | } 80 | 81 | 82 | transcribeFile("./example.wav"); -------------------------------------------------------------------------------- /streaming-stt/partial_transcripts.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "nJFtEoTSlGkX" 7 | }, 8 | "source": [ 9 | "# Capturing complete sentences as partial transcriptions in AssemblyAI" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": { 20 | "id": "-FaHmXYvmehO" 21 | }, 22 | "source": [ 23 | "To effectively use AssemblyAI's Streaming Speech-to-Text (STT) API for partial transcripts, particularly in scenarios where final transcripts (which include punctuation and casing) are not required, you need to understand how partial transcripts work and how to handle them in your application. Here’s a guide to help you get started. [For the full code, refer to this GitHub gist.](https://gist.github.com/m-ods/84b2a54b417897b06c0e13469fb64d61)" 24 | ] 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "metadata": { 29 | "id": "C7x3wQls8WQX" 30 | }, 31 | "source": [ 32 | "First, install AssemblyAI's Python SDK." 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": { 39 | "id": "kmSvU1JklBLd" 40 | }, 41 | "outputs": [], 42 | "source": [ 43 | "!pip install \"assemblyai[all]\"" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": { 49 | "id": "AvsEMdBKlveq" 50 | }, 51 | "source": [ 52 | "Before we begin, make sure you have an AssemblyAI account and an API key. You can sign up for a free account and get your API key from your dashboard. Please note that this feature is available for paid accounts only. If you're on the free plan, you'll need to upgrade.\n", 53 | "\n" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": { 60 | "id": "q0mbZ-LoluyM" 61 | }, 62 | "outputs": [], 63 | "source": [ 64 | "import assemblyai as aai\n", 65 | "\n", 66 | "aai.settings.api_key = \"YOUR-API-KEY\"" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": { 72 | "id": "qNoz-czClxFy" 73 | }, 74 | "source": [ 75 | "## Understanding Partial Transcripts\n", 76 | "\n", 77 | "### What are Partial Transcripts?\n", 78 | "Partial transcripts are incomplete and ongoing transcriptions of an audio stream. 
They provide a near real-time text representation of spoken words before the entire speech is finished.\n", 79 | "\n", 80 | "They are useful in scenarios where immediate text feedback is more important than the complete accuracy or formatting of the final transcript.\n", 81 | "\n", 82 | "### Example Use Cases where Partial Transcripts suffice\n", 83 | "* Chat bots processed using LLMs\n", 84 | "* Voice Command Recognition\n", 85 | "* Real-time Translations\n", 86 | "\n", 87 | "\n", 88 | "### What do Partial Transcripts look like?\n", 89 | "For a sentence such as \"What is the capital of New Zealand\", these are the messages you would receive from our API.\n", 90 | "\n", 91 | "Message 1:\n", 92 | "```\n", 93 | "{\"message_type\":\"PartialTranscript\", \"created\":\"2023-11-10T16:10:22.754985\",\n", 94 | "\"text\":\"what is the\", ...}\n", 95 | "```\n", 96 | "\n", 97 | "Message 2:\n", 98 | "```\n", 99 | "{\"message_type\":\"PartialTranscript\", \"created\":\"2023-11-10T16:10:23.297511\",\n", 100 | "\"text\":\"what is the capital of\", ...}\n", 101 | "```\n", 102 | "\n", 103 | "Message 3:\n", 104 | "```\n", 105 | "{\"message_type\":\"PartialTranscript\", \"created\":\"2023-11-10T16:10:24.113527\",\n", 106 | "\"text\":\"what is the capital of new zealand\", ...}\n", 107 | "```\n", 108 | "\n", 109 | "Message 4 (Notice how the text is the exact same as in Message 3!):\n", 110 | "```\n", 111 | "{\"message_type\":\"PartialTranscript\", \"created\":\"2023-11-10T16:10:24.67045\",\n", 112 | "\"text\":\"what is the capital of new zealand\", ...}\n", 113 | "```\n", 114 | "\n", 115 | "Message 5:\n", 116 | "```\n", 117 | "{\"message_type\":\"FinalTranscript\", \"created\":\"2023-11-10T16:10:24.9708\",\n", 118 | "\"text\":\"What is the capital of New Zealand?\", ...}\n", 119 | "```\n", 120 | "\n", 121 | "Notice that after the text in Messages 3 and 4 are the exact same, a Final Transcript is triggered. Instead, we can programmatically check if the text in a given Message matches the text from a previous Message, and then use that to deduce that the transcript is complete.\n", 122 | "\n", 123 | "Note: Other keys in the payload have been omitted for brevity but can be seen [here in our Streaming API Reference. ](https://www.assemblyai.com/docs/api-reference/streaming)" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": { 129 | "id": "9jxI1ZO0BJbC" 130 | }, 131 | "source": [ 132 | "## Implementing Partial Transcript Checks\n", 133 | "\n", 134 | "Let's consider a code example to check if the partial transcript received from AssemblyAI matches the previous partial transcript." 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": { 140 | "id": "tCF2huuKCLcW" 141 | }, 142 | "source": [ 143 | "Define your Streaming functions as per normal." 
144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": { 150 | "id": "lKPi5OG2CK8l" 151 | }, 152 | "outputs": [], 153 | "source": [ 154 | "def on_open(session_opened: aai.RealtimeSessionOpened):\n", 155 | " \"This function is called when the connection has been established.\"\n", 156 | "\n", 157 | " print(\"Session ID:\", session_opened.session_id)\n", 158 | "\n", 159 | "def on_error(error: aai.RealtimeError):\n", 160 | " \"This function is called when the connection has been closed.\"\n", 161 | "\n", 162 | " print(\"An error occured:\", error)\n", 163 | "\n", 164 | "def on_close():\n", 165 | " \"This function is called when the connection has been closed.\"\n", 166 | "\n", 167 | " print(\"Closing Session\")" 168 | ] 169 | }, 170 | { 171 | "cell_type": "markdown", 172 | "metadata": { 173 | "id": "IZlKqijLCaPk" 174 | }, 175 | "source": [ 176 | "Then, define an empty string for `partial_transcript`. In on_data(), we will do 3 things:\n", 177 | "\n", 178 | "\n", 179 | "* Access the global string `partial_transcript`\n", 180 | "* If the data received is a Final Transcript, reset `partial_transcript`.\n", 181 | "* Else, if the `transcript.text` matches the previous `partial_transcript`, print it to our terminal\n", 182 | "* Otherwise, set `partial_transcript` to be the Partial Transcript received from AssemblyAI.\n", 183 | "\n" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "metadata": { 190 | "id": "VBuGNumRBIyi" 191 | }, 192 | "outputs": [], 193 | "source": [ 194 | "partial_transcript = ''\n", 195 | "\n", 196 | "def on_data(transcript: aai.RealtimeTranscript):\n", 197 | " \"This function is called when a new transcript has been received.\"\n", 198 | "\n", 199 | " global partial_transcript\n", 200 | "\n", 201 | " if not transcript.text:\n", 202 | " return\n", 203 | "\n", 204 | " if isinstance(transcript, aai.RealtimeFinalTranscript):\n", 205 | " partial_transcript = \"\"\n", 206 | " elif partial_transcript == transcript.text:\n", 207 | " print(transcript.text, end=\"\\r\\n\")\n", 208 | " else:\n", 209 | " partial_transcript = transcript.text" 210 | ] 211 | }, 212 | { 213 | "cell_type": "markdown", 214 | "metadata": { 215 | "id": "GjaKGNQ9DUSe" 216 | }, 217 | "source": [ 218 | "Create your Streaming transcriber and start your transcription." 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": 1, 224 | "metadata": { 225 | "id": "MnxXQ537_ZPJ" 226 | }, 227 | "outputs": [], 228 | "source": [ 229 | "# Create the Streaming transcriber\n", 230 | "transcriber = aai.RealtimeTranscriber(\n", 231 | " on_data=on_data,\n", 232 | " on_error=on_error,\n", 233 | " sample_rate=44_100,\n", 234 | " on_open=on_open, # optional\n", 235 | " on_close=on_close, # optional\n", 236 | ")\n", 237 | "\n", 238 | "# Start the connection\n", 239 | "transcriber.connect()\n", 240 | "\n", 241 | "# Open a microphone stream\n", 242 | "microphone_stream = aai.extras.MicrophoneStream()\n", 243 | "\n", 244 | "# Press CTRL+C to abort\n", 245 | "transcriber.stream(microphone_stream)\n", 246 | "\n", 247 | "transcriber.close()" 248 | ] 249 | }, 250 | { 251 | "cell_type": "markdown", 252 | "metadata": { 253 | "id": "jkoZtIFfDhDj" 254 | }, 255 | "source": [ 256 | "What you should observe is that partial transcripts are printed to the terminal within 500ms of being spoken. 
By following these guidelines and understanding how to handle Partial Transcripts, you can effectively integrate AssemblyAI's Streaming STT into your application for scenarios where immediate text feedback is crucial, even without the finesse of Final Transcripts." 257 | ] 258 | } 259 | ], 260 | "metadata": { 261 | "colab": { 262 | "provenance": [] 263 | }, 264 | "kernelspec": { 265 | "display_name": "Python 3", 266 | "name": "python3" 267 | }, 268 | "language_info": { 269 | "name": "python" 270 | } 271 | }, 272 | "nbformat": 4, 273 | "nbformat_minor": 0 274 | } 275 | -------------------------------------------------------------------------------- /streaming-stt/real_time_lemur.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [] 7 | }, 8 | "kernelspec": { 9 | "name": "python3", 10 | "display_name": "Python 3" 11 | }, 12 | "language_info": { 13 | "name": "python" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "markdown", 19 | "source": [ 20 | "# Using LeMUR with Streaming Speech-to-Text (STT)\n", 21 | "\n", 22 | "This script is modified to contain a global variable `conversation_data` that accumulates the transcribed text in the `on_data` function. Once the transcription session is closed, the `on_close` function sends `conversation_data` to LeMUR for analysis using LeMUR's `input_text` parameter." 23 | ], 24 | "metadata": { 25 | "id": "mlzJgtaDIycN" 26 | } 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": { 32 | "id": "Hf5k06F1HFJl" 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "import assemblyai as aai\n", 37 | "\n", 38 | "aai.settings.api_key = \"YOUR-API-KEY\"\n", 39 | "\n", 40 | "def on_open(session_opened: aai.RealtimeSessionOpened):\n", 41 | " \"This function is called when the connection has been established.\"\n", 42 | "\n", 43 | " print(\"Session ID:\", session_opened.session_id)\n", 44 | "\n", 45 | "def on_data(transcript: aai.RealtimeTranscript):\n", 46 | " \"This function is called when a new transcript has been received.\"\n", 47 | "\n", 48 | " global conversation_data\n", 49 | "\n", 50 | " if not transcript.text:\n", 51 | " return\n", 52 | "\n", 53 | " if isinstance(transcript, aai.RealtimeFinalTranscript):\n", 54 | " print(transcript.text, end=\"\\r\\n\")\n", 55 | " conversation_data += f\"{transcript.text} \\n\"\n", 56 | " else:\n", 57 | " print(transcript.text, end=\"\\r\")\n", 58 | "\n", 59 | "def on_error(error: aai.RealtimeError):\n", 60 | " \"This function is called when the connection has been closed.\"\n", 61 | "\n", 62 | " print(\"An error occured:\", error)\n", 63 | "\n", 64 | "def on_close():\n", 65 | " \"This function is called when the connection has been closed.\"\n", 66 | " global conversation_data\n", 67 | " print(\"Closing Session\")\n", 68 | " result = aai.Lemur().task(\n", 69 | " \"You are a helpful coach. Provide an analysis of the transcript \"\n", 70 | " \"and offer areas to improve with exact quotes. Include no preamble. 
\"\n", 71 | " \"Start with an overall summary then get into the examples with feedback.\",\n", 72 | " input_text=conversation_data\n", 73 | " )\n", 74 | "\n", 75 | " print(result.response)\n", 76 | "\n", 77 | "# Create the Streaming Speech-to-Text transcriber\n", 78 | "transcriber = aai.RealtimeTranscriber(\n", 79 | " on_data=on_data,\n", 80 | " on_error=on_error,\n", 81 | " sample_rate=44_100,\n", 82 | " on_open=on_open, # optional\n", 83 | " on_close=on_close, # optional\n", 84 | ")\n", 85 | "\n", 86 | "conversation_data = \"\"\n", 87 | "\n", 88 | "# Start the connection\n", 89 | "transcriber.connect()\n", 90 | "\n", 91 | "# Open a microphone stream\n", 92 | "microphone_stream = aai.extras.MicrophoneStream()\n", 93 | "\n", 94 | "# Press CTRL+C to abort\n", 95 | "transcriber.stream(microphone_stream)\n", 96 | "\n", 97 | "transcriber.close()" 98 | ] 99 | } 100 | ] 101 | } -------------------------------------------------------------------------------- /streaming-stt/terminate_realtime_programmatically.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "nJFtEoTSlGkX" 7 | }, 8 | "source": [ 9 | "# Terminate Streaming session after inactivity" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": { 15 | "id": "-FaHmXYvmehO" 16 | }, 17 | "source": [ 18 | "An often-overlooked aspect of implementing AssemblyAI's Streaming Speech-to-Text (STT) service is efficiently terminating transcription sessions. In this cookbook, you will learn how to terminate a Streaming session after any fixed duration of silence.\n", 19 | "\n", 20 | "[For the full code, refer to this GitHub gist.](https://gist.github.com/m-ods/133f7c7e4c08abf97ae53dc7577cadf7)" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": { 26 | "id": "C7x3wQls8WQX" 27 | }, 28 | "source": [ 29 | "First, install AssemblyAI's Python SDK." 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": { 36 | "id": "kmSvU1JklBLd" 37 | }, 38 | "outputs": [], 39 | "source": [ 40 | "!pip install \"assemblyai[all]\"" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": { 46 | "id": "AvsEMdBKlveq" 47 | }, 48 | "source": [ 49 | "Before we begin, make sure you have an AssemblyAI account and an API key. You can sign up for a free account and get your API key from your dashboard. Please note that Streaming STT is only available for Pro accounts. 
If you're on the free plan, you'll need to upgrade.\n", 50 | "\n" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": { 57 | "id": "q0mbZ-LoluyM" 58 | }, 59 | "outputs": [], 60 | "source": [ 61 | "import assemblyai as aai\n", 62 | "from datetime import datetime, timedelta\n", 63 | "\n", 64 | "aai.settings.api_key = \"YOUR-API-KEY\"" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": { 70 | "id": "qNoz-czClxFy" 71 | }, 72 | "source": [ 73 | "## Handling inactivity\n", 74 | "\n", 75 | "### Empty transcripts\n", 76 | "As long as a session is open, our Streaming STT service will continue sending empty `PartialTranscript`s that look like this:\n", 77 | "\n", 78 | "Message 1:\n", 79 | "```\n", 80 | "{\"message_type\":\"PartialTranscript\", \"created\":\"2023-11-10T16:10:22.754985\",\n", 81 | "\"text\":\"\", ...}\n", 82 | "```\n", 83 | "\n", 84 | "Message 2:\n", 85 | "```\n", 86 | "{\"message_type\":\"PartialTranscript\", \"created\":\"2023-11-10T16:10:25.297511\",\n", 87 | "\"text\":\"\", ...}\n", 88 | "```\n", 89 | "\n", 90 | "Thus, we can use empty partial transcripts to assume that the user has stopped speaking.\n", 91 | "\n", 92 | "Note: Other keys in the payload have been omitted for brevity but can be seen [here in our Streaming API Reference. ](https://www.assemblyai.com/docs/api-reference/streaming)" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": { 98 | "id": "9jxI1ZO0BJbC" 99 | }, 100 | "source": [ 101 | "## Implementing Partial Transcript Checks\n", 102 | "\n", 103 | "Let's consider a code example to track if the `PartialTranscript`s have been empty for a duration of time." 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": { 109 | "id": "tCF2huuKCLcW" 110 | }, 111 | "source": [ 112 | "Define your Streaming functions as per normal." 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": { 119 | "id": "lKPi5OG2CK8l" 120 | }, 121 | "outputs": [], 122 | "source": [ 123 | "def on_open(session_opened: aai.RealtimeSessionOpened):\n", 124 | " \"This function is called when the connection has been established.\"\n", 125 | "\n", 126 | " print(\"Session ID:\", session_opened.session_id)\n", 127 | "\n", 128 | "def on_error(error: aai.RealtimeError):\n", 129 | " \"This function is called when the connection has been closed.\"\n", 130 | "\n", 131 | " print(\"An error occured:\", error)" 132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "metadata": { 137 | "id": "IZlKqijLCaPk" 138 | }, 139 | "source": [ 140 | "Then, define the constant `last_transcript_received = datetime.now()`, and set a flag `terminated` to be `False`.\n", 141 | "\n", 142 | "We will use these variables later on." 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": { 149 | "id": "VBuGNumRBIyi" 150 | }, 151 | "outputs": [], 152 | "source": [ 153 | "last_transcript_received = datetime.now()\n", 154 | "terminated = False\n" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": { 160 | "id": "NZMnfymlp_Z-" 161 | }, 162 | "source": [ 163 | "Next, define your `on_data` function:\n", 164 | "* Access the global variable `last_transcript_received`, as well as `terminated`\n", 165 | "* If the Streaming STT transcriber has been terminated, don't return anything.\n", 166 | "* If `transcript.text` is empty, check if it has been 5 seconds since the last empty transcript. 
When true, terminate the transcriber.\n", 167 | "* Else, just print the text in our terminal as per usual, and set the time of the last transcript received to now.\n" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": null, 173 | "metadata": { 174 | "id": "hP77brVvqlCp" 175 | }, 176 | "outputs": [], 177 | "source": [ 178 | "def on_data(transcript: aai.RealtimeTranscript):\n", 179 | " global last_transcript_received\n", 180 | " global terminated\n", 181 | "\n", 182 | " if terminated:\n", 183 | " return\n", 184 | "\n", 185 | " if transcript.text == \"\":\n", 186 | " # you can set the total_seconds of inactivity to be higher or lower\n", 187 | " if (datetime.now() - last_transcript_received).total_seconds() > 5:\n", 188 | " print(\"5 seconds without new transcription, terminating...\")\n", 189 | " terminate_transcription()\n", 190 | " return\n", 191 | "\n", 192 | " if isinstance(transcript, aai.RealtimeFinalTranscript):\n", 193 | " print(transcript.text, end=\"\\r\\n\")\n", 194 | " else:\n", 195 | " print(transcript.text, end=\"\\r\")\n", 196 | "\n", 197 | " last_transcript_received = datetime.now()" 198 | ] 199 | }, 200 | { 201 | "cell_type": "markdown", 202 | "metadata": { 203 | "id": "Ilwc0S0jqmvQ" 204 | }, 205 | "source": [ 206 | "Lastly, we define our `on_close` and `terminate_transcription` function. On_close simply sets terminated to true when the Websocket connection closes.\n", 207 | "\n", 208 | "`terminate_transcription` just accesses the global `transcriber` and closes the session when the function is called by `on_data`.\n", 209 | "\n" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": null, 215 | "metadata": { 216 | "id": "bpQdsXF8qpu5" 217 | }, 218 | "outputs": [], 219 | "source": [ 220 | "def on_close():\n", 221 | " global terminated\n", 222 | " if not terminated:\n", 223 | " print(\"Closing Session\")\n", 224 | " terminated = True\n", 225 | "\n", 226 | "def terminate_transcription():\n", 227 | " global terminated\n", 228 | " if not terminated:\n", 229 | " transcriber.close()\n", 230 | " terminated = True" 231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": { 236 | "id": "GjaKGNQ9DUSe" 237 | }, 238 | "source": [ 239 | "Create your Streaming STT transcriber and start your transcription." 240 | ] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "execution_count": null, 245 | "metadata": { 246 | "id": "MnxXQ537_ZPJ" 247 | }, 248 | "outputs": [], 249 | "source": [ 250 | "# Create the Streaming STT transcriber\n", 251 | "transcriber = aai.RealtimeTranscriber(\n", 252 | " on_data=on_data,\n", 253 | " on_error=on_error,\n", 254 | " sample_rate=44_100,\n", 255 | " on_open=on_open, # optional\n", 256 | " on_close=on_close, # optional\n", 257 | ")\n", 258 | "\n", 259 | "# Start the connection\n", 260 | "transcriber.connect()\n", 261 | "\n", 262 | "# Open a microphone stream\n", 263 | "microphone_stream = aai.extras.MicrophoneStream()\n", 264 | "\n", 265 | "# Press CTRL+C to abort\n", 266 | "transcriber.stream(microphone_stream)" 267 | ] 268 | }, 269 | { 270 | "cell_type": "markdown", 271 | "metadata": { 272 | "id": "jkoZtIFfDhDj" 273 | }, 274 | "source": [ 275 | "What you should observe is that transcription works in real-time and automatically terminates after 5 seconds!" 
276 | ] 277 | } 278 | ], 279 | "metadata": { 280 | "colab": { 281 | "provenance": [] 282 | }, 283 | "kernelspec": { 284 | "display_name": "Python 3", 285 | "name": "python3" 286 | }, 287 | "language_info": { 288 | "name": "python" 289 | } 290 | }, 291 | "nbformat": 4, 292 | "nbformat_minor": 0 293 | } 294 | --------------------------------------------------------------------------------