├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── config.yml │ └── feature-request.md ├── PULL_REQUEST_TEMPLATE │ └── pull_request_template.md └── workflows │ └── release.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── deepgram_captions ├── __init__.py ├── _version.py ├── converters.py ├── helpers.py ├── srt.py └── webvtt.py ├── example.py ├── requirements-dev.txt ├── setup.py └── test ├── __pycache__ ├── test_assembly.cpython-310-pytest-7.4.3.pyc └── test_deepgram.cpython-310-pytest-7.4.3.pyc ├── assemblyai_transcription.json ├── assemblyai_utterances.json ├── dg_speakers.json ├── dg_speakers_no_utterances.json ├── dg_transcription.json ├── dg_utterances.json ├── dg_whisper_transcription.json ├── test_assembly.py ├── test_deepgram.py ├── test_whisper.py └── whisper_timestamped.json /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Something is occurring that I think is wrong 4 | title: '' 5 | labels: "\U0001F41B bug" 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## What is the current behavior? 11 | 12 | > What's happening that seems wrong? 13 | 14 | ## Steps to reproduce 15 | 16 | > To make it faster to diagnose the root problem. Tell us how can we reproduce the bug. 17 | 18 | ## Expected behavior 19 | 20 | > What would you expect to happen when following the steps above? 21 | 22 | ## Please tell us about your environment 23 | 24 | > We want to make sure the problem isn't specific to your operating system or programming language. 25 | 26 | - **Operating System/Version:** Windows 10 27 | - **Language:** [all | TypeScript | Python | PHP | etc] 28 | - **Browser:** Chrome 29 | 30 | ## Other information 31 | 32 | > Anything else we should know? (e.g. detailed explanation, stack-traces, related issues, suggestions how to fix, links for us to have context, eg. stack overflow, codepen, etc) 33 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | contact_links: 3 | - name: DeepgramDevs on Twitter 4 | url: https://twitter.com/DeepgramDevs 5 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature Request 3 | about: I think X would be a cool addition or change. 4 | title: '' 5 | labels: "✨ enhancement" 6 | assignees: '' 7 | 8 | --- 9 | 10 | ## Proposed changes 11 | 12 | > Provide a detailed description of the change or addition you are proposing 13 | 14 | ## Context 15 | 16 | > Why is this change important to you? How would you use it? How can it benefit other users? 17 | 18 | ## Possible Implementation 19 | 20 | > Not obligatory, but suggest an idea for implementing addition or change 21 | 22 | ## Other information 23 | 24 | > Anything else we should know? (e.g. detailed explanation, related issues, links for us to have context, eg. stack overflow, codepen, etc) 25 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ## Proposed changes 2 | 3 | Describe the big picture of your changes here to communicate to the maintainers why we should accept this pull request. If it fixes a bug or resolves a feature request, be sure to link to that issue. 4 | 5 | ## Types of changes 6 | 7 | What types of changes does your code introduce? 8 | _Put an `x` in the boxes that apply_ 9 | 10 | - [ ] Bugfix (non-breaking change which fixes an issue) 11 | - [ ] New feature (non-breaking change which adds functionality) 12 | - [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) 13 | - [ ] Documentation update or tests (if none of the other choices apply) 14 | 15 | ## Checklist 16 | 17 | _Put an `x` in the boxes that apply.This is simply a reminder of what we are going to look for before merging your code._ 18 | 19 | - [ ] I have read the [CONTRIBUTING](../../CONTRIBUTING.md) doc 20 | - [ ] Lint and unit tests pass locally with my changes 21 | - [ ] I have added tests that prove my fix is effective or that my feature works 22 | - [ ] I have added necessary documentation (if appropriate) 23 | - [ ] Any dependent changes have been merged and published in downstream modules 24 | 25 | ## Further comments 26 | 27 | If this is a relatively large or complex change, kick off the discussion by explaining why you chose the solution you did and what alternatives you considered, etc... 28 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using Twine when a release is created 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries 3 | 4 | # This workflow uses actions that are not certified by GitHub. 5 | # They are provided by a third-party and are governed by 6 | # separate terms of service, privacy policy, and support 7 | # documentation. 8 | 9 | name: Release 10 | 11 | on: 12 | release: 13 | types: [published] 14 | 15 | permissions: 16 | contents: read 17 | 18 | jobs: 19 | deploy: 20 | runs-on: ubuntu-latest 21 | 22 | steps: 23 | - uses: actions/checkout@v3 24 | - name: Set up Python 25 | uses: actions/setup-python@v3 26 | with: 27 | python-version: "3.x" 28 | - name: Install dependencies 29 | run: | 30 | python -m pip install --upgrade pip 31 | pip install build 32 | - name: Update Version in _version.py 33 | run: sed -i "s/0.0.0/${{ github.event.release.tag_name }}/g" ./deepgram_captions/_version.py 34 | - name: Build package 35 | run: python -m build 36 | - name: Install twine 37 | run: python -m pip install --upgrade twine 38 | - name: Publish package 39 | uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 40 | with: 41 | user: __token__ 42 | password: ${{ secrets.PYPI_API_TOKEN }} 43 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.egg-info 2 | */__pycache__/ 3 | **/__pycache__ 4 | dist/ 5 | build/ 6 | .pytest_cache/ -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | Please refer to [Deepgram Community Code of Conduct](https://dpgr.am/coc) 4 | 5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Want to contribute to this project? We ❤️ it! 4 | 5 | Here are a few types of contributions that we would be interested in hearing about. 6 | 7 | * Bug fixes 8 | * If you find a bug, please first report it using Github Issues. 9 | * Issues that have already been identified as a bug will be labeled `🐛 bug`. 10 | * If you'd like to submit a fix for a bug, send a Pull Request from your own fork and mention the Issue number. 11 | * Include a test that isolates the bug and verifies that it was fixed. 12 | * New Features 13 | * If you'd like to accomplish something in the extension that it doesn't already do, describe the problem in a new Github Issue. 14 | * Issues that have been identified as a feature request will be labeled `✨ enhancement`. 15 | * If you'd like to implement the new feature, please wait for feedback from the project maintainers before spending 16 | too much time writing the code. In some cases, `✨ enhancement`s may not align well with the project objectives at 17 | the time. 18 | * Tests, Documentation, Miscellaneous 19 | * If you think the test coverage could be improved, the documentation could be clearer, you've got an alternative 20 | implementation of something that may have more advantages, or any other change we would still be glad hear about 21 | it. 22 | * If its a trivial change, go ahead and send a Pull Request with the changes you have in mind 23 | * If not, open a Github Issue to discuss the idea first. 24 | 25 | We also welcome anyone to work on any existing issues with the `👋🏽 good first issue` tag. 26 | 27 | ## Requirements 28 | 29 | For a contribution to be accepted: 30 | 31 | * The test suite must be complete and pass 32 | * Code must follow existing styling conventions 33 | * Commit messages must be descriptive. Related issues should be mentioned by number. 34 | 35 | If the contribution doesn't meet these criteria, a maintainer will discuss it with you on the Issue. You can still 36 | continue to add more commits to the branch you have sent the Pull Request from. 37 | 38 | ## How To 39 | 40 | 1. Fork this repository on GitHub. 41 | 1. Clone/fetch your fork to your local development machine. 42 | 1. Create a new branch (e.g. `issue-12`, `feat.add_foo`, etc) and check it out. 43 | 1. Make your changes and commit them. (Did the tests pass? No linting errors?) 44 | 1. Push your new branch to your fork. (e.g. `git push myname issue-12`) 45 | 1. Open a Pull Request from your new branch to the original fork's `main` branch. 46 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 deepgram 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Deepgram Python Captions 2 | 3 | [![Discord](https://dcbadge.vercel.app/api/server/xWRaCDBtW4?style=flat)](https://discord.gg/xWRaCDBtW4) [![PyPI version](https://badge.fury.io/py/deepgram-captions.svg)](https://badge.fury.io/py/deepgram-captions) 4 | 5 | This package is the Python implementation of Deepgram's WebVTT and SRT formatting. Given a transcription, this package can return a valid string to store as WebVTT or SRT caption files. 6 | 7 | The package is not dependent on Deepgram, but it is expected that you will provide a JSON response from a transcription request from either Deepgram or one of the other supported speech-to-text APIs. 8 | 9 | ## Installation 10 | 11 | ```bash 12 | pip install deepgram-captions 13 | ``` 14 | 15 | ## How it works 16 | 17 | The converter takes in a JSON object response (see examples in the `./test` folder.) Depending on which API you use, the converter will turn that into a shape that can be handled by the `webvtt` and `srt` scripts. 18 | 19 | You provide the JSON object; then select the converter needed such as `DeepgramConverter`, `WhisperTimestampedConverter`, `AssemblyAIConverter` and so on. (If the API you want to use is not supported, please reach out to `devrel@deepgram.com` and we will do our best to add it.) 20 | 21 | ## WebVTT from Deepgram Transcriptions 22 | 23 | ```python 24 | from deepgram_captions import DeepgramConverter, webvtt 25 | 26 | transcription = DeepgramConverter(dg_response) 27 | captions = webvtt(transcription) 28 | ``` 29 | 30 | ## SRT from Deepgram Transcriptions 31 | 32 | ```py 33 | from deepgram_captions import DeepgramConverter, srt 34 | 35 | transcription = DeepgramConverter(dg_response) 36 | captions = srt(transcription) 37 | ``` 38 | 39 | ### Line length 40 | 41 | Add an optional integer parameter to set the line length of the caption. 42 | 43 | ```py 44 | line_length = 10 45 | 46 | deepgram = DeepgramConverter(dg_speakers) 47 | captions = webvtt(deepgram, line_length) 48 | ``` 49 | 50 | ## Other Converters 51 | 52 | ### Whisper 53 | 54 | Open AI's Whisper (through their API) does not provide timestamps, so a JSON response directly from OpenAI cannot be used with this package. However, there are a couple other options you can try: 55 | 56 | #### Deepgram's Whisper Cloud 57 | 58 | Use Deepgram's fully hosted Whisper Cloud, which gives you Whisper transcriptions along with the features that come with Deepgram's API such as timestamps. Use `model=whisper` when you make your request to Deepgram. Then use the `DeepgramConverter` to create the captions. 59 | 60 | ```py 61 | from deepgram_captions import DeepgramConverter, srt 62 | 63 | transcription = DeepgramConverter(whisper_response) 64 | captions = srt(transcription) 65 | ``` 66 | 67 | #### Whisper Timestamped 68 | 69 | [Whisper Timestamped](https://github.com/linto-ai/whisper-timestamped) adds word-level timestamps to OpenAI's Whisper speech-to-text transcriptions. Word-level timestamps are required for this package to create captions, which is why we have created the captions converter for Whisper Timestamped (and not OpenAI's Whisper). 70 | 71 | ```py 72 | from deepgram_captions import WhisperTimestampedConverter, webvtt 73 | 74 | transcription = WhisperTimestampedConverter(whisper_response) 75 | captions = webvtt(transcription) 76 | ``` 77 | 78 | ### Assembly AI 79 | 80 | AssemblyAI is another popular speech-to-text API. 81 | 82 | ```py 83 | from deepgram_captions import AssemblyAIConverter, webvtt 84 | 85 | transcription = AssemblyAIConverter(assembly_response) 86 | captions = webvtt(transcription) 87 | ``` 88 | 89 | ## Output 90 | 91 | ### Output WebVTT 92 | 93 | When transcribing https://dpgr.am/spacewalk.wav, and running it through our library, this is the WebVTT output. 94 | 95 | ```py 96 | from deepgram_captions.converters import DeepgramConverter 97 | from deepgram_captions.webvtt import webvtt 98 | 99 | transcription = DeepgramConverter(dg_response) 100 | captions = webvtt(transcription) 101 | print(captions) 102 | ``` 103 | 104 | This is the result: 105 | 106 | ```text 107 | WEBVTT 108 | 109 | NOTE 110 | Transcription provided by Deepgram 111 | Request Id: 686278aa-d315-4aeb-b2a9-713615544366 112 | Created: 2023-10-27T15:35:56.637Z 113 | Duration: 25.933313 114 | Channels: 1 115 | 116 | 00:00:00.080 --> 00:00:03.220 117 | Yeah. As as much as, it's worth celebrating, 118 | 119 | 00:00:04.400 --> 00:00:05.779 120 | the first, spacewalk, 121 | 122 | 00:00:06.319 --> 00:00:07.859 123 | with an all female team, 124 | 125 | 00:00:08.475 --> 00:00:10.715 126 | I think many of us are looking forward 127 | 128 | 00:00:10.715 --> 00:00:13.215 129 | to it just being normal and 130 | 131 | 00:00:13.835 --> 00:00:16.480 132 | I think if it signifies anything, It is 133 | 134 | 00:00:16.779 --> 00:00:18.700 135 | to honor the the women who came before 136 | 137 | 00:00:18.700 --> 00:00:21.680 138 | us who, were skilled and qualified, 139 | 140 | 00:00:22.300 --> 00:00:24.779 141 | and didn't get the same opportunities that we 142 | 143 | 00:00:24.779 --> 00:00:25.439 144 | have today. 145 | ``` 146 | 147 | ## Output SRT 148 | 149 | When transcribing https://dpgr.am/spacewalk.wav, and running it through our library, this is the SRT output. 150 | 151 | ```py 152 | from deepgram_captions import DeepgramConverter, srt 153 | 154 | transcription = DeepgramConverter(dg_response) 155 | captions = srt(transcription) 156 | print(captions) 157 | ``` 158 | 159 | This is the result: 160 | 161 | ```text 162 | 1 163 | 00:00:00,080 --> 00:00:03,220 164 | Yeah. As as much as, it's worth celebrating, 165 | 166 | 2 167 | 00:00:04,400 --> 00:00:07,859 168 | the first, spacewalk, with an all female team, 169 | 170 | 3 171 | 00:00:08,475 --> 00:00:10,715 172 | I think many of us are looking forward 173 | 174 | 4 175 | 00:00:10,715 --> 00:00:14,235 176 | to it just being normal and I think 177 | 178 | 5 179 | 00:00:14,235 --> 00:00:17,340 180 | if it signifies anything, It is to honor 181 | 182 | 6 183 | 00:00:17,340 --> 00:00:19,820 184 | the the women who came before us who, 185 | 186 | 7 187 | 00:00:20,140 --> 00:00:23,580 188 | were skilled and qualified, and didn't get the 189 | 190 | 8 191 | 00:00:23,580 --> 00:00:25,439 192 | same opportunities that we have today. 193 | ``` 194 | 195 | ## Documentation 196 | 197 | You can learn more about the Deepgram API at [developers.deepgram.com](https://developers.deepgram.com/docs). 198 | 199 | ## Development and Contributing 200 | 201 | Interested in contributing? We ❤️ pull requests! 202 | 203 | To make sure our community is safe for all, be sure to review and agree to our 204 | [Code of Conduct](./.github/CODE_OF_CONDUCT.md). Then see the 205 | [Contribution](./.github/CONTRIBUTING.md) guidelines for more information. 206 | 207 | ## Getting Help 208 | 209 | We love to hear from you so if you have questions, comments or find a bug in the 210 | project, let us know! You can either: 211 | 212 | - [Open an issue in this repository](https://github.com/deepgram/[reponame]/issues/new) 213 | - [Join the Deepgram Github Discussions Community](https://github.com/orgs/deepgram/discussions) 214 | - [Join the Deepgram Discord Community](https://discord.gg/xWRaCDBtW4) 215 | 216 | [license]: LICENSE.txt 217 | -------------------------------------------------------------------------------- /deepgram_captions/__init__.py: -------------------------------------------------------------------------------- 1 | from .converters import ( 2 | DeepgramConverter, 3 | AssemblyAIConverter, 4 | WhisperTimestampedConverter, 5 | ) 6 | from .srt import srt 7 | from .webvtt import webvtt 8 | -------------------------------------------------------------------------------- /deepgram_captions/_version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.0.0" 2 | -------------------------------------------------------------------------------- /deepgram_captions/converters.py: -------------------------------------------------------------------------------- 1 | import json 2 | from .helpers import chunk_array, replace_text_with_word 3 | 4 | 5 | class ConverterException(Exception): 6 | pass 7 | 8 | 9 | class DeepgramConverter: 10 | def __init__(self, dg_response, use_exception: bool = True): 11 | if not isinstance(dg_response, dict): 12 | self.response = json.loads(dg_response.to_json()) 13 | else: 14 | self.response = dg_response 15 | 16 | if use_exception: 17 | one_valid_transcription = False 18 | for channel in self.response["results"]["channels"]: 19 | if channel["alternatives"][0]["transcript"] != "": 20 | one_valid_transcription = True 21 | break 22 | if "utterances" in self.response["results"]: 23 | for utterance in self.response["results"]["utterances"]: 24 | if utterance["transcript"] != "": 25 | one_valid_transcription = True 26 | break 27 | 28 | if not one_valid_transcription: 29 | raise ConverterException("No valid transcriptions found in response") 30 | 31 | def get_lines(self, line_length): 32 | results = self.response["results"] 33 | content = [] 34 | 35 | if results.get("utterances"): 36 | for utterance in results["utterances"]: 37 | if len(utterance["words"]) > line_length: 38 | content.extend(chunk_array(utterance["words"], line_length)) 39 | else: 40 | content.append(utterance["words"]) 41 | else: 42 | words = results["channels"][0]["alternatives"][0]["words"] 43 | diarize = ( 44 | "speaker" in words[0] if words else False 45 | ) # Check if diarization was used 46 | buffer = [] 47 | current_speaker = 0 48 | 49 | for word in words: 50 | if diarize and word.get("speaker", 0) != current_speaker: 51 | content.append(buffer) 52 | buffer = [] 53 | 54 | if len(buffer) == line_length: 55 | content.append(buffer) 56 | buffer = [] 57 | 58 | if diarize: 59 | current_speaker = word.get("speaker", 0) 60 | 61 | buffer.append(word) 62 | 63 | content.append(buffer) 64 | 65 | return content 66 | 67 | def get_headers(self): 68 | output = [] 69 | 70 | output.append("NOTE") 71 | output.append("Transcription provided by Deepgram") 72 | 73 | if self.response.get("metadata"): 74 | metadata = self.response["metadata"] 75 | if metadata.get("request_id"): 76 | output.append(f"Request Id: {metadata['request_id']}") 77 | if metadata.get("created"): 78 | output.append(f"Created: {metadata['created']}") 79 | if metadata.get("duration"): 80 | output.append(f"Duration: {metadata['duration']}") 81 | if metadata.get("channels"): 82 | output.append(f"Channels: {metadata['channels']}") 83 | 84 | return output 85 | 86 | 87 | class AssemblyAIConverter: 88 | def __init__(self, assembly_response): 89 | self.response = assembly_response 90 | 91 | def word_map(self, word): 92 | return { 93 | "word": word["text"], 94 | "start": word["start"], 95 | "end": word["end"], 96 | "confidence": word["confidence"], 97 | "punctuated_word": word["text"], 98 | "speaker": word["speaker"], 99 | } 100 | 101 | def get_lines(self, line_length: int = 8): 102 | results = self.response 103 | content = [] 104 | if results.get("utterances"): 105 | for utterance in results["utterances"]: 106 | if len(utterance["words"]) > line_length: 107 | content.extend( 108 | chunk_array( 109 | [self.word_map(w) for w in utterance["words"]], line_length 110 | ) 111 | ) 112 | else: 113 | content.append([self.word_map(w) for w in utterance["words"]]) 114 | else: 115 | content.extend( 116 | chunk_array([self.word_map(w) for w in results["words"]], line_length) 117 | ) 118 | 119 | return content 120 | 121 | 122 | class WhisperTimestampedConverter: 123 | def __init__(self, whisper_response): 124 | self.response = whisper_response 125 | 126 | def get_lines(self, line_length: int = 8): 127 | results = self.response 128 | content = [] 129 | if results.get("segments"): 130 | for segment in results["segments"]: 131 | if len(segment["words"]) > line_length: 132 | content.extend(chunk_array(segment["words"], line_length)) 133 | else: 134 | content.append(segment["words"]) 135 | 136 | res = replace_text_with_word(content) 137 | return res 138 | -------------------------------------------------------------------------------- /deepgram_captions/helpers.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | 4 | class EmptyTranscriptException(Exception): 5 | pass 6 | 7 | 8 | def seconds_to_timestamp(seconds, format="%H:%M:%S.%f"): 9 | seconds = round(seconds, 3) 10 | dt = datetime.utcfromtimestamp(seconds) 11 | formatted_time = dt.strftime(format) 12 | formatted_time = formatted_time[:-3] + formatted_time[-3:].lstrip("0") 13 | return formatted_time 14 | 15 | 16 | def chunk_array(arr, length): 17 | res = [] 18 | 19 | for i in range(0, len(arr), length): 20 | chunk_arr = arr[i : i + length] 21 | res.append(chunk_arr) 22 | 23 | return res 24 | 25 | 26 | def replace_text_with_word(content): 27 | for content_list in content: 28 | for dictionary in content_list: 29 | if "text" in dictionary: 30 | dictionary["word"] = dictionary.pop("text") 31 | return content 32 | -------------------------------------------------------------------------------- /deepgram_captions/srt.py: -------------------------------------------------------------------------------- 1 | from .helpers import seconds_to_timestamp, EmptyTranscriptException 2 | 3 | 4 | def srt(converter, line_length=None): 5 | output = [] 6 | 7 | if line_length == None: 8 | line_length = 8 9 | 10 | lines = converter.get_lines(line_length) 11 | entry = 1 12 | 13 | current_speaker = None 14 | 15 | if not lines[0]: 16 | raise EmptyTranscriptException("No transcript data found") 17 | 18 | for words in lines: 19 | output.append(str(entry)) 20 | entry += 1 21 | 22 | first_word = words[0] 23 | last_word = words[-1] 24 | 25 | start_time = seconds_to_timestamp(first_word["start"], "%H:%M:%S,%f") 26 | end_time = seconds_to_timestamp(last_word["end"], "%H:%M:%S,%f") 27 | 28 | output.append(f"{start_time} --> {end_time}") 29 | 30 | if "speaker" in first_word: 31 | if current_speaker is None or current_speaker != first_word["speaker"]: 32 | current_speaker = first_word["speaker"] 33 | output.append(f"[speaker {current_speaker}]") 34 | 35 | punctuated_words = [word.get("punctuated_word", word["word"]) for word in words] 36 | output.append(" ".join(punctuated_words)) 37 | output.append("") 38 | 39 | return "\n".join(output) 40 | -------------------------------------------------------------------------------- /deepgram_captions/webvtt.py: -------------------------------------------------------------------------------- 1 | from .helpers import seconds_to_timestamp, EmptyTranscriptException 2 | 3 | 4 | def webvtt(converter, line_length=None): 5 | output = [] 6 | output.append("WEBVTT") 7 | output.append("") 8 | 9 | if line_length == None: 10 | line_length = 8 11 | 12 | if hasattr(converter, "get_headers") and callable( 13 | getattr(converter, "get_headers") 14 | ): 15 | output.append("\n".join(converter.get_headers())) 16 | 17 | if hasattr(converter, "get_headers") and callable( 18 | getattr(converter, "get_headers") 19 | ): 20 | output.append("") 21 | 22 | if hasattr(converter, "get_lines") and callable(getattr(converter, "get_lines")): 23 | lines = converter.get_lines(line_length) 24 | 25 | if not lines[0]: 26 | raise EmptyTranscriptException("No transcript data found") 27 | 28 | speaker_labels = "speaker" in lines[0][0] 29 | 30 | for words in lines: 31 | first_word = words[0] 32 | last_word = words[-1] 33 | 34 | output.append( 35 | f"{seconds_to_timestamp(first_word['start'])} --> {seconds_to_timestamp(last_word['end'])}" 36 | ) 37 | 38 | line = " ".join(word.get("punctuated_word", word["word"]) for word in words) 39 | speaker_label = ( 40 | f"" if speaker_labels else "" 41 | ) 42 | 43 | output.append(f"{speaker_label}{line}") 44 | output.append("") 45 | 46 | return "\n".join(output) 47 | -------------------------------------------------------------------------------- /example.py: -------------------------------------------------------------------------------- 1 | from deepgram_captions import ( 2 | srt, 3 | webvtt, 4 | DeepgramConverter, 5 | AssemblyAIConverter, 6 | WhisperTimestampedConverter, 7 | ) 8 | import json 9 | 10 | json_file_dg_transcription = "test/dg_transcription.json" 11 | json_file_dg_utterances = "test/dg_utterances.json" 12 | json_file_dg_speakers = "test/dg_speakers.json" 13 | json_file_dg_speakers_no_utterances = "test/dg_speakers_no_utterances.json" 14 | json_file_assemblyai_transcription = "test/assemblyai_transcription.json" 15 | json_file_assemblyai_utterances = "test/assemblyai_utterances.json" 16 | json_file_whisper_timestamped = "test/whisper_timestamped.json" 17 | json_file_dg_whisper_transcription = "test/dg_whisper_transcription.json" 18 | 19 | dg_transcription = None 20 | dg_utterances = None 21 | dg_speakers = None 22 | dg_speakers_no_utterances = None 23 | assemblyai_transcription = None 24 | assemblyai_utterances = None 25 | whisper_timestamped = None 26 | dg_whisper_transcription = None 27 | 28 | with open(json_file_dg_transcription, "r") as json_file: 29 | dg_transcription = json.load(json_file) 30 | with open(json_file_dg_utterances, "r") as json_file: 31 | dg_utterances = json.load(json_file) 32 | with open(json_file_dg_speakers, "r") as json_file: 33 | dg_speakers = json.load(json_file) 34 | with open(json_file_dg_speakers_no_utterances, "r") as json_file: 35 | dg_speakers_no_utterances = json.load(json_file) 36 | with open(json_file_assemblyai_transcription, "r") as json_file: 37 | assemblyai_transcription = json.load(json_file) 38 | with open(json_file_assemblyai_utterances, "r") as json_file: 39 | assemblyai_utterances = json.load(json_file) 40 | with open(json_file_whisper_timestamped, "r") as json_file: 41 | whisper_timestamped = json.load(json_file) 42 | with open(json_file_dg_whisper_transcription, "r") as json_file: 43 | dg_whisper_transcription = json.load(json_file) 44 | 45 | # Uncomment a section to test the converter: 46 | 47 | line_length = 10 48 | 49 | deepgram = DeepgramConverter(dg_speakers) 50 | captions = webvtt(deepgram, line_length) 51 | print(captions) 52 | 53 | # assembly = AssemblyAIConverter(assemblyai_utterances) 54 | # captions = webvtt(assembly) 55 | # print(captions) 56 | 57 | # whisperTS = WhisperTimestampedConverter(whisper_timestamped) 58 | # captions = srt(whisperTS) 59 | # print(captions) 60 | 61 | # deepgram_whisper = DeepgramConverter(dg_whisper_transcription) 62 | # captions = webvtt(deepgram_whisper) 63 | # print(captions) 64 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | # pip install -r requirements-dev.txt 2 | 3 | # Testing 4 | pytest 5 | 6 | # Formatting 7 | black -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | import os.path 3 | 4 | with open("README.md", "r", encoding="utf-8") as fh: 5 | LONG_DESCRIPTION = fh.read() 6 | 7 | with open( 8 | os.path.join( 9 | os.path.abspath(os.path.dirname(__file__)), "deepgram_captions", "_version.py" 10 | ), 11 | encoding="utf8", 12 | ) as file: 13 | exec(file.read()) 14 | # imports as __version__ 15 | 16 | DESCRIPTION = "A Python package for generating captions." 17 | 18 | setup( 19 | name="deepgram-captions", 20 | version=__version__, 21 | author="Deepgram", 22 | author_email="devrel@deepgram.com", 23 | url="https://github.com/deepgram/deepgram-python-captions", 24 | description=DESCRIPTION, 25 | long_description=LONG_DESCRIPTION, 26 | long_description_content_type="text/markdown", 27 | packages=find_packages(), 28 | install_requires=[], 29 | extras_require={ 30 | "dev": [ 31 | "black", 32 | "pytest", 33 | ], 34 | }, 35 | keywords=["deepgram", "captions", "srt", "webvtt"], 36 | classifiers=[ 37 | "Development Status :: 3 - Alpha", 38 | "Intended Audience :: Developers", 39 | "License :: OSI Approved :: MIT License", 40 | "Programming Language :: Python :: 3", 41 | "Programming Language :: Python :: 3 :: Only", 42 | "Topic :: Multimedia :: Sound/Audio :: Speech", 43 | "Topic :: Text Processing :: General", 44 | ], 45 | ) 46 | -------------------------------------------------------------------------------- /test/__pycache__/test_assembly.cpython-310-pytest-7.4.3.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepgram/deepgram-python-captions/20ac9324a5a856ec5b0f8b9a86e13065bf5dda03/test/__pycache__/test_assembly.cpython-310-pytest-7.4.3.pyc -------------------------------------------------------------------------------- /test/__pycache__/test_deepgram.cpython-310-pytest-7.4.3.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deepgram/deepgram-python-captions/20ac9324a5a856ec5b0f8b9a86e13065bf5dda03/test/__pycache__/test_deepgram.cpython-310-pytest-7.4.3.pyc -------------------------------------------------------------------------------- /test/assemblyai_transcription.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "6a9zvzyx8u-f8fd-4bf4-9614-9497d76c6891", 3 | "language_model": "assemblyai_default", 4 | "acoustic_model": "assemblyai_default", 5 | "language_code": "en", 6 | "status": "completed", 7 | "audio_url": "https://storage.googleapis.com/aai-playground-files-production/a72ebcc7-e6c3-4337-92cc-bc8ca86daa14?Expires=1720606027&GoogleAccessId=aai-playground%40assemblyai-web-production.iam.gserviceaccount.com&Signature=u2A4Zw1kohm7FOiw%2FT48XcPwsB3Fg5n%2Fs%2FQA9R8UOOSCN%2F12h%2FE4KNuEpkj713rpbltqNQ4AZhdkZXqS1zsEqOvj%2BH%2BN1a7GKuHgTJh%2BQKoGlNybTm%2F7bizW9ZcnFWNagpTa9KKgup1Z6WBOZxtvEujF4ERK7wYLM1pGwpEmHCPJuay1B0xoKFkYw2UH%2FLk02ieDymPNZs%2BsZ4YQqdeZwhWvMYHMS%2BG6Lc7DZ4Gu1XOJVRtO9KLK9jBW9Ne3k%2F0gNqRbj8kXd38Q4CuLUXs6MdNr4wtb1uhz8PUlcvEZfWcJwBZOPvP1rvGQVDexV2V4j7wNoDpyF9FdMeiaVs%2Fneg%3D%3D", 8 | "text": "Yeah. As much as it's worth celebrating the first spacewalk with an all female team, I think many of us are looking forward to it just being normal. And I think if it signifies anything, it is to honor the women who came before us, who were skilled and qualified and didn't get the same opportunities that we have today.", 9 | "words": [ 10 | { 11 | "text": "Yeah.", 12 | "start": 170, 13 | "end": 718, 14 | "confidence": 0.9941, 15 | "speaker": "A" 16 | }, 17 | { 18 | "text": "As", 19 | "start": 884, 20 | "end": 1166, 21 | "confidence": 0.99992, 22 | "speaker": "A" 23 | }, 24 | { 25 | "text": "much", 26 | "start": 1188, 27 | "end": 1326, 28 | "confidence": 0.99996, 29 | "speaker": "A" 30 | }, 31 | { 32 | "text": "as", 33 | "start": 1348, 34 | "end": 1822, 35 | "confidence": 0.96266, 36 | "speaker": "A" 37 | }, 38 | { 39 | "text": "it's", 40 | "start": 1956, 41 | "end": 2346, 42 | "confidence": 0.99978, 43 | "speaker": "A" 44 | }, 45 | { 46 | "text": "worth", 47 | "start": 2378, 48 | "end": 2766, 49 | "confidence": 0.99999, 50 | "speaker": "A" 51 | }, 52 | { 53 | "text": "celebrating", 54 | "start": 2868, 55 | "end": 3870, 56 | "confidence": 0.99245, 57 | "speaker": "A" 58 | }, 59 | { 60 | "text": "the", 61 | "start": 4290, 62 | "end": 4654, 63 | "confidence": 0.55, 64 | "speaker": "A" 65 | }, 66 | { 67 | "text": "first", 68 | "start": 4692, 69 | "end": 5134, 70 | "confidence": 1, 71 | "speaker": "A" 72 | }, 73 | { 74 | "text": "spacewalk", 75 | "start": 5252, 76 | "end": 6234, 77 | "confidence": 0.9899, 78 | "speaker": "A" 79 | }, 80 | { 81 | "text": "with", 82 | "start": 6362, 83 | "end": 6606, 84 | "confidence": 0.99998, 85 | "speaker": "A" 86 | }, 87 | { 88 | "text": "an", 89 | "start": 6628, 90 | "end": 6766, 91 | "confidence": 0.99914, 92 | "speaker": "A" 93 | }, 94 | { 95 | "text": "all", 96 | "start": 6788, 97 | "end": 6974, 98 | "confidence": 0.99989, 99 | "speaker": "A" 100 | }, 101 | { 102 | "text": "female", 103 | "start": 7012, 104 | "end": 7354, 105 | "confidence": 0.99989, 106 | "speaker": "A" 107 | }, 108 | { 109 | "text": "team,", 110 | "start": 7402, 111 | "end": 8000, 112 | "confidence": 1, 113 | "speaker": "A" 114 | }, 115 | { 116 | "text": "I", 117 | "start": 8370, 118 | "end": 8686, 119 | "confidence": 1, 120 | "speaker": "A" 121 | }, 122 | { 123 | "text": "think", 124 | "start": 8708, 125 | "end": 8894, 126 | "confidence": 1, 127 | "speaker": "A" 128 | }, 129 | { 130 | "text": "many", 131 | "start": 8932, 132 | "end": 9134, 133 | "confidence": 0.99999, 134 | "speaker": "A" 135 | }, 136 | { 137 | "text": "of", 138 | "start": 9172, 139 | "end": 9326, 140 | "confidence": 1, 141 | "speaker": "A" 142 | }, 143 | { 144 | "text": "us", 145 | "start": 9348, 146 | "end": 9774, 147 | "confidence": 1, 148 | "speaker": "A" 149 | }, 150 | { 151 | "text": "are", 152 | "start": 9892, 153 | "end": 10222, 154 | "confidence": 0.57763, 155 | "speaker": "A" 156 | }, 157 | { 158 | "text": "looking", 159 | "start": 10276, 160 | "end": 10494, 161 | "confidence": 1, 162 | "speaker": "A" 163 | }, 164 | { 165 | "text": "forward", 166 | "start": 10532, 167 | "end": 10734, 168 | "confidence": 0.99999, 169 | "speaker": "A" 170 | }, 171 | { 172 | "text": "to", 173 | "start": 10772, 174 | "end": 10974, 175 | "confidence": 0.96, 176 | "speaker": "A" 177 | }, 178 | { 179 | "text": "it", 180 | "start": 11012, 181 | "end": 11166, 182 | "confidence": 0.99995, 183 | "speaker": "A" 184 | }, 185 | { 186 | "text": "just", 187 | "start": 11188, 188 | "end": 11374, 189 | "confidence": 0.99996, 190 | "speaker": "A" 191 | }, 192 | { 193 | "text": "being", 194 | "start": 11412, 195 | "end": 11806, 196 | "confidence": 0.99998, 197 | "speaker": "A" 198 | }, 199 | { 200 | "text": "normal.", 201 | "start": 11908, 202 | "end": 12666, 203 | "confidence": 0.99999, 204 | "speaker": "A" 205 | }, 206 | { 207 | "text": "And", 208 | "start": 12778, 209 | "end": 13534, 210 | "confidence": 1, 211 | "speaker": "A" 212 | }, 213 | { 214 | "text": "I", 215 | "start": 13732, 216 | "end": 14046, 217 | "confidence": 0.65, 218 | "speaker": "A" 219 | }, 220 | { 221 | "text": "think", 222 | "start": 14068, 223 | "end": 14254, 224 | "confidence": 0.99988, 225 | "speaker": "A" 226 | }, 227 | { 228 | "text": "if", 229 | "start": 14292, 230 | "end": 14446, 231 | "confidence": 0.99998, 232 | "speaker": "A" 233 | }, 234 | { 235 | "text": "it", 236 | "start": 14468, 237 | "end": 14606, 238 | "confidence": 0.99999, 239 | "speaker": "A" 240 | }, 241 | { 242 | "text": "signifies", 243 | "start": 14628, 244 | "end": 15114, 245 | "confidence": 0.99175, 246 | "speaker": "A" 247 | }, 248 | { 249 | "text": "anything,", 250 | "start": 15162, 251 | "end": 15614, 252 | "confidence": 0.99997, 253 | "speaker": "A" 254 | }, 255 | { 256 | "text": "it", 257 | "start": 15732, 258 | "end": 16014, 259 | "confidence": 0.5311, 260 | "speaker": "A" 261 | }, 262 | { 263 | "text": "is", 264 | "start": 16052, 265 | "end": 16590, 266 | "confidence": 0.97364, 267 | "speaker": "A" 268 | }, 269 | { 270 | "text": "to", 271 | "start": 16740, 272 | "end": 17054, 273 | "confidence": 0.76, 274 | "speaker": "A" 275 | }, 276 | { 277 | "text": "honor", 278 | "start": 17092, 279 | "end": 17530, 280 | "confidence": 0.99997, 281 | "speaker": "A" 282 | }, 283 | { 284 | "text": "the", 285 | "start": 17610, 286 | "end": 17806, 287 | "confidence": 1, 288 | "speaker": "A" 289 | }, 290 | { 291 | "text": "women", 292 | "start": 17828, 293 | "end": 18014, 294 | "confidence": 0.99908, 295 | "speaker": "A" 296 | }, 297 | { 298 | "text": "who", 299 | "start": 18052, 300 | "end": 18206, 301 | "confidence": 0.99999, 302 | "speaker": "A" 303 | }, 304 | { 305 | "text": "came", 306 | "start": 18228, 307 | "end": 18414, 308 | "confidence": 1, 309 | "speaker": "A" 310 | }, 311 | { 312 | "text": "before", 313 | "start": 18452, 314 | "end": 18702, 315 | "confidence": 0.99999, 316 | "speaker": "A" 317 | }, 318 | { 319 | "text": "us,", 320 | "start": 18756, 321 | "end": 19310, 322 | "confidence": 0.65, 323 | "speaker": "A" 324 | }, 325 | { 326 | "text": "who", 327 | "start": 19460, 328 | "end": 20014, 329 | "confidence": 0.99997, 330 | "speaker": "A" 331 | }, 332 | { 333 | "text": "were", 334 | "start": 20132, 335 | "end": 20414, 336 | "confidence": 0.99969, 337 | "speaker": "A" 338 | }, 339 | { 340 | "text": "skilled", 341 | "start": 20452, 342 | "end": 20906, 343 | "confidence": 0.60398, 344 | "speaker": "A" 345 | }, 346 | { 347 | "text": "and", 348 | "start": 20938, 349 | "end": 21134, 350 | "confidence": 1, 351 | "speaker": "A" 352 | }, 353 | { 354 | "text": "qualified", 355 | "start": 21172, 356 | "end": 22026, 357 | "confidence": 0.99998, 358 | "speaker": "A" 359 | }, 360 | { 361 | "text": "and", 362 | "start": 22218, 363 | "end": 22622, 364 | "confidence": 0.95, 365 | "speaker": "A" 366 | }, 367 | { 368 | "text": "didn't", 369 | "start": 22676, 370 | "end": 22906, 371 | "confidence": 0.9999, 372 | "speaker": "A" 373 | }, 374 | { 375 | "text": "get", 376 | "start": 22938, 377 | "end": 23278, 378 | "confidence": 0.99997, 379 | "speaker": "A" 380 | }, 381 | { 382 | "text": "the", 383 | "start": 23364, 384 | "end": 23614, 385 | "confidence": 1, 386 | "speaker": "A" 387 | }, 388 | { 389 | "text": "same", 390 | "start": 23652, 391 | "end": 23902, 392 | "confidence": 0.99999, 393 | "speaker": "A" 394 | }, 395 | { 396 | "text": "opportunities", 397 | "start": 23956, 398 | "end": 24442, 399 | "confidence": 0.98808, 400 | "speaker": "A" 401 | }, 402 | { 403 | "text": "that", 404 | "start": 24506, 405 | "end": 24638, 406 | "confidence": 0.99985, 407 | "speaker": "A" 408 | }, 409 | { 410 | "text": "we", 411 | "start": 24644, 412 | "end": 24766, 413 | "confidence": 0.84796, 414 | "speaker": "A" 415 | }, 416 | { 417 | "text": "have", 418 | "start": 24788, 419 | "end": 24998, 420 | "confidence": 0.9963, 421 | "speaker": "A" 422 | }, 423 | { 424 | "text": "today.", 425 | "start": 25044, 426 | "end": 25140, 427 | "confidence": 0.99784, 428 | "speaker": "A" 429 | } 430 | ], 431 | "utterances": null, 432 | "confidence": 0.9493999999999997, 433 | "audio_duration": 25, 434 | "punctuate": true, 435 | "format_text": true, 436 | "dual_channel": false, 437 | "webhook_url": "https://assemblyai-web-production.uc.r.appspot.com/transcript/push", 438 | "webhook_status_code": 204, 439 | "webhook_auth": false, 440 | "webhook_auth_header_name": null, 441 | "speed_boost": false, 442 | "auto_highlights_result": null, 443 | "auto_highlights": false, 444 | "audio_start_from": null, 445 | "audio_end_at": null, 446 | "word_boost": [], 447 | "boost_param": null, 448 | "filter_profanity": false, 449 | "redact_pii": false, 450 | "redact_pii_audio": false, 451 | "redact_pii_audio_quality": null, 452 | "redact_pii_policies": null, 453 | "redact_pii_sub": null, 454 | "speaker_labels": true, 455 | "content_safety": false, 456 | "iab_categories": false, 457 | "content_safety_labels": { 458 | "status": "unavailable", 459 | "results": [], 460 | "summary": {} 461 | }, 462 | "iab_categories_result": { 463 | "status": "unavailable", 464 | "results": [], 465 | "summary": {} 466 | }, 467 | "language_detection": true, 468 | "custom_spelling": null, 469 | "throttled": null, 470 | "auto_chapters": false, 471 | "summarization": false, 472 | "summary_type": null, 473 | "summary_model": null, 474 | "custom_topics": false, 475 | "topics": [], 476 | "speech_threshold": null, 477 | "disfluencies": false, 478 | "sentiment_analysis": false, 479 | "chapters": null, 480 | "sentiment_analysis_results": null, 481 | "entity_detection": false, 482 | "entities": null, 483 | "summary": null, 484 | "speakers_expected": null 485 | } 486 | -------------------------------------------------------------------------------- /test/assemblyai_utterances.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "6a9zvzyx8u-f8fd-4bf4-9614-9497d76c6891", 3 | "language_model": "assemblyai_default", 4 | "acoustic_model": "assemblyai_default", 5 | "language_code": "en", 6 | "status": "completed", 7 | "audio_url": "https://storage.googleapis.com/aai-playground-files-production/a72ebcc7-e6c3-4337-92cc-bc8ca86daa14?Expires=1720606027&GoogleAccessId=aai-playground%40assemblyai-web-production.iam.gserviceaccount.com&Signature=u2A4Zw1kohm7FOiw%2FT48XcPwsB3Fg5n%2Fs%2FQA9R8UOOSCN%2F12h%2FE4KNuEpkj713rpbltqNQ4AZhdkZXqS1zsEqOvj%2BH%2BN1a7GKuHgTJh%2BQKoGlNybTm%2F7bizW9ZcnFWNagpTa9KKgup1Z6WBOZxtvEujF4ERK7wYLM1pGwpEmHCPJuay1B0xoKFkYw2UH%2FLk02ieDymPNZs%2BsZ4YQqdeZwhWvMYHMS%2BG6Lc7DZ4Gu1XOJVRtO9KLK9jBW9Ne3k%2F0gNqRbj8kXd38Q4CuLUXs6MdNr4wtb1uhz8PUlcvEZfWcJwBZOPvP1rvGQVDexV2V4j7wNoDpyF9FdMeiaVs%2Fneg%3D%3D", 8 | "text": "Yeah. As much as it's worth celebrating the first spacewalk with an all female team, I think many of us are looking forward to it just being normal. And I think if it signifies anything, it is to honor the women who came before us, who were skilled and qualified and didn't get the same opportunities that we have today.", 9 | "words": [ 10 | { 11 | "text": "Yeah.", 12 | "start": 170, 13 | "end": 718, 14 | "confidence": 0.9941, 15 | "speaker": "A" 16 | }, 17 | { 18 | "text": "As", 19 | "start": 884, 20 | "end": 1166, 21 | "confidence": 0.99992, 22 | "speaker": "A" 23 | }, 24 | { 25 | "text": "much", 26 | "start": 1188, 27 | "end": 1326, 28 | "confidence": 0.99996, 29 | "speaker": "A" 30 | }, 31 | { 32 | "text": "as", 33 | "start": 1348, 34 | "end": 1822, 35 | "confidence": 0.96266, 36 | "speaker": "A" 37 | }, 38 | { 39 | "text": "it's", 40 | "start": 1956, 41 | "end": 2346, 42 | "confidence": 0.99978, 43 | "speaker": "A" 44 | }, 45 | { 46 | "text": "worth", 47 | "start": 2378, 48 | "end": 2766, 49 | "confidence": 0.99999, 50 | "speaker": "A" 51 | }, 52 | { 53 | "text": "celebrating", 54 | "start": 2868, 55 | "end": 3870, 56 | "confidence": 0.99245, 57 | "speaker": "A" 58 | }, 59 | { 60 | "text": "the", 61 | "start": 4290, 62 | "end": 4654, 63 | "confidence": 0.55, 64 | "speaker": "A" 65 | }, 66 | { 67 | "text": "first", 68 | "start": 4692, 69 | "end": 5134, 70 | "confidence": 1, 71 | "speaker": "A" 72 | }, 73 | { 74 | "text": "spacewalk", 75 | "start": 5252, 76 | "end": 6234, 77 | "confidence": 0.9899, 78 | "speaker": "A" 79 | }, 80 | { 81 | "text": "with", 82 | "start": 6362, 83 | "end": 6606, 84 | "confidence": 0.99998, 85 | "speaker": "A" 86 | }, 87 | { 88 | "text": "an", 89 | "start": 6628, 90 | "end": 6766, 91 | "confidence": 0.99914, 92 | "speaker": "A" 93 | }, 94 | { 95 | "text": "all", 96 | "start": 6788, 97 | "end": 6974, 98 | "confidence": 0.99989, 99 | "speaker": "A" 100 | }, 101 | { 102 | "text": "female", 103 | "start": 7012, 104 | "end": 7354, 105 | "confidence": 0.99989, 106 | "speaker": "A" 107 | }, 108 | { 109 | "text": "team,", 110 | "start": 7402, 111 | "end": 8000, 112 | "confidence": 1, 113 | "speaker": "A" 114 | }, 115 | { 116 | "text": "I", 117 | "start": 8370, 118 | "end": 8686, 119 | "confidence": 1, 120 | "speaker": "A" 121 | }, 122 | { 123 | "text": "think", 124 | "start": 8708, 125 | "end": 8894, 126 | "confidence": 1, 127 | "speaker": "A" 128 | }, 129 | { 130 | "text": "many", 131 | "start": 8932, 132 | "end": 9134, 133 | "confidence": 0.99999, 134 | "speaker": "A" 135 | }, 136 | { 137 | "text": "of", 138 | "start": 9172, 139 | "end": 9326, 140 | "confidence": 1, 141 | "speaker": "A" 142 | }, 143 | { 144 | "text": "us", 145 | "start": 9348, 146 | "end": 9774, 147 | "confidence": 1, 148 | "speaker": "A" 149 | }, 150 | { 151 | "text": "are", 152 | "start": 9892, 153 | "end": 10222, 154 | "confidence": 0.57763, 155 | "speaker": "A" 156 | }, 157 | { 158 | "text": "looking", 159 | "start": 10276, 160 | "end": 10494, 161 | "confidence": 1, 162 | "speaker": "A" 163 | }, 164 | { 165 | "text": "forward", 166 | "start": 10532, 167 | "end": 10734, 168 | "confidence": 0.99999, 169 | "speaker": "A" 170 | }, 171 | { 172 | "text": "to", 173 | "start": 10772, 174 | "end": 10974, 175 | "confidence": 0.96, 176 | "speaker": "A" 177 | }, 178 | { 179 | "text": "it", 180 | "start": 11012, 181 | "end": 11166, 182 | "confidence": 0.99995, 183 | "speaker": "A" 184 | }, 185 | { 186 | "text": "just", 187 | "start": 11188, 188 | "end": 11374, 189 | "confidence": 0.99996, 190 | "speaker": "A" 191 | }, 192 | { 193 | "text": "being", 194 | "start": 11412, 195 | "end": 11806, 196 | "confidence": 0.99998, 197 | "speaker": "A" 198 | }, 199 | { 200 | "text": "normal.", 201 | "start": 11908, 202 | "end": 12666, 203 | "confidence": 0.99999, 204 | "speaker": "A" 205 | }, 206 | { 207 | "text": "And", 208 | "start": 12778, 209 | "end": 13534, 210 | "confidence": 1, 211 | "speaker": "A" 212 | }, 213 | { 214 | "text": "I", 215 | "start": 13732, 216 | "end": 14046, 217 | "confidence": 0.65, 218 | "speaker": "A" 219 | }, 220 | { 221 | "text": "think", 222 | "start": 14068, 223 | "end": 14254, 224 | "confidence": 0.99988, 225 | "speaker": "A" 226 | }, 227 | { 228 | "text": "if", 229 | "start": 14292, 230 | "end": 14446, 231 | "confidence": 0.99998, 232 | "speaker": "A" 233 | }, 234 | { 235 | "text": "it", 236 | "start": 14468, 237 | "end": 14606, 238 | "confidence": 0.99999, 239 | "speaker": "A" 240 | }, 241 | { 242 | "text": "signifies", 243 | "start": 14628, 244 | "end": 15114, 245 | "confidence": 0.99175, 246 | "speaker": "A" 247 | }, 248 | { 249 | "text": "anything,", 250 | "start": 15162, 251 | "end": 15614, 252 | "confidence": 0.99997, 253 | "speaker": "A" 254 | }, 255 | { 256 | "text": "it", 257 | "start": 15732, 258 | "end": 16014, 259 | "confidence": 0.5311, 260 | "speaker": "A" 261 | }, 262 | { 263 | "text": "is", 264 | "start": 16052, 265 | "end": 16590, 266 | "confidence": 0.97364, 267 | "speaker": "A" 268 | }, 269 | { 270 | "text": "to", 271 | "start": 16740, 272 | "end": 17054, 273 | "confidence": 0.76, 274 | "speaker": "A" 275 | }, 276 | { 277 | "text": "honor", 278 | "start": 17092, 279 | "end": 17530, 280 | "confidence": 0.99997, 281 | "speaker": "A" 282 | }, 283 | { 284 | "text": "the", 285 | "start": 17610, 286 | "end": 17806, 287 | "confidence": 1, 288 | "speaker": "A" 289 | }, 290 | { 291 | "text": "women", 292 | "start": 17828, 293 | "end": 18014, 294 | "confidence": 0.99908, 295 | "speaker": "A" 296 | }, 297 | { 298 | "text": "who", 299 | "start": 18052, 300 | "end": 18206, 301 | "confidence": 0.99999, 302 | "speaker": "A" 303 | }, 304 | { 305 | "text": "came", 306 | "start": 18228, 307 | "end": 18414, 308 | "confidence": 1, 309 | "speaker": "A" 310 | }, 311 | { 312 | "text": "before", 313 | "start": 18452, 314 | "end": 18702, 315 | "confidence": 0.99999, 316 | "speaker": "A" 317 | }, 318 | { 319 | "text": "us,", 320 | "start": 18756, 321 | "end": 19310, 322 | "confidence": 0.65, 323 | "speaker": "A" 324 | }, 325 | { 326 | "text": "who", 327 | "start": 19460, 328 | "end": 20014, 329 | "confidence": 0.99997, 330 | "speaker": "A" 331 | }, 332 | { 333 | "text": "were", 334 | "start": 20132, 335 | "end": 20414, 336 | "confidence": 0.99969, 337 | "speaker": "A" 338 | }, 339 | { 340 | "text": "skilled", 341 | "start": 20452, 342 | "end": 20906, 343 | "confidence": 0.60398, 344 | "speaker": "A" 345 | }, 346 | { 347 | "text": "and", 348 | "start": 20938, 349 | "end": 21134, 350 | "confidence": 1, 351 | "speaker": "A" 352 | }, 353 | { 354 | "text": "qualified", 355 | "start": 21172, 356 | "end": 22026, 357 | "confidence": 0.99998, 358 | "speaker": "A" 359 | }, 360 | { 361 | "text": "and", 362 | "start": 22218, 363 | "end": 22622, 364 | "confidence": 0.95, 365 | "speaker": "A" 366 | }, 367 | { 368 | "text": "didn't", 369 | "start": 22676, 370 | "end": 22906, 371 | "confidence": 0.9999, 372 | "speaker": "A" 373 | }, 374 | { 375 | "text": "get", 376 | "start": 22938, 377 | "end": 23278, 378 | "confidence": 0.99997, 379 | "speaker": "A" 380 | }, 381 | { 382 | "text": "the", 383 | "start": 23364, 384 | "end": 23614, 385 | "confidence": 1, 386 | "speaker": "A" 387 | }, 388 | { 389 | "text": "same", 390 | "start": 23652, 391 | "end": 23902, 392 | "confidence": 0.99999, 393 | "speaker": "A" 394 | }, 395 | { 396 | "text": "opportunities", 397 | "start": 23956, 398 | "end": 24442, 399 | "confidence": 0.98808, 400 | "speaker": "A" 401 | }, 402 | { 403 | "text": "that", 404 | "start": 24506, 405 | "end": 24638, 406 | "confidence": 0.99985, 407 | "speaker": "A" 408 | }, 409 | { 410 | "text": "we", 411 | "start": 24644, 412 | "end": 24766, 413 | "confidence": 0.84796, 414 | "speaker": "A" 415 | }, 416 | { 417 | "text": "have", 418 | "start": 24788, 419 | "end": 24998, 420 | "confidence": 0.9963, 421 | "speaker": "A" 422 | }, 423 | { 424 | "text": "today.", 425 | "start": 25044, 426 | "end": 25140, 427 | "confidence": 0.99784, 428 | "speaker": "A" 429 | } 430 | ], 431 | "utterances": [ 432 | { 433 | "confidence": 0.9493999999999997, 434 | "end": 25140, 435 | "speaker": "A", 436 | "start": 170, 437 | "text": "Yeah. As much as it's worth celebrating the first spacewalk with an all female team, I think many of us are looking forward to it just being normal. And I think if it signifies anything, it is to honor the women who came before us, who were skilled and qualified and didn't get the same opportunities that we have today.", 438 | "words": [ 439 | { 440 | "text": "Yeah.", 441 | "start": 170, 442 | "end": 718, 443 | "confidence": 0.9941, 444 | "speaker": "A" 445 | }, 446 | { 447 | "text": "As", 448 | "start": 884, 449 | "end": 1166, 450 | "confidence": 0.99992, 451 | "speaker": "A" 452 | }, 453 | { 454 | "text": "much", 455 | "start": 1188, 456 | "end": 1326, 457 | "confidence": 0.99996, 458 | "speaker": "A" 459 | }, 460 | { 461 | "text": "as", 462 | "start": 1348, 463 | "end": 1822, 464 | "confidence": 0.96266, 465 | "speaker": "A" 466 | }, 467 | { 468 | "text": "it's", 469 | "start": 1956, 470 | "end": 2346, 471 | "confidence": 0.99978, 472 | "speaker": "A" 473 | }, 474 | { 475 | "text": "worth", 476 | "start": 2378, 477 | "end": 2766, 478 | "confidence": 0.99999, 479 | "speaker": "A" 480 | }, 481 | { 482 | "text": "celebrating", 483 | "start": 2868, 484 | "end": 3870, 485 | "confidence": 0.99245, 486 | "speaker": "A" 487 | }, 488 | { 489 | "text": "the", 490 | "start": 4290, 491 | "end": 4654, 492 | "confidence": 0.55, 493 | "speaker": "A" 494 | }, 495 | { 496 | "text": "first", 497 | "start": 4692, 498 | "end": 5134, 499 | "confidence": 1, 500 | "speaker": "A" 501 | }, 502 | { 503 | "text": "spacewalk", 504 | "start": 5252, 505 | "end": 6234, 506 | "confidence": 0.9899, 507 | "speaker": "A" 508 | }, 509 | { 510 | "text": "with", 511 | "start": 6362, 512 | "end": 6606, 513 | "confidence": 0.99998, 514 | "speaker": "A" 515 | }, 516 | { 517 | "text": "an", 518 | "start": 6628, 519 | "end": 6766, 520 | "confidence": 0.99914, 521 | "speaker": "A" 522 | }, 523 | { 524 | "text": "all", 525 | "start": 6788, 526 | "end": 6974, 527 | "confidence": 0.99989, 528 | "speaker": "A" 529 | }, 530 | { 531 | "text": "female", 532 | "start": 7012, 533 | "end": 7354, 534 | "confidence": 0.99989, 535 | "speaker": "A" 536 | }, 537 | { 538 | "text": "team,", 539 | "start": 7402, 540 | "end": 8000, 541 | "confidence": 1, 542 | "speaker": "A" 543 | }, 544 | { 545 | "text": "I", 546 | "start": 8370, 547 | "end": 8686, 548 | "confidence": 1, 549 | "speaker": "A" 550 | }, 551 | { 552 | "text": "think", 553 | "start": 8708, 554 | "end": 8894, 555 | "confidence": 1, 556 | "speaker": "A" 557 | }, 558 | { 559 | "text": "many", 560 | "start": 8932, 561 | "end": 9134, 562 | "confidence": 0.99999, 563 | "speaker": "A" 564 | }, 565 | { 566 | "text": "of", 567 | "start": 9172, 568 | "end": 9326, 569 | "confidence": 1, 570 | "speaker": "A" 571 | }, 572 | { 573 | "text": "us", 574 | "start": 9348, 575 | "end": 9774, 576 | "confidence": 1, 577 | "speaker": "A" 578 | }, 579 | { 580 | "text": "are", 581 | "start": 9892, 582 | "end": 10222, 583 | "confidence": 0.57763, 584 | "speaker": "A" 585 | }, 586 | { 587 | "text": "looking", 588 | "start": 10276, 589 | "end": 10494, 590 | "confidence": 1, 591 | "speaker": "A" 592 | }, 593 | { 594 | "text": "forward", 595 | "start": 10532, 596 | "end": 10734, 597 | "confidence": 0.99999, 598 | "speaker": "A" 599 | }, 600 | { 601 | "text": "to", 602 | "start": 10772, 603 | "end": 10974, 604 | "confidence": 0.96, 605 | "speaker": "A" 606 | }, 607 | { 608 | "text": "it", 609 | "start": 11012, 610 | "end": 11166, 611 | "confidence": 0.99995, 612 | "speaker": "A" 613 | }, 614 | { 615 | "text": "just", 616 | "start": 11188, 617 | "end": 11374, 618 | "confidence": 0.99996, 619 | "speaker": "A" 620 | }, 621 | { 622 | "text": "being", 623 | "start": 11412, 624 | "end": 11806, 625 | "confidence": 0.99998, 626 | "speaker": "A" 627 | }, 628 | { 629 | "text": "normal.", 630 | "start": 11908, 631 | "end": 12666, 632 | "confidence": 0.99999, 633 | "speaker": "A" 634 | }, 635 | { 636 | "text": "And", 637 | "start": 12778, 638 | "end": 13534, 639 | "confidence": 1, 640 | "speaker": "A" 641 | }, 642 | { 643 | "text": "I", 644 | "start": 13732, 645 | "end": 14046, 646 | "confidence": 0.65, 647 | "speaker": "A" 648 | }, 649 | { 650 | "text": "think", 651 | "start": 14068, 652 | "end": 14254, 653 | "confidence": 0.99988, 654 | "speaker": "A" 655 | }, 656 | { 657 | "text": "if", 658 | "start": 14292, 659 | "end": 14446, 660 | "confidence": 0.99998, 661 | "speaker": "A" 662 | }, 663 | { 664 | "text": "it", 665 | "start": 14468, 666 | "end": 14606, 667 | "confidence": 0.99999, 668 | "speaker": "A" 669 | }, 670 | { 671 | "text": "signifies", 672 | "start": 14628, 673 | "end": 15114, 674 | "confidence": 0.99175, 675 | "speaker": "A" 676 | }, 677 | { 678 | "text": "anything,", 679 | "start": 15162, 680 | "end": 15614, 681 | "confidence": 0.99997, 682 | "speaker": "A" 683 | }, 684 | { 685 | "text": "it", 686 | "start": 15732, 687 | "end": 16014, 688 | "confidence": 0.5311, 689 | "speaker": "A" 690 | }, 691 | { 692 | "text": "is", 693 | "start": 16052, 694 | "end": 16590, 695 | "confidence": 0.97364, 696 | "speaker": "A" 697 | }, 698 | { 699 | "text": "to", 700 | "start": 16740, 701 | "end": 17054, 702 | "confidence": 0.76, 703 | "speaker": "A" 704 | }, 705 | { 706 | "text": "honor", 707 | "start": 17092, 708 | "end": 17530, 709 | "confidence": 0.99997, 710 | "speaker": "A" 711 | }, 712 | { 713 | "text": "the", 714 | "start": 17610, 715 | "end": 17806, 716 | "confidence": 1, 717 | "speaker": "A" 718 | }, 719 | { 720 | "text": "women", 721 | "start": 17828, 722 | "end": 18014, 723 | "confidence": 0.99908, 724 | "speaker": "A" 725 | }, 726 | { 727 | "text": "who", 728 | "start": 18052, 729 | "end": 18206, 730 | "confidence": 0.99999, 731 | "speaker": "A" 732 | }, 733 | { 734 | "text": "came", 735 | "start": 18228, 736 | "end": 18414, 737 | "confidence": 1, 738 | "speaker": "A" 739 | }, 740 | { 741 | "text": "before", 742 | "start": 18452, 743 | "end": 18702, 744 | "confidence": 0.99999, 745 | "speaker": "A" 746 | }, 747 | { 748 | "text": "us,", 749 | "start": 18756, 750 | "end": 19310, 751 | "confidence": 0.65, 752 | "speaker": "A" 753 | }, 754 | { 755 | "text": "who", 756 | "start": 19460, 757 | "end": 20014, 758 | "confidence": 0.99997, 759 | "speaker": "A" 760 | }, 761 | { 762 | "text": "were", 763 | "start": 20132, 764 | "end": 20414, 765 | "confidence": 0.99969, 766 | "speaker": "A" 767 | }, 768 | { 769 | "text": "skilled", 770 | "start": 20452, 771 | "end": 20906, 772 | "confidence": 0.60398, 773 | "speaker": "A" 774 | }, 775 | { 776 | "text": "and", 777 | "start": 20938, 778 | "end": 21134, 779 | "confidence": 1, 780 | "speaker": "A" 781 | }, 782 | { 783 | "text": "qualified", 784 | "start": 21172, 785 | "end": 22026, 786 | "confidence": 0.99998, 787 | "speaker": "A" 788 | }, 789 | { 790 | "text": "and", 791 | "start": 22218, 792 | "end": 22622, 793 | "confidence": 0.95, 794 | "speaker": "A" 795 | }, 796 | { 797 | "text": "didn't", 798 | "start": 22676, 799 | "end": 22906, 800 | "confidence": 0.9999, 801 | "speaker": "A" 802 | }, 803 | { 804 | "text": "get", 805 | "start": 22938, 806 | "end": 23278, 807 | "confidence": 0.99997, 808 | "speaker": "A" 809 | }, 810 | { 811 | "text": "the", 812 | "start": 23364, 813 | "end": 23614, 814 | "confidence": 1, 815 | "speaker": "A" 816 | }, 817 | { 818 | "text": "same", 819 | "start": 23652, 820 | "end": 23902, 821 | "confidence": 0.99999, 822 | "speaker": "A" 823 | }, 824 | { 825 | "text": "opportunities", 826 | "start": 23956, 827 | "end": 24442, 828 | "confidence": 0.98808, 829 | "speaker": "A" 830 | }, 831 | { 832 | "text": "that", 833 | "start": 24506, 834 | "end": 24638, 835 | "confidence": 0.99985, 836 | "speaker": "A" 837 | }, 838 | { 839 | "text": "we", 840 | "start": 24644, 841 | "end": 24766, 842 | "confidence": 0.84796, 843 | "speaker": "A" 844 | }, 845 | { 846 | "text": "have", 847 | "start": 24788, 848 | "end": 24998, 849 | "confidence": 0.9963, 850 | "speaker": "A" 851 | }, 852 | { 853 | "text": "today.", 854 | "start": 25044, 855 | "end": 25140, 856 | "confidence": 0.99784, 857 | "speaker": "A" 858 | } 859 | ] 860 | } 861 | ], 862 | "confidence": 0.9493999999999997, 863 | "audio_duration": 25, 864 | "punctuate": true, 865 | "format_text": true, 866 | "dual_channel": false, 867 | "webhook_url": "https://assemblyai-web-production.uc.r.appspot.com/transcript/push", 868 | "webhook_status_code": 204, 869 | "webhook_auth": false, 870 | "webhook_auth_header_name": null, 871 | "speed_boost": false, 872 | "auto_highlights_result": null, 873 | "auto_highlights": false, 874 | "audio_start_from": null, 875 | "audio_end_at": null, 876 | "word_boost": [], 877 | "boost_param": null, 878 | "filter_profanity": false, 879 | "redact_pii": false, 880 | "redact_pii_audio": false, 881 | "redact_pii_audio_quality": null, 882 | "redact_pii_policies": null, 883 | "redact_pii_sub": null, 884 | "speaker_labels": true, 885 | "content_safety": false, 886 | "iab_categories": false, 887 | "content_safety_labels": { 888 | "status": "unavailable", 889 | "results": [], 890 | "summary": {} 891 | }, 892 | "iab_categories_result": { 893 | "status": "unavailable", 894 | "results": [], 895 | "summary": {} 896 | }, 897 | "language_detection": true, 898 | "custom_spelling": null, 899 | "throttled": null, 900 | "auto_chapters": false, 901 | "summarization": false, 902 | "summary_type": null, 903 | "summary_model": null, 904 | "custom_topics": false, 905 | "topics": [], 906 | "speech_threshold": null, 907 | "disfluencies": false, 908 | "sentiment_analysis": false, 909 | "chapters": null, 910 | "sentiment_analysis_results": null, 911 | "entity_detection": false, 912 | "entities": null, 913 | "summary": null, 914 | "speakers_expected": null 915 | } 916 | -------------------------------------------------------------------------------- /test/dg_transcription.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "transaction_key": "deprecated", 4 | "request_id": "686278aa-d315-4aeb-b2a9-713615544366", 5 | "sha256": "154e291ecfa8be6ab8343560bcc109008fa7853eb5372533e8efdefc9b504c33", 6 | "created": "2023-10-27T15:35:56.637Z", 7 | "duration": 25.933313, 8 | "channels": 1, 9 | "models": ["626940ac-4b66-436c-a319-5015e4018fae"], 10 | "model_info": { 11 | "626940ac-4b66-436c-a319-5015e4018fae": { 12 | "name": "2-ea-nova", 13 | "version": "2023-09-13.17827", 14 | "arch": "nova-2" 15 | } 16 | } 17 | }, 18 | "results": { 19 | "channels": [ 20 | { 21 | "alternatives": [ 22 | { 23 | "transcript": "Yeah. As as much as, it's worth celebrating, the first, spacewalk, with an all female team, I think many of us are looking forward to it just being normal and I think if it signifies anything, It is to honor the the women who came before us who, were skilled and qualified, and didn't get the same opportunities that we have today.", 24 | "confidence": 0.9972096, 25 | "words": [ 26 | { 27 | "word": "yeah", 28 | "start": 0.08, 29 | "end": 0.32, 30 | "confidence": 0.9980539, 31 | "punctuated_word": "Yeah." 32 | }, 33 | { 34 | "word": "as", 35 | "start": 0.32, 36 | "end": 0.82, 37 | "confidence": 0.99285626, 38 | "punctuated_word": "As" 39 | }, 40 | { 41 | "word": "as", 42 | "start": 0.88, 43 | "end": 1.04, 44 | "confidence": 0.95991635, 45 | "punctuated_word": "as" 46 | }, 47 | { 48 | "word": "much", 49 | "start": 1.04, 50 | "end": 1.28, 51 | "confidence": 0.9996654, 52 | "punctuated_word": "much" 53 | }, 54 | { 55 | "word": "as", 56 | "start": 1.28, 57 | "end": 1.5999999, 58 | "confidence": 0.98531306, 59 | "punctuated_word": "as," 60 | }, 61 | { 62 | "word": "it's", 63 | "start": 2, 64 | "end": 2.24, 65 | "confidence": 0.9999382, 66 | "punctuated_word": "it's" 67 | }, 68 | { 69 | "word": "worth", 70 | "start": 2.24, 71 | "end": 2.72, 72 | "confidence": 0.99997175, 73 | "punctuated_word": "worth" 74 | }, 75 | { 76 | "word": "celebrating", 77 | "start": 2.72, 78 | "end": 3.22, 79 | "confidence": 0.8966907, 80 | "punctuated_word": "celebrating," 81 | }, 82 | { 83 | "word": "the", 84 | "start": 4.4, 85 | "end": 4.64, 86 | "confidence": 0.99508864, 87 | "punctuated_word": "the" 88 | }, 89 | { 90 | "word": "first", 91 | "start": 4.64, 92 | "end": 4.96, 93 | "confidence": 0.578624, 94 | "punctuated_word": "first," 95 | }, 96 | { 97 | "word": "spacewalk", 98 | "start": 5.2799997, 99 | "end": 5.7799997, 100 | "confidence": 0.9014553, 101 | "punctuated_word": "spacewalk," 102 | }, 103 | { 104 | "word": "with", 105 | "start": 6.3199997, 106 | "end": 6.56, 107 | "confidence": 0.999765, 108 | "punctuated_word": "with" 109 | }, 110 | { 111 | "word": "an", 112 | "start": 6.56, 113 | "end": 6.72, 114 | "confidence": 0.9989334, 115 | "punctuated_word": "an" 116 | }, 117 | { 118 | "word": "all", 119 | "start": 6.72, 120 | "end": 6.96, 121 | "confidence": 0.9977842, 122 | "punctuated_word": "all" 123 | }, 124 | { 125 | "word": "female", 126 | "start": 6.96, 127 | "end": 7.3599997, 128 | "confidence": 0.99980587, 129 | "punctuated_word": "female" 130 | }, 131 | { 132 | "word": "team", 133 | "start": 7.3599997, 134 | "end": 7.8599997, 135 | "confidence": 0.8970409, 136 | "punctuated_word": "team," 137 | }, 138 | { 139 | "word": "i", 140 | "start": 8.475, 141 | "end": 8.555, 142 | "confidence": 0.6100898, 143 | "punctuated_word": "I" 144 | }, 145 | { 146 | "word": "think", 147 | "start": 8.555, 148 | "end": 8.875, 149 | "confidence": 0.9999169, 150 | "punctuated_word": "think" 151 | }, 152 | { 153 | "word": "many", 154 | "start": 8.875, 155 | "end": 9.115001, 156 | "confidence": 0.9989447, 157 | "punctuated_word": "many" 158 | }, 159 | { 160 | "word": "of", 161 | "start": 9.115001, 162 | "end": 9.3550005, 163 | "confidence": 0.99934477, 164 | "punctuated_word": "of" 165 | }, 166 | { 167 | "word": "us", 168 | "start": 9.3550005, 169 | "end": 9.8550005, 170 | "confidence": 0.9985586, 171 | "punctuated_word": "us" 172 | }, 173 | { 174 | "word": "are", 175 | "start": 9.995001, 176 | "end": 10.235001, 177 | "confidence": 0.999551, 178 | "punctuated_word": "are" 179 | }, 180 | { 181 | "word": "looking", 182 | "start": 10.235001, 183 | "end": 10.475, 184 | "confidence": 0.9996606, 185 | "punctuated_word": "looking" 186 | }, 187 | { 188 | "word": "forward", 189 | "start": 10.475, 190 | "end": 10.715, 191 | "confidence": 0.9997713, 192 | "punctuated_word": "forward" 193 | }, 194 | { 195 | "word": "to", 196 | "start": 10.715, 197 | "end": 10.955, 198 | "confidence": 0.99927133, 199 | "punctuated_word": "to" 200 | }, 201 | { 202 | "word": "it", 203 | "start": 10.955, 204 | "end": 11.115001, 205 | "confidence": 0.997619, 206 | "punctuated_word": "it" 207 | }, 208 | { 209 | "word": "just", 210 | "start": 11.115001, 211 | "end": 11.3550005, 212 | "confidence": 0.9978631, 213 | "punctuated_word": "just" 214 | }, 215 | { 216 | "word": "being", 217 | "start": 11.3550005, 218 | "end": 11.8550005, 219 | "confidence": 0.9994981, 220 | "punctuated_word": "being" 221 | }, 222 | { 223 | "word": "normal", 224 | "start": 11.995001, 225 | "end": 12.495001, 226 | "confidence": 0.9978975, 227 | "punctuated_word": "normal" 228 | }, 229 | { 230 | "word": "and", 231 | "start": 12.715, 232 | "end": 13.215, 233 | "confidence": 0.54509896, 234 | "punctuated_word": "and" 235 | }, 236 | { 237 | "word": "i", 238 | "start": 13.835001, 239 | "end": 13.995001, 240 | "confidence": 0.6895846, 241 | "punctuated_word": "I" 242 | }, 243 | { 244 | "word": "think", 245 | "start": 13.995001, 246 | "end": 14.235001, 247 | "confidence": 0.99965954, 248 | "punctuated_word": "think" 249 | }, 250 | { 251 | "word": "if", 252 | "start": 14.235001, 253 | "end": 14.395, 254 | "confidence": 0.9954182, 255 | "punctuated_word": "if" 256 | }, 257 | { 258 | "word": "it", 259 | "start": 14.395, 260 | "end": 14.555, 261 | "confidence": 0.9864966, 262 | "punctuated_word": "it" 263 | }, 264 | { 265 | "word": "signifies", 266 | "start": 14.555, 267 | "end": 15.055, 268 | "confidence": 0.9997447, 269 | "punctuated_word": "signifies" 270 | }, 271 | { 272 | "word": "anything", 273 | "start": 15.115, 274 | "end": 15.615, 275 | "confidence": 0.89247376, 276 | "punctuated_word": "anything," 277 | }, 278 | { 279 | "word": "it", 280 | "start": 15.82, 281 | "end": 15.98, 282 | "confidence": 0.49997056, 283 | "punctuated_word": "It" 284 | }, 285 | { 286 | "word": "is", 287 | "start": 15.98, 288 | "end": 16.48, 289 | "confidence": 0.9991165, 290 | "punctuated_word": "is" 291 | }, 292 | { 293 | "word": "to", 294 | "start": 16.779999, 295 | "end": 17.02, 296 | "confidence": 0.51602215, 297 | "punctuated_word": "to" 298 | }, 299 | { 300 | "word": "honor", 301 | "start": 17.02, 302 | "end": 17.34, 303 | "confidence": 0.99897003, 304 | "punctuated_word": "honor" 305 | }, 306 | { 307 | "word": "the", 308 | "start": 17.34, 309 | "end": 17.66, 310 | "confidence": 0.9979972, 311 | "punctuated_word": "the" 312 | }, 313 | { 314 | "word": "the", 315 | "start": 17.66, 316 | "end": 17.74, 317 | "confidence": 0.7534162, 318 | "punctuated_word": "the" 319 | }, 320 | { 321 | "word": "women", 322 | "start": 17.74, 323 | "end": 18.06, 324 | "confidence": 0.9617373, 325 | "punctuated_word": "women" 326 | }, 327 | { 328 | "word": "who", 329 | "start": 18.06, 330 | "end": 18.22, 331 | "confidence": 0.9968215, 332 | "punctuated_word": "who" 333 | }, 334 | { 335 | "word": "came", 336 | "start": 18.22, 337 | "end": 18.38, 338 | "confidence": 0.9990761, 339 | "punctuated_word": "came" 340 | }, 341 | { 342 | "word": "before", 343 | "start": 18.38, 344 | "end": 18.7, 345 | "confidence": 0.99931896, 346 | "punctuated_word": "before" 347 | }, 348 | { 349 | "word": "us", 350 | "start": 18.7, 351 | "end": 19.2, 352 | "confidence": 0.9972096, 353 | "punctuated_word": "us" 354 | }, 355 | { 356 | "word": "who", 357 | "start": 19.5, 358 | "end": 19.82, 359 | "confidence": 0.7276913, 360 | "punctuated_word": "who," 361 | }, 362 | { 363 | "word": "were", 364 | "start": 20.14, 365 | "end": 20.38, 366 | "confidence": 0.9741669, 367 | "punctuated_word": "were" 368 | }, 369 | { 370 | "word": "skilled", 371 | "start": 20.38, 372 | "end": 20.86, 373 | "confidence": 0.9973028, 374 | "punctuated_word": "skilled" 375 | }, 376 | { 377 | "word": "and", 378 | "start": 20.86, 379 | "end": 21.18, 380 | "confidence": 0.99707186, 381 | "punctuated_word": "and" 382 | }, 383 | { 384 | "word": "qualified", 385 | "start": 21.18, 386 | "end": 21.68, 387 | "confidence": 0.837723, 388 | "punctuated_word": "qualified," 389 | }, 390 | { 391 | "word": "and", 392 | "start": 22.3, 393 | "end": 22.619999, 394 | "confidence": 0.9976847, 395 | "punctuated_word": "and" 396 | }, 397 | { 398 | "word": "didn't", 399 | "start": 22.619999, 400 | "end": 22.86, 401 | "confidence": 0.99618447, 402 | "punctuated_word": "didn't" 403 | }, 404 | { 405 | "word": "get", 406 | "start": 22.86, 407 | "end": 23.26, 408 | "confidence": 0.9957491, 409 | "punctuated_word": "get" 410 | }, 411 | { 412 | "word": "the", 413 | "start": 23.26, 414 | "end": 23.58, 415 | "confidence": 0.7927007, 416 | "punctuated_word": "the" 417 | }, 418 | { 419 | "word": "same", 420 | "start": 23.58, 421 | "end": 23.9, 422 | "confidence": 0.7259752, 423 | "punctuated_word": "same" 424 | }, 425 | { 426 | "word": "opportunities", 427 | "start": 23.9, 428 | "end": 24.4, 429 | "confidence": 0.9914248, 430 | "punctuated_word": "opportunities" 431 | }, 432 | { 433 | "word": "that", 434 | "start": 24.46, 435 | "end": 24.619999, 436 | "confidence": 0.97425294, 437 | "punctuated_word": "that" 438 | }, 439 | { 440 | "word": "we", 441 | "start": 24.619999, 442 | "end": 24.779999, 443 | "confidence": 0.98887795, 444 | "punctuated_word": "we" 445 | }, 446 | { 447 | "word": "have", 448 | "start": 24.779999, 449 | "end": 24.939999, 450 | "confidence": 0.9769957, 451 | "punctuated_word": "have" 452 | }, 453 | { 454 | "word": "today", 455 | "start": 24.939999, 456 | "end": 25.439999, 457 | "confidence": 0.725032, 458 | "punctuated_word": "today." 459 | } 460 | ] 461 | } 462 | ] 463 | } 464 | ] 465 | } 466 | } 467 | -------------------------------------------------------------------------------- /test/dg_utterances.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "transaction_key": "deprecated", 4 | "request_id": "686278aa-d315-4aeb-b2a9-713615544366", 5 | "sha256": "154e291ecfa8be6ab8343560bcc109008fa7853eb5372533e8efdefc9b504c33", 6 | "created": "2023-10-27T15:35:56.637Z", 7 | "duration": 25.933313, 8 | "channels": 1, 9 | "models": ["626940ac-4b66-436c-a319-5015e4018fae"], 10 | "model_info": { 11 | "626940ac-4b66-436c-a319-5015e4018fae": { 12 | "name": "2-ea-nova", 13 | "version": "2023-09-13.17827", 14 | "arch": "nova-2" 15 | } 16 | } 17 | }, 18 | "results": { 19 | "channels": [ 20 | { 21 | "alternatives": [ 22 | { 23 | "transcript": "Yeah. As as much as, it's worth celebrating, the first, spacewalk, with an all female team, I think many of us are looking forward to it just being normal and I think if it signifies anything, It is to honor the the women who came before us who, were skilled and qualified, and didn't get the same opportunities that we have today.", 24 | "confidence": 0.9972096, 25 | "words": [ 26 | { 27 | "word": "yeah", 28 | "start": 0.08, 29 | "end": 0.32, 30 | "confidence": 0.9980539, 31 | "punctuated_word": "Yeah." 32 | }, 33 | { 34 | "word": "as", 35 | "start": 0.32, 36 | "end": 0.82, 37 | "confidence": 0.99285626, 38 | "punctuated_word": "As" 39 | }, 40 | { 41 | "word": "as", 42 | "start": 0.88, 43 | "end": 1.04, 44 | "confidence": 0.95991635, 45 | "punctuated_word": "as" 46 | }, 47 | { 48 | "word": "much", 49 | "start": 1.04, 50 | "end": 1.28, 51 | "confidence": 0.9996654, 52 | "punctuated_word": "much" 53 | }, 54 | { 55 | "word": "as", 56 | "start": 1.28, 57 | "end": 1.5999999, 58 | "confidence": 0.98531306, 59 | "punctuated_word": "as," 60 | }, 61 | { 62 | "word": "it's", 63 | "start": 2, 64 | "end": 2.24, 65 | "confidence": 0.9999382, 66 | "punctuated_word": "it's" 67 | }, 68 | { 69 | "word": "worth", 70 | "start": 2.24, 71 | "end": 2.72, 72 | "confidence": 0.99997175, 73 | "punctuated_word": "worth" 74 | }, 75 | { 76 | "word": "celebrating", 77 | "start": 2.72, 78 | "end": 3.22, 79 | "confidence": 0.8966907, 80 | "punctuated_word": "celebrating," 81 | }, 82 | { 83 | "word": "the", 84 | "start": 4.4, 85 | "end": 4.64, 86 | "confidence": 0.99508864, 87 | "punctuated_word": "the" 88 | }, 89 | { 90 | "word": "first", 91 | "start": 4.64, 92 | "end": 4.96, 93 | "confidence": 0.578624, 94 | "punctuated_word": "first," 95 | }, 96 | { 97 | "word": "spacewalk", 98 | "start": 5.2799997, 99 | "end": 5.7799997, 100 | "confidence": 0.9014553, 101 | "punctuated_word": "spacewalk," 102 | }, 103 | { 104 | "word": "with", 105 | "start": 6.3199997, 106 | "end": 6.56, 107 | "confidence": 0.999765, 108 | "punctuated_word": "with" 109 | }, 110 | { 111 | "word": "an", 112 | "start": 6.56, 113 | "end": 6.72, 114 | "confidence": 0.9989334, 115 | "punctuated_word": "an" 116 | }, 117 | { 118 | "word": "all", 119 | "start": 6.72, 120 | "end": 6.96, 121 | "confidence": 0.9977842, 122 | "punctuated_word": "all" 123 | }, 124 | { 125 | "word": "female", 126 | "start": 6.96, 127 | "end": 7.3599997, 128 | "confidence": 0.99980587, 129 | "punctuated_word": "female" 130 | }, 131 | { 132 | "word": "team", 133 | "start": 7.3599997, 134 | "end": 7.8599997, 135 | "confidence": 0.8970409, 136 | "punctuated_word": "team," 137 | }, 138 | { 139 | "word": "i", 140 | "start": 8.475, 141 | "end": 8.555, 142 | "confidence": 0.6100898, 143 | "punctuated_word": "I" 144 | }, 145 | { 146 | "word": "think", 147 | "start": 8.555, 148 | "end": 8.875, 149 | "confidence": 0.9999169, 150 | "punctuated_word": "think" 151 | }, 152 | { 153 | "word": "many", 154 | "start": 8.875, 155 | "end": 9.115001, 156 | "confidence": 0.9989447, 157 | "punctuated_word": "many" 158 | }, 159 | { 160 | "word": "of", 161 | "start": 9.115001, 162 | "end": 9.3550005, 163 | "confidence": 0.99934477, 164 | "punctuated_word": "of" 165 | }, 166 | { 167 | "word": "us", 168 | "start": 9.3550005, 169 | "end": 9.8550005, 170 | "confidence": 0.9985586, 171 | "punctuated_word": "us" 172 | }, 173 | { 174 | "word": "are", 175 | "start": 9.995001, 176 | "end": 10.235001, 177 | "confidence": 0.999551, 178 | "punctuated_word": "are" 179 | }, 180 | { 181 | "word": "looking", 182 | "start": 10.235001, 183 | "end": 10.475, 184 | "confidence": 0.9996606, 185 | "punctuated_word": "looking" 186 | }, 187 | { 188 | "word": "forward", 189 | "start": 10.475, 190 | "end": 10.715, 191 | "confidence": 0.9997713, 192 | "punctuated_word": "forward" 193 | }, 194 | { 195 | "word": "to", 196 | "start": 10.715, 197 | "end": 10.955, 198 | "confidence": 0.99927133, 199 | "punctuated_word": "to" 200 | }, 201 | { 202 | "word": "it", 203 | "start": 10.955, 204 | "end": 11.115001, 205 | "confidence": 0.997619, 206 | "punctuated_word": "it" 207 | }, 208 | { 209 | "word": "just", 210 | "start": 11.115001, 211 | "end": 11.3550005, 212 | "confidence": 0.9978631, 213 | "punctuated_word": "just" 214 | }, 215 | { 216 | "word": "being", 217 | "start": 11.3550005, 218 | "end": 11.8550005, 219 | "confidence": 0.9994981, 220 | "punctuated_word": "being" 221 | }, 222 | { 223 | "word": "normal", 224 | "start": 11.995001, 225 | "end": 12.495001, 226 | "confidence": 0.9978975, 227 | "punctuated_word": "normal" 228 | }, 229 | { 230 | "word": "and", 231 | "start": 12.715, 232 | "end": 13.215, 233 | "confidence": 0.54509896, 234 | "punctuated_word": "and" 235 | }, 236 | { 237 | "word": "i", 238 | "start": 13.835001, 239 | "end": 13.995001, 240 | "confidence": 0.6895846, 241 | "punctuated_word": "I" 242 | }, 243 | { 244 | "word": "think", 245 | "start": 13.995001, 246 | "end": 14.235001, 247 | "confidence": 0.99965954, 248 | "punctuated_word": "think" 249 | }, 250 | { 251 | "word": "if", 252 | "start": 14.235001, 253 | "end": 14.395, 254 | "confidence": 0.9954182, 255 | "punctuated_word": "if" 256 | }, 257 | { 258 | "word": "it", 259 | "start": 14.395, 260 | "end": 14.555, 261 | "confidence": 0.9864966, 262 | "punctuated_word": "it" 263 | }, 264 | { 265 | "word": "signifies", 266 | "start": 14.555, 267 | "end": 15.055, 268 | "confidence": 0.9997447, 269 | "punctuated_word": "signifies" 270 | }, 271 | { 272 | "word": "anything", 273 | "start": 15.115, 274 | "end": 15.615, 275 | "confidence": 0.89247376, 276 | "punctuated_word": "anything," 277 | }, 278 | { 279 | "word": "it", 280 | "start": 15.82, 281 | "end": 15.98, 282 | "confidence": 0.49997056, 283 | "punctuated_word": "It" 284 | }, 285 | { 286 | "word": "is", 287 | "start": 15.98, 288 | "end": 16.48, 289 | "confidence": 0.9991165, 290 | "punctuated_word": "is" 291 | }, 292 | { 293 | "word": "to", 294 | "start": 16.779999, 295 | "end": 17.02, 296 | "confidence": 0.51602215, 297 | "punctuated_word": "to" 298 | }, 299 | { 300 | "word": "honor", 301 | "start": 17.02, 302 | "end": 17.34, 303 | "confidence": 0.99897003, 304 | "punctuated_word": "honor" 305 | }, 306 | { 307 | "word": "the", 308 | "start": 17.34, 309 | "end": 17.66, 310 | "confidence": 0.9979972, 311 | "punctuated_word": "the" 312 | }, 313 | { 314 | "word": "the", 315 | "start": 17.66, 316 | "end": 17.74, 317 | "confidence": 0.7534162, 318 | "punctuated_word": "the" 319 | }, 320 | { 321 | "word": "women", 322 | "start": 17.74, 323 | "end": 18.06, 324 | "confidence": 0.9617373, 325 | "punctuated_word": "women" 326 | }, 327 | { 328 | "word": "who", 329 | "start": 18.06, 330 | "end": 18.22, 331 | "confidence": 0.9968215, 332 | "punctuated_word": "who" 333 | }, 334 | { 335 | "word": "came", 336 | "start": 18.22, 337 | "end": 18.38, 338 | "confidence": 0.9990761, 339 | "punctuated_word": "came" 340 | }, 341 | { 342 | "word": "before", 343 | "start": 18.38, 344 | "end": 18.7, 345 | "confidence": 0.99931896, 346 | "punctuated_word": "before" 347 | }, 348 | { 349 | "word": "us", 350 | "start": 18.7, 351 | "end": 19.2, 352 | "confidence": 0.9972096, 353 | "punctuated_word": "us" 354 | }, 355 | { 356 | "word": "who", 357 | "start": 19.5, 358 | "end": 19.82, 359 | "confidence": 0.7276913, 360 | "punctuated_word": "who," 361 | }, 362 | { 363 | "word": "were", 364 | "start": 20.14, 365 | "end": 20.38, 366 | "confidence": 0.9741669, 367 | "punctuated_word": "were" 368 | }, 369 | { 370 | "word": "skilled", 371 | "start": 20.38, 372 | "end": 20.86, 373 | "confidence": 0.9973028, 374 | "punctuated_word": "skilled" 375 | }, 376 | { 377 | "word": "and", 378 | "start": 20.86, 379 | "end": 21.18, 380 | "confidence": 0.99707186, 381 | "punctuated_word": "and" 382 | }, 383 | { 384 | "word": "qualified", 385 | "start": 21.18, 386 | "end": 21.68, 387 | "confidence": 0.837723, 388 | "punctuated_word": "qualified," 389 | }, 390 | { 391 | "word": "and", 392 | "start": 22.3, 393 | "end": 22.619999, 394 | "confidence": 0.9976847, 395 | "punctuated_word": "and" 396 | }, 397 | { 398 | "word": "didn't", 399 | "start": 22.619999, 400 | "end": 22.86, 401 | "confidence": 0.99618447, 402 | "punctuated_word": "didn't" 403 | }, 404 | { 405 | "word": "get", 406 | "start": 22.86, 407 | "end": 23.26, 408 | "confidence": 0.9957491, 409 | "punctuated_word": "get" 410 | }, 411 | { 412 | "word": "the", 413 | "start": 23.26, 414 | "end": 23.58, 415 | "confidence": 0.7927007, 416 | "punctuated_word": "the" 417 | }, 418 | { 419 | "word": "same", 420 | "start": 23.58, 421 | "end": 23.9, 422 | "confidence": 0.7259752, 423 | "punctuated_word": "same" 424 | }, 425 | { 426 | "word": "opportunities", 427 | "start": 23.9, 428 | "end": 24.4, 429 | "confidence": 0.9914248, 430 | "punctuated_word": "opportunities" 431 | }, 432 | { 433 | "word": "that", 434 | "start": 24.46, 435 | "end": 24.619999, 436 | "confidence": 0.97425294, 437 | "punctuated_word": "that" 438 | }, 439 | { 440 | "word": "we", 441 | "start": 24.619999, 442 | "end": 24.779999, 443 | "confidence": 0.98887795, 444 | "punctuated_word": "we" 445 | }, 446 | { 447 | "word": "have", 448 | "start": 24.779999, 449 | "end": 24.939999, 450 | "confidence": 0.9769957, 451 | "punctuated_word": "have" 452 | }, 453 | { 454 | "word": "today", 455 | "start": 24.939999, 456 | "end": 25.439999, 457 | "confidence": 0.725032, 458 | "punctuated_word": "today." 459 | } 460 | ], 461 | "paragraphs": { 462 | "transcript": "\nYeah. As as much as, it's worth celebrating, the first, spacewalk, with an all female team, I think many of us are looking forward to it just being normal and I think if it signifies anything, It is to honor the the women who came before us who, were skilled and qualified, and didn't get the same opportunities that we have today.", 463 | "paragraphs": [ 464 | { 465 | "sentences": [ 466 | { 467 | "text": "Yeah.", 468 | "start": 0.08, 469 | "end": 0.32 470 | }, 471 | { 472 | "text": "As as much as, it's worth celebrating, the first, spacewalk, with an all female team, I think many of us are looking forward to it just being normal and I think if it signifies anything, It is to honor the the women who came before us who, were skilled and qualified, and didn't get the same opportunities that we have today.", 473 | "start": 0.32, 474 | "end": 25.439999 475 | } 476 | ], 477 | "num_words": 62, 478 | "start": 0.08, 479 | "end": 25.439999 480 | } 481 | ] 482 | } 483 | } 484 | ] 485 | } 486 | ], 487 | "utterances": [ 488 | { 489 | "start": 0.08, 490 | "end": 3.22, 491 | "confidence": 0.9790507, 492 | "channel": 0, 493 | "transcript": "Yeah. As as much as, it's worth celebrating,", 494 | "words": [ 495 | { 496 | "word": "yeah", 497 | "start": 0.08, 498 | "end": 0.32, 499 | "confidence": 0.9980539, 500 | "punctuated_word": "Yeah." 501 | }, 502 | { 503 | "word": "as", 504 | "start": 0.32, 505 | "end": 0.82, 506 | "confidence": 0.99285626, 507 | "punctuated_word": "As" 508 | }, 509 | { 510 | "word": "as", 511 | "start": 0.88, 512 | "end": 1.04, 513 | "confidence": 0.95991635, 514 | "punctuated_word": "as" 515 | }, 516 | { 517 | "word": "much", 518 | "start": 1.04, 519 | "end": 1.28, 520 | "confidence": 0.9996654, 521 | "punctuated_word": "much" 522 | }, 523 | { 524 | "word": "as", 525 | "start": 1.28, 526 | "end": 1.5999999, 527 | "confidence": 0.98531306, 528 | "punctuated_word": "as," 529 | }, 530 | { 531 | "word": "it's", 532 | "start": 2, 533 | "end": 2.24, 534 | "confidence": 0.9999382, 535 | "punctuated_word": "it's" 536 | }, 537 | { 538 | "word": "worth", 539 | "start": 2.24, 540 | "end": 2.72, 541 | "confidence": 0.99997175, 542 | "punctuated_word": "worth" 543 | }, 544 | { 545 | "word": "celebrating", 546 | "start": 2.72, 547 | "end": 3.22, 548 | "confidence": 0.8966907, 549 | "punctuated_word": "celebrating," 550 | } 551 | ], 552 | "id": "d3bb4463-e3aa-4411-a664-85f3685e03f9" 553 | }, 554 | { 555 | "start": 4.4, 556 | "end": 5.7799997, 557 | "confidence": 0.8250559, 558 | "channel": 0, 559 | "transcript": "the first, spacewalk,", 560 | "words": [ 561 | { 562 | "word": "the", 563 | "start": 4.4, 564 | "end": 4.64, 565 | "confidence": 0.99508864, 566 | "punctuated_word": "the" 567 | }, 568 | { 569 | "word": "first", 570 | "start": 4.64, 571 | "end": 4.96, 572 | "confidence": 0.578624, 573 | "punctuated_word": "first," 574 | }, 575 | { 576 | "word": "spacewalk", 577 | "start": 5.2799997, 578 | "end": 5.7799997, 579 | "confidence": 0.9014553, 580 | "punctuated_word": "spacewalk," 581 | } 582 | ], 583 | "id": "3deaa54a-ff4e-492e-9fa0-f66c0014826e" 584 | }, 585 | { 586 | "start": 6.3199997, 587 | "end": 7.8599997, 588 | "confidence": 0.97866595, 589 | "channel": 0, 590 | "transcript": "with an all female team,", 591 | "words": [ 592 | { 593 | "word": "with", 594 | "start": 6.3199997, 595 | "end": 6.56, 596 | "confidence": 0.999765, 597 | "punctuated_word": "with" 598 | }, 599 | { 600 | "word": "an", 601 | "start": 6.56, 602 | "end": 6.72, 603 | "confidence": 0.9989334, 604 | "punctuated_word": "an" 605 | }, 606 | { 607 | "word": "all", 608 | "start": 6.72, 609 | "end": 6.96, 610 | "confidence": 0.9977842, 611 | "punctuated_word": "all" 612 | }, 613 | { 614 | "word": "female", 615 | "start": 6.96, 616 | "end": 7.3599997, 617 | "confidence": 0.99980587, 618 | "punctuated_word": "female" 619 | }, 620 | { 621 | "word": "team", 622 | "start": 7.3599997, 623 | "end": 7.8599997, 624 | "confidence": 0.8970409, 625 | "punctuated_word": "team," 626 | } 627 | ], 628 | "id": "d44277d3-651f-41b2-94f6-f524545d4f5d" 629 | }, 630 | { 631 | "start": 8.475, 632 | "end": 13.215, 633 | "confidence": 0.93879175, 634 | "channel": 0, 635 | "transcript": "I think many of us are looking forward to it just being normal and", 636 | "words": [ 637 | { 638 | "word": "i", 639 | "start": 8.475, 640 | "end": 8.555, 641 | "confidence": 0.6100898, 642 | "punctuated_word": "I" 643 | }, 644 | { 645 | "word": "think", 646 | "start": 8.555, 647 | "end": 8.875, 648 | "confidence": 0.9999169, 649 | "punctuated_word": "think" 650 | }, 651 | { 652 | "word": "many", 653 | "start": 8.875, 654 | "end": 9.115001, 655 | "confidence": 0.9989447, 656 | "punctuated_word": "many" 657 | }, 658 | { 659 | "word": "of", 660 | "start": 9.115001, 661 | "end": 9.3550005, 662 | "confidence": 0.99934477, 663 | "punctuated_word": "of" 664 | }, 665 | { 666 | "word": "us", 667 | "start": 9.3550005, 668 | "end": 9.8550005, 669 | "confidence": 0.9985586, 670 | "punctuated_word": "us" 671 | }, 672 | { 673 | "word": "are", 674 | "start": 9.995001, 675 | "end": 10.235001, 676 | "confidence": 0.999551, 677 | "punctuated_word": "are" 678 | }, 679 | { 680 | "word": "looking", 681 | "start": 10.235001, 682 | "end": 10.475, 683 | "confidence": 0.9996606, 684 | "punctuated_word": "looking" 685 | }, 686 | { 687 | "word": "forward", 688 | "start": 10.475, 689 | "end": 10.715, 690 | "confidence": 0.9997713, 691 | "punctuated_word": "forward" 692 | }, 693 | { 694 | "word": "to", 695 | "start": 10.715, 696 | "end": 10.955, 697 | "confidence": 0.99927133, 698 | "punctuated_word": "to" 699 | }, 700 | { 701 | "word": "it", 702 | "start": 10.955, 703 | "end": 11.115001, 704 | "confidence": 0.997619, 705 | "punctuated_word": "it" 706 | }, 707 | { 708 | "word": "just", 709 | "start": 11.115001, 710 | "end": 11.3550005, 711 | "confidence": 0.9978631, 712 | "punctuated_word": "just" 713 | }, 714 | { 715 | "word": "being", 716 | "start": 11.3550005, 717 | "end": 11.8550005, 718 | "confidence": 0.9994981, 719 | "punctuated_word": "being" 720 | }, 721 | { 722 | "word": "normal", 723 | "start": 11.995001, 724 | "end": 12.495001, 725 | "confidence": 0.9978975, 726 | "punctuated_word": "normal" 727 | }, 728 | { 729 | "word": "and", 730 | "start": 12.715, 731 | "end": 13.215, 732 | "confidence": 0.54509896, 733 | "punctuated_word": "and" 734 | } 735 | ], 736 | "id": "c15c6b07-9eea-4cda-b0ca-e53c0b2f91f7" 737 | }, 738 | { 739 | "start": 13.835001, 740 | "end": 21.68, 741 | "confidence": 0.9007723, 742 | "channel": 0, 743 | "transcript": "I think if it signifies anything, It is to honor the the women who came before us who, were skilled and qualified,", 744 | "words": [ 745 | { 746 | "word": "i", 747 | "start": 13.835001, 748 | "end": 13.995001, 749 | "confidence": 0.6895846, 750 | "punctuated_word": "I" 751 | }, 752 | { 753 | "word": "think", 754 | "start": 13.995001, 755 | "end": 14.235001, 756 | "confidence": 0.99965954, 757 | "punctuated_word": "think" 758 | }, 759 | { 760 | "word": "if", 761 | "start": 14.235001, 762 | "end": 14.395, 763 | "confidence": 0.9954182, 764 | "punctuated_word": "if" 765 | }, 766 | { 767 | "word": "it", 768 | "start": 14.395, 769 | "end": 14.555, 770 | "confidence": 0.9864966, 771 | "punctuated_word": "it" 772 | }, 773 | { 774 | "word": "signifies", 775 | "start": 14.555, 776 | "end": 15.055, 777 | "confidence": 0.9997447, 778 | "punctuated_word": "signifies" 779 | }, 780 | { 781 | "word": "anything", 782 | "start": 15.115, 783 | "end": 15.615, 784 | "confidence": 0.89247376, 785 | "punctuated_word": "anything," 786 | }, 787 | { 788 | "word": "it", 789 | "start": 15.82, 790 | "end": 15.98, 791 | "confidence": 0.49997056, 792 | "punctuated_word": "It" 793 | }, 794 | { 795 | "word": "is", 796 | "start": 15.98, 797 | "end": 16.48, 798 | "confidence": 0.9991165, 799 | "punctuated_word": "is" 800 | }, 801 | { 802 | "word": "to", 803 | "start": 16.779999, 804 | "end": 17.02, 805 | "confidence": 0.51602215, 806 | "punctuated_word": "to" 807 | }, 808 | { 809 | "word": "honor", 810 | "start": 17.02, 811 | "end": 17.34, 812 | "confidence": 0.99897003, 813 | "punctuated_word": "honor" 814 | }, 815 | { 816 | "word": "the", 817 | "start": 17.34, 818 | "end": 17.66, 819 | "confidence": 0.9979972, 820 | "punctuated_word": "the" 821 | }, 822 | { 823 | "word": "the", 824 | "start": 17.66, 825 | "end": 17.74, 826 | "confidence": 0.7534162, 827 | "punctuated_word": "the" 828 | }, 829 | { 830 | "word": "women", 831 | "start": 17.74, 832 | "end": 18.06, 833 | "confidence": 0.9617373, 834 | "punctuated_word": "women" 835 | }, 836 | { 837 | "word": "who", 838 | "start": 18.06, 839 | "end": 18.22, 840 | "confidence": 0.9968215, 841 | "punctuated_word": "who" 842 | }, 843 | { 844 | "word": "came", 845 | "start": 18.22, 846 | "end": 18.38, 847 | "confidence": 0.9990761, 848 | "punctuated_word": "came" 849 | }, 850 | { 851 | "word": "before", 852 | "start": 18.38, 853 | "end": 18.7, 854 | "confidence": 0.99931896, 855 | "punctuated_word": "before" 856 | }, 857 | { 858 | "word": "us", 859 | "start": 18.7, 860 | "end": 19.2, 861 | "confidence": 0.9972096, 862 | "punctuated_word": "us" 863 | }, 864 | { 865 | "word": "who", 866 | "start": 19.5, 867 | "end": 19.82, 868 | "confidence": 0.7276913, 869 | "punctuated_word": "who," 870 | }, 871 | { 872 | "word": "were", 873 | "start": 20.14, 874 | "end": 20.38, 875 | "confidence": 0.9741669, 876 | "punctuated_word": "were" 877 | }, 878 | { 879 | "word": "skilled", 880 | "start": 20.38, 881 | "end": 20.86, 882 | "confidence": 0.9973028, 883 | "punctuated_word": "skilled" 884 | }, 885 | { 886 | "word": "and", 887 | "start": 20.86, 888 | "end": 21.18, 889 | "confidence": 0.99707186, 890 | "punctuated_word": "and" 891 | }, 892 | { 893 | "word": "qualified", 894 | "start": 21.18, 895 | "end": 21.68, 896 | "confidence": 0.837723, 897 | "punctuated_word": "qualified," 898 | } 899 | ], 900 | "id": "d057c1de-9d37-4c68-a757-5ca083451c1d" 901 | }, 902 | { 903 | "start": 22.3, 904 | "end": 25.439999, 905 | "confidence": 0.9164877, 906 | "channel": 0, 907 | "transcript": "and didn't get the same opportunities that we have today.", 908 | "words": [ 909 | { 910 | "word": "and", 911 | "start": 22.3, 912 | "end": 22.619999, 913 | "confidence": 0.9976847, 914 | "punctuated_word": "and" 915 | }, 916 | { 917 | "word": "didn't", 918 | "start": 22.619999, 919 | "end": 22.86, 920 | "confidence": 0.99618447, 921 | "punctuated_word": "didn't" 922 | }, 923 | { 924 | "word": "get", 925 | "start": 22.86, 926 | "end": 23.26, 927 | "confidence": 0.9957491, 928 | "punctuated_word": "get" 929 | }, 930 | { 931 | "word": "the", 932 | "start": 23.26, 933 | "end": 23.58, 934 | "confidence": 0.7927007, 935 | "punctuated_word": "the" 936 | }, 937 | { 938 | "word": "same", 939 | "start": 23.58, 940 | "end": 23.9, 941 | "confidence": 0.7259752, 942 | "punctuated_word": "same" 943 | }, 944 | { 945 | "word": "opportunities", 946 | "start": 23.9, 947 | "end": 24.4, 948 | "confidence": 0.9914248, 949 | "punctuated_word": "opportunities" 950 | }, 951 | { 952 | "word": "that", 953 | "start": 24.46, 954 | "end": 24.619999, 955 | "confidence": 0.97425294, 956 | "punctuated_word": "that" 957 | }, 958 | { 959 | "word": "we", 960 | "start": 24.619999, 961 | "end": 24.779999, 962 | "confidence": 0.98887795, 963 | "punctuated_word": "we" 964 | }, 965 | { 966 | "word": "have", 967 | "start": 24.779999, 968 | "end": 24.939999, 969 | "confidence": 0.9769957, 970 | "punctuated_word": "have" 971 | }, 972 | { 973 | "word": "today", 974 | "start": 24.939999, 975 | "end": 25.439999, 976 | "confidence": 0.725032, 977 | "punctuated_word": "today." 978 | } 979 | ], 980 | "id": "ff6bbc3f-5985-40ec-89b0-8f720b16209e" 981 | } 982 | ] 983 | } 984 | } 985 | -------------------------------------------------------------------------------- /test/dg_whisper_transcription.json: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "transaction_key": "deprecated", 4 | "request_id": "9e213b5c-cf78-4c95-bb5d-bb1ca65f26a1", 5 | "sha256": "154e291ecfa8be6ab8343560bcc109008fa7853eb5372533e8efdefc9b504c33", 6 | "created": "2023-11-14T15:00:10.698Z", 7 | "duration": 25.933313, 8 | "channels": 1, 9 | "models": ["c6481769-89cd-447f-be27-1e6e9dac3616"], 10 | "model_info": { 11 | "c6481769-89cd-447f-be27-1e6e9dac3616": { 12 | "name": "medium-whisper", 13 | "version": "2022-09-21.5", 14 | "arch": "whisper" 15 | } 16 | } 17 | }, 18 | "results": { 19 | "channels": [ 20 | { 21 | "alternatives": [ 22 | { 23 | "transcript": "yeah as much as it's worth celebrating the first spacewalk with an all-female team i think many of us are looking forward to it just being normal and i think if it signifies anything it is to honor the women who came before us who were skilled and qualified and didn't get the same opportunities that we have today", 24 | "confidence": 0.9866368, 25 | "words": [ 26 | { 27 | "word": "yeah", 28 | "start": 0.06, 29 | "end": 0.56, 30 | "confidence": 0.9563703 31 | }, 32 | { 33 | "word": "as", 34 | "start": 0.9, 35 | "end": 1.12, 36 | "confidence": 0.9768862 37 | }, 38 | { 39 | "word": "much", 40 | "start": 1.12, 41 | "end": 1.36, 42 | "confidence": 0.97141933 43 | }, 44 | { 45 | "word": "as", 46 | "start": 1.36, 47 | "end": 1.86, 48 | "confidence": 0.97323734 49 | }, 50 | { 51 | "word": "it's", 52 | "start": 2.08, 53 | "end": 2.34, 54 | "confidence": 0.9978277 55 | }, 56 | { 57 | "word": "worth", 58 | "start": 2.34, 59 | "end": 2.84, 60 | "confidence": 0.9052546 61 | }, 62 | { 63 | "word": "celebrating", 64 | "start": 2.9199998, 65 | "end": 3.4199998, 66 | "confidence": 0.9887623 67 | }, 68 | { 69 | "word": "the", 70 | "start": 4.46, 71 | "end": 4.64, 72 | "confidence": 0.8508073 73 | }, 74 | { 75 | "word": "first", 76 | "start": 4.64, 77 | "end": 5.14, 78 | "confidence": 0.66090274 79 | }, 80 | { 81 | "word": "spacewalk", 82 | "start": 5.3399997, 83 | "end": 5.8399997, 84 | "confidence": 0.5009989 85 | }, 86 | { 87 | "word": "with", 88 | "start": 6.42, 89 | "end": 6.62, 90 | "confidence": 0.9866368 91 | }, 92 | { 93 | "word": "an", 94 | "start": 6.62, 95 | "end": 6.7799997, 96 | "confidence": 0.59729016 97 | }, 98 | { 99 | "word": "all-female", 100 | "start": 6.7799997, 101 | "end": 7.2799997, 102 | "confidence": 0.99692756 103 | }, 104 | { 105 | "word": "team", 106 | "start": 7.3599997, 107 | "end": 7.8599997, 108 | "confidence": 0.05924989 109 | }, 110 | { 111 | "word": "i", 112 | "start": 8.5, 113 | "end": 8.639999, 114 | "confidence": 0.99139494 115 | }, 116 | { 117 | "word": "think", 118 | "start": 8.639999, 119 | "end": 8.92, 120 | "confidence": 0.9987821 121 | }, 122 | { 123 | "word": "many", 124 | "start": 8.92, 125 | "end": 9.139999, 126 | "confidence": 0.9982223 127 | }, 128 | { 129 | "word": "of", 130 | "start": 9.139999, 131 | "end": 9.3, 132 | "confidence": 0.9979126 133 | }, 134 | { 135 | "word": "us", 136 | "start": 9.3, 137 | "end": 9.8, 138 | "confidence": 0.98904747 139 | }, 140 | { 141 | "word": "are", 142 | "start": 9.96, 143 | "end": 10.2, 144 | "confidence": 0.9962388 145 | }, 146 | { 147 | "word": "looking", 148 | "start": 10.2, 149 | "end": 10.5199995, 150 | "confidence": 0.99491775 151 | }, 152 | { 153 | "word": "forward", 154 | "start": 10.5199995, 155 | "end": 10.8, 156 | "confidence": 0.99450845 157 | }, 158 | { 159 | "word": "to", 160 | "start": 10.8, 161 | "end": 10.94, 162 | "confidence": 0.9888053 163 | }, 164 | { 165 | "word": "it", 166 | "start": 10.94, 167 | "end": 11.16, 168 | "confidence": 0.99732226 169 | }, 170 | { 171 | "word": "just", 172 | "start": 11.16, 173 | "end": 11.42, 174 | "confidence": 0.9988391 175 | }, 176 | { 177 | "word": "being", 178 | "start": 11.42, 179 | "end": 11.92, 180 | "confidence": 0.779804 181 | }, 182 | { 183 | "word": "normal", 184 | "start": 12.04, 185 | "end": 12.54, 186 | "confidence": 0.078283265 187 | }, 188 | { 189 | "word": "and", 190 | "start": 13.78, 191 | "end": 13.9, 192 | "confidence": 0.9996959 193 | }, 194 | { 195 | "word": "i", 196 | "start": 13.9, 197 | "end": 14.0, 198 | "confidence": 0.9448686 199 | }, 200 | { 201 | "word": "think", 202 | "start": 14.0, 203 | "end": 14.24, 204 | "confidence": 0.9922965 205 | }, 206 | { 207 | "word": "if", 208 | "start": 14.24, 209 | "end": 14.4, 210 | "confidence": 0.99608225 211 | }, 212 | { 213 | "word": "it", 214 | "start": 14.4, 215 | "end": 14.559999, 216 | "confidence": 0.9979526 217 | }, 218 | { 219 | "word": "signifies", 220 | "start": 14.559999, 221 | "end": 15.059999, 222 | "confidence": 0.9989042 223 | }, 224 | { 225 | "word": "anything", 226 | "start": 15.16, 227 | "end": 15.66, 228 | "confidence": 0.9827157 229 | }, 230 | { 231 | "word": "it", 232 | "start": 15.82, 233 | "end": 16.02, 234 | "confidence": 0.9629161 235 | }, 236 | { 237 | "word": "is", 238 | "start": 16.02, 239 | "end": 16.52, 240 | "confidence": 0.9853512 241 | }, 242 | { 243 | "word": "to", 244 | "start": 16.9, 245 | "end": 17.1, 246 | "confidence": 0.99523395 247 | }, 248 | { 249 | "word": "honor", 250 | "start": 17.1, 251 | "end": 17.6, 252 | "confidence": 0.6679874 253 | }, 254 | { 255 | "word": "the", 256 | "start": 17.619999, 257 | "end": 17.78, 258 | "confidence": 0.9881501 259 | }, 260 | { 261 | "word": "women", 262 | "start": 17.78, 263 | "end": 18.02, 264 | "confidence": 0.9790949 265 | }, 266 | { 267 | "word": "who", 268 | "start": 18.02, 269 | "end": 18.18, 270 | "confidence": 0.97190136 271 | }, 272 | { 273 | "word": "came", 274 | "start": 18.18, 275 | "end": 18.42, 276 | "confidence": 0.98705274 277 | }, 278 | { 279 | "word": "before", 280 | "start": 18.42, 281 | "end": 18.8, 282 | "confidence": 0.55442697 283 | }, 284 | { 285 | "word": "us", 286 | "start": 18.8, 287 | "end": 19.3, 288 | "confidence": 0.9728483 289 | }, 290 | { 291 | "word": "who", 292 | "start": 19.46, 293 | "end": 19.96, 294 | "confidence": 0.97716033 295 | }, 296 | { 297 | "word": "were", 298 | "start": 20.22, 299 | "end": 20.46, 300 | "confidence": 0.94137776 301 | }, 302 | { 303 | "word": "skilled", 304 | "start": 20.46, 305 | "end": 20.9, 306 | "confidence": 0.16880928 307 | }, 308 | { 309 | "word": "and", 310 | "start": 20.9, 311 | "end": 21.14, 312 | "confidence": 0.9861166 313 | }, 314 | { 315 | "word": "qualified", 316 | "start": 21.14, 317 | "end": 21.64, 318 | "confidence": 0.98559004 319 | }, 320 | { 321 | "word": "and", 322 | "start": 22.38, 323 | "end": 22.58, 324 | "confidence": 0.9992938 325 | }, 326 | { 327 | "word": "didn't", 328 | "start": 22.58, 329 | "end": 22.88, 330 | "confidence": 0.9978927 331 | }, 332 | { 333 | "word": "get", 334 | "start": 22.88, 335 | "end": 23.38, 336 | "confidence": 0.99836797 337 | }, 338 | { 339 | "word": "the", 340 | "start": 23.4, 341 | "end": 23.66, 342 | "confidence": 0.99690944 343 | }, 344 | { 345 | "word": "same", 346 | "start": 23.66, 347 | "end": 23.9, 348 | "confidence": 0.9925182 349 | }, 350 | { 351 | "word": "opportunities", 352 | "start": 23.9, 353 | "end": 24.4, 354 | "confidence": 0.9980082 355 | }, 356 | { 357 | "word": "that", 358 | "start": 24.52, 359 | "end": 24.68, 360 | "confidence": 0.99836177 361 | }, 362 | { 363 | "word": "we", 364 | "start": 24.68, 365 | "end": 24.86, 366 | "confidence": 0.9639874 367 | }, 368 | { 369 | "word": "have", 370 | "start": 24.86, 371 | "end": 25.06, 372 | "confidence": 0.911611 373 | }, 374 | { 375 | "word": "today", 376 | "start": 25.06, 377 | "end": 25.56, 378 | "confidence": 0.049012642 379 | } 380 | ] 381 | } 382 | ] 383 | } 384 | ] 385 | } 386 | } 387 | -------------------------------------------------------------------------------- /test/test_assembly.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import re 3 | from deepgram_captions.srt import srt 4 | from deepgram_captions.webvtt import webvtt 5 | from deepgram_captions.converters import AssemblyAIConverter 6 | import json 7 | 8 | json_file_assemblyai_transcription = "test/assemblyai_transcription.json" 9 | json_file_assemblyai_utterances = "test/assemblyai_utterances.json" 10 | 11 | assemblyai_transcription = None 12 | assemblyai_utterances = None 13 | 14 | with open(json_file_assemblyai_transcription, "r") as json_file: 15 | assemblyai_transcription = json.load(json_file) 16 | with open(json_file_assemblyai_utterances, "r") as json_file: 17 | assemblyai_utterances = json.load(json_file) 18 | 19 | 20 | @pytest.mark.parametrize( 21 | "input_data", 22 | [ 23 | AssemblyAIConverter(assemblyai_transcription), 24 | AssemblyAIConverter(assemblyai_utterances), 25 | ], 26 | ) 27 | def test_webvtt_start_with_webvtt(input_data): 28 | """ 29 | Test that WebVTT captions start with "WEBVTT". 30 | """ 31 | result = webvtt(input_data) 32 | result = webvtt(input_data) 33 | 34 | lines = result.strip().split("\n") 35 | 36 | if lines: 37 | first_line = lines[0].strip() 38 | assert ( 39 | first_line == "WEBVTT" 40 | ), f"WebVTT captions do not start with 'WEBVTT': {first_line}" 41 | 42 | 43 | @pytest.mark.parametrize( 44 | "input_data", 45 | [ 46 | AssemblyAIConverter(assemblyai_transcription), 47 | AssemblyAIConverter(assemblyai_utterances), 48 | ], 49 | ) 50 | def test_webvtt_timestamp_format(input_data): 51 | """ 52 | Test if timestamps in the WebVTT output have the correct format. 53 | """ 54 | result = webvtt(input_data) 55 | timestamp_pattern = r"\d{2}:\d{2}:\d{2}\.\d{3} --> \d{2}:\d{2}:\d{2}\.\d{3}" 56 | webvtt_captions = webvtt(result) 57 | timestamp_lines = re.findall(timestamp_pattern, webvtt_captions) 58 | for timestamp_line in timestamp_lines: 59 | assert ( 60 | re.match(timestamp_pattern, timestamp_line) is not None 61 | ), f"Timestamp format is incorrect: {timestamp_line}" 62 | 63 | 64 | @pytest.mark.parametrize( 65 | "input_data", 66 | [ 67 | AssemblyAIConverter(assemblyai_transcription), 68 | AssemblyAIConverter(assemblyai_utterances), 69 | ], 70 | ) 71 | def test_webvtt_speaker_format(input_data): 72 | """ 73 | Test if the WebVTT output contains speaker information in the correct format. 74 | """ 75 | result = webvtt(input_data) 76 | caption_pattern = r"\d{2}:\d{2}:\d{2}\.\d{3} --> \d{2}:\d{2}:\d{2}\.\d{3}\n[^\n]+\n[^\n]+" 77 | captions = re.findall(caption_pattern, result) 78 | for caption in captions: 79 | assert ( 80 | re.match(r"", caption.split("\n")[1]) is not None 81 | ), f"Speaker format is incorrect: {caption}" 82 | 83 | 84 | @pytest.mark.parametrize( 85 | "input_data", 86 | [ 87 | AssemblyAIConverter(assemblyai_transcription), 88 | AssemblyAIConverter(assemblyai_utterances), 89 | ], 90 | ) 91 | def test_srt_format(input_data): 92 | """ 93 | Test if SRT captions follow the correct format. 94 | """ 95 | result = srt(input_data) 96 | srt_captions = result.split("\n\n") 97 | 98 | for index, caption in enumerate(srt_captions, start=1): 99 | if caption.strip(): 100 | lines = caption.split("\n") 101 | assert lines[0] == str(index), f"Caption number is incorrect: {lines[0]}" 102 | 103 | timestamp_pattern = r"\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}" 104 | assert ( 105 | re.match(timestamp_pattern, lines[1]) is not None 106 | ), f"Timestamp format is incorrect: {lines[1]}" 107 | 108 | assert len(lines) > 2, "Speech content is missing" 109 | 110 | 111 | @pytest.mark.parametrize( 112 | "input_data", 113 | [ 114 | AssemblyAIConverter(assemblyai_transcription), 115 | AssemblyAIConverter(assemblyai_utterances), 116 | ], 117 | ) 118 | def test_srt_timestamp_format(input_data): 119 | """ 120 | Test if timestamps in the SRT output have the correct format. 121 | """ 122 | result = srt(input_data) 123 | srt_captions = result.split("\n\n") 124 | timestamp_pattern = r"\d{2}:\d{2}:\d{2}\,\d{3} --> \d{2}:\d{2}:\d{2}\,\d{3}" 125 | for caption in srt_captions: 126 | if caption.strip(): 127 | lines = caption.split("\n") 128 | assert ( 129 | re.match(timestamp_pattern, lines[1]) is not None 130 | ), f"Timestamp format is incorrect: {lines[1]}" 131 | 132 | 133 | @pytest.mark.parametrize( 134 | "input_data", 135 | [ 136 | AssemblyAIConverter(assemblyai_transcription), 137 | AssemblyAIConverter(assemblyai_utterances), 138 | ], 139 | ) 140 | def test_first_caption_number(input_data): 141 | """ 142 | Test that the first caption number in the SRT output is 1. 143 | """ 144 | result = srt(input_data) 145 | srt_captions = result.split("\n\n") 146 | 147 | if srt_captions: 148 | first_caption_lines = srt_captions[0].split("\n") 149 | first_caption_number = int(first_caption_lines[0]) 150 | 151 | assert ( 152 | first_caption_number == 1 153 | ), f"First caption number is not 1: {first_caption_number}" 154 | -------------------------------------------------------------------------------- /test/test_deepgram.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import re 3 | from deepgram_captions.srt import srt 4 | from deepgram_captions.webvtt import webvtt 5 | from deepgram_captions.converters import DeepgramConverter 6 | import json 7 | 8 | json_file_dg_transcription = "test/dg_transcription.json" 9 | json_file_dg_utterances = "test/dg_utterances.json" 10 | json_file_dg_speakers = "test/dg_speakers.json" 11 | json_file_dg_speakers_no_utterances = "test/dg_speakers_no_utterances.json" 12 | 13 | dg_transcription = None 14 | dg_utterances = None 15 | dg_speakers = None 16 | dg_speakers_no_utterances = None 17 | 18 | with open(json_file_dg_transcription, "r") as json_file: 19 | dg_transcription = json.load(json_file) 20 | with open(json_file_dg_utterances, "r") as json_file: 21 | dg_utterances = json.load(json_file) 22 | with open(json_file_dg_speakers, "r") as json_file: 23 | dg_speakers = json.load(json_file) 24 | with open(json_file_dg_speakers_no_utterances, "r") as json_file: 25 | dg_speakers_no_utterances = json.load(json_file) 26 | 27 | 28 | @pytest.mark.parametrize( 29 | "input_data", 30 | [ 31 | DeepgramConverter(dg_transcription), 32 | DeepgramConverter(dg_utterances), 33 | DeepgramConverter(dg_speakers), 34 | DeepgramConverter(dg_speakers_no_utterances), 35 | ], 36 | ) 37 | def test_webvtt_start_with_webvtt(input_data): 38 | """ 39 | Test that WebVTT captions start with "WEBVTT". 40 | """ 41 | result = webvtt(input_data) 42 | result = webvtt(input_data) 43 | 44 | lines = result.strip().split("\n") 45 | 46 | if lines: 47 | first_line = lines[0].strip() 48 | assert ( 49 | first_line == "WEBVTT" 50 | ), f"WebVTT captions do not start with 'WEBVTT': {first_line}" 51 | 52 | 53 | @pytest.mark.parametrize( 54 | "input_data", 55 | [ 56 | DeepgramConverter(dg_transcription), 57 | DeepgramConverter(dg_utterances), 58 | DeepgramConverter(dg_speakers), 59 | DeepgramConverter(dg_speakers_no_utterances), 60 | ], 61 | ) 62 | def test_webvtt_header(input_data): 63 | """ 64 | Test if the WebVTT format header is generated correctly. 65 | """ 66 | result = webvtt(input_data) 67 | assert isinstance(result, str), "Result should be a string" 68 | assert "NOTE" in result, "Result should contain 'NOTE' in header" 69 | assert ( 70 | "Transcription provided by Deepgram" in result 71 | ), "Result should name Deepgram as transcription source in header" 72 | assert "Request Id" in result, "Result should contain Request Id in header" 73 | assert "Created" in result, "Result should contain Created timestamp in header" 74 | assert "Duration" in result, "Result should contain Duration information in header" 75 | assert "Channels" in result, "Result should contain Channels information in header" 76 | 77 | 78 | @pytest.mark.parametrize( 79 | "input_data", 80 | [ 81 | DeepgramConverter(dg_transcription), 82 | DeepgramConverter(dg_utterances), 83 | DeepgramConverter(dg_speakers), 84 | DeepgramConverter(dg_speakers_no_utterances), 85 | ], 86 | ) 87 | def test_webvtt_timestamp_format(input_data): 88 | """ 89 | Test if timestamps in the WebVTT output have the correct format. 90 | """ 91 | result = webvtt(input_data) 92 | timestamp_pattern = r"\d{2}:\d{2}:\d{2}\.\d{3} --> \d{2}:\d{2}:\d{2}\.\d{3}" 93 | webvtt_captions = webvtt(result) 94 | timestamp_lines = re.findall(timestamp_pattern, webvtt_captions) 95 | for timestamp_line in timestamp_lines: 96 | assert ( 97 | re.match(timestamp_pattern, timestamp_line) is not None 98 | ), f"Timestamp format is incorrect: {timestamp_line}" 99 | 100 | 101 | @pytest.mark.parametrize( 102 | "input_data", 103 | [DeepgramConverter(dg_speakers), DeepgramConverter(dg_speakers_no_utterances)], 104 | ) 105 | def test_webvtt_speaker_format(input_data): 106 | """ 107 | Test if the WebVTT output contains speaker information in the correct format. 108 | """ 109 | result = webvtt(input_data) 110 | caption_pattern = r"\d{2}:\d{2}:\d{2}\.\d{3} --> \d{2}:\d{2}:\d{2}\.\d{3}\n[^\n]+\n[^\n]+" 111 | captions = re.findall(caption_pattern, result) 112 | for caption in captions: 113 | assert ( 114 | re.match(r"", caption.split("\n")[1]) is not None 115 | ), f"Speaker format is incorrect: {caption}" 116 | 117 | 118 | @pytest.mark.parametrize( 119 | "input_data", 120 | [ 121 | DeepgramConverter(dg_transcription), 122 | DeepgramConverter(dg_utterances), 123 | DeepgramConverter(dg_speakers), 124 | DeepgramConverter(dg_speakers_no_utterances), 125 | ], 126 | ) 127 | def test_srt_format(input_data): 128 | """ 129 | Test if SRT captions follow the correct format. 130 | """ 131 | result = srt(input_data) 132 | srt_captions = result.split("\n\n") 133 | 134 | for index, caption in enumerate(srt_captions, start=1): 135 | if caption.strip(): 136 | lines = caption.split("\n") 137 | assert lines[0] == str(index), f"Caption number is incorrect: {lines[0]}" 138 | 139 | timestamp_pattern = r"\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}" 140 | assert ( 141 | re.match(timestamp_pattern, lines[1]) is not None 142 | ), f"Timestamp format is incorrect: {lines[1]}" 143 | 144 | assert len(lines) > 2, "Speech content is missing" 145 | 146 | 147 | @pytest.mark.parametrize( 148 | "input_data", 149 | [ 150 | DeepgramConverter(dg_transcription), 151 | DeepgramConverter(dg_utterances), 152 | DeepgramConverter(dg_speakers), 153 | DeepgramConverter(dg_speakers_no_utterances), 154 | ], 155 | ) 156 | def test_srt_timestamp_format(input_data): 157 | """ 158 | Test if timestamps in the SRT output have the correct format. 159 | """ 160 | result = srt(input_data) 161 | srt_captions = result.split("\n\n") 162 | timestamp_pattern = r"\d{2}:\d{2}:\d{2}\,\d{3} --> \d{2}:\d{2}:\d{2}\,\d{3}" 163 | for caption in srt_captions: 164 | if caption.strip(): 165 | lines = caption.split("\n") 166 | assert ( 167 | re.match(timestamp_pattern, lines[1]) is not None 168 | ), f"Timestamp format is incorrect: {lines[1]}" 169 | 170 | 171 | @pytest.mark.parametrize( 172 | "input_data", 173 | [ 174 | DeepgramConverter(dg_transcription), 175 | DeepgramConverter(dg_utterances), 176 | DeepgramConverter(dg_speakers), 177 | DeepgramConverter(dg_speakers_no_utterances), 178 | ], 179 | ) 180 | def test_first_caption_number(input_data): 181 | """ 182 | Test that the first caption number in the SRT output is 1. 183 | """ 184 | result = srt(input_data) 185 | srt_captions = result.split("\n\n") 186 | 187 | if srt_captions: 188 | first_caption_lines = srt_captions[0].split("\n") 189 | first_caption_number = int(first_caption_lines[0]) 190 | 191 | assert ( 192 | first_caption_number == 1 193 | ), f"First caption number is not 1: {first_caption_number}" 194 | 195 | 196 | @pytest.mark.parametrize( 197 | "input_data", 198 | [ 199 | DeepgramConverter(dg_speakers), 200 | DeepgramConverter(dg_speakers_no_utterances), 201 | ], 202 | ) 203 | def test_srt_speaker_format(input_data): 204 | """ 205 | Test if the SRT output contains speaker information in the correct format. 206 | """ 207 | result = srt(input_data) 208 | srt_captions = result.split("\n\n") 209 | speaker_pattern = r"\[speaker \d+\]" 210 | 211 | for caption in srt_captions: 212 | if caption.strip(): 213 | lines = caption.split("\n") 214 | 215 | # Check if speaker information is present 216 | if len(lines) > 2 and re.match(speaker_pattern, lines[2]): 217 | assert ( 218 | re.match(speaker_pattern, lines[2]) is not None 219 | ), f"Speaker format is incorrect: {lines[2]}" 220 | 221 | 222 | if __name__ == "__main__": 223 | pytest.main() 224 | -------------------------------------------------------------------------------- /test/test_whisper.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import re 3 | from deepgram_captions.srt import srt 4 | from deepgram_captions.webvtt import webvtt 5 | from deepgram_captions.converters import WhisperTimestampedConverter 6 | import json 7 | 8 | 9 | json_file_whisper_timestamped = "test/whisper_timestamped.json" 10 | 11 | whisper_timestamped = None 12 | 13 | with open(json_file_whisper_timestamped, "r") as json_file: 14 | whisper_timestamped = json.load(json_file) 15 | 16 | 17 | @pytest.mark.parametrize( 18 | "input_data", 19 | [ 20 | WhisperTimestampedConverter(whisper_timestamped), 21 | ], 22 | ) 23 | def test_webvtt_start_with_webvtt(input_data): 24 | """ 25 | Test that WebVTT captions start with "WEBVTT". 26 | """ 27 | result = webvtt(input_data) 28 | result = webvtt(input_data) 29 | 30 | lines = result.strip().split("\n") 31 | 32 | if lines: 33 | first_line = lines[0].strip() 34 | assert ( 35 | first_line == "WEBVTT" 36 | ), f"WebVTT captions do not start with 'WEBVTT': {first_line}" 37 | 38 | 39 | @pytest.mark.parametrize( 40 | "input_data", 41 | [ 42 | WhisperTimestampedConverter(whisper_timestamped), 43 | ], 44 | ) 45 | def test_webvtt_timestamp_format(input_data): 46 | """ 47 | Test if timestamps in the WebVTT output have the correct format. 48 | """ 49 | result = webvtt(input_data) 50 | timestamp_pattern = r"\d{2}:\d{2}:\d{2}\.\d{3} --> \d{2}:\d{2}:\d{2}\.\d{3}" 51 | webvtt_captions = webvtt(result) 52 | timestamp_lines = re.findall(timestamp_pattern, webvtt_captions) 53 | for timestamp_line in timestamp_lines: 54 | assert ( 55 | re.match(timestamp_pattern, timestamp_line) is not None 56 | ), f"Timestamp format is incorrect: {timestamp_line}" 57 | 58 | 59 | @pytest.mark.parametrize( 60 | "input_data", 61 | [ 62 | WhisperTimestampedConverter(whisper_timestamped), 63 | ], 64 | ) 65 | def test_webvtt_speaker_format(input_data): 66 | """ 67 | Test if the WebVTT output contains speaker information in the correct format. 68 | """ 69 | result = webvtt(input_data) 70 | caption_pattern = r"\d{2}:\d{2}:\d{2}\.\d{3} --> \d{2}:\d{2}:\d{2}\.\d{3}\n[^\n]+\n[^\n]+" 71 | captions = re.findall(caption_pattern, result) 72 | for caption in captions: 73 | assert ( 74 | re.match(r"", caption.split("\n")[1]) is not None 75 | ), f"Speaker format is incorrect: {caption}" 76 | 77 | 78 | @pytest.mark.parametrize( 79 | "input_data", 80 | [ 81 | WhisperTimestampedConverter(whisper_timestamped), 82 | ], 83 | ) 84 | def test_srt_format(input_data): 85 | """ 86 | Test if SRT captions follow the correct format. 87 | """ 88 | result = srt(input_data) 89 | srt_captions = result.split("\n\n") 90 | 91 | for index, caption in enumerate(srt_captions, start=1): 92 | if caption.strip(): 93 | lines = caption.split("\n") 94 | assert lines[0] == str(index), f"Caption number is incorrect: {lines[0]}" 95 | 96 | timestamp_pattern = r"\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}" 97 | assert ( 98 | re.match(timestamp_pattern, lines[1]) is not None 99 | ), f"Timestamp format is incorrect: {lines[1]}" 100 | 101 | assert len(lines) > 2, "Speech content is missing" 102 | 103 | 104 | @pytest.mark.parametrize( 105 | "input_data", 106 | [ 107 | WhisperTimestampedConverter(whisper_timestamped), 108 | ], 109 | ) 110 | def test_srt_timestamp_format(input_data): 111 | """ 112 | Test if timestamps in the SRT output have the correct format. 113 | """ 114 | result = srt(input_data) 115 | srt_captions = result.split("\n\n") 116 | timestamp_pattern = r"\d{2}:\d{2}:\d{2}\,\d{3} --> \d{2}:\d{2}:\d{2}\,\d{3}" 117 | for caption in srt_captions: 118 | if caption.strip(): 119 | lines = caption.split("\n") 120 | assert ( 121 | re.match(timestamp_pattern, lines[1]) is not None 122 | ), f"Timestamp format is incorrect: {lines[1]}" 123 | 124 | 125 | @pytest.mark.parametrize( 126 | "input_data", 127 | [ 128 | WhisperTimestampedConverter(whisper_timestamped), 129 | ], 130 | ) 131 | def test_first_caption_number(input_data): 132 | """ 133 | Test that the first caption number in the SRT output is 1. 134 | """ 135 | result = srt(input_data) 136 | srt_captions = result.split("\n\n") 137 | 138 | if srt_captions: 139 | first_caption_lines = srt_captions[0].split("\n") 140 | first_caption_number = int(first_caption_lines[0]) 141 | 142 | assert ( 143 | first_caption_number == 1 144 | ), f"First caption number is not 1: {first_caption_number}" 145 | 146 | 147 | if __name__ == "__main__": 148 | pytest.main() 149 | -------------------------------------------------------------------------------- /test/whisper_timestamped.json: -------------------------------------------------------------------------------- 1 | { 2 | "text": " Yeah, as much as it's worth celebrating the first spacewalk with an all-female team, I think many of us are looking forward to it just being normal. And I think if it signifies anything, it is to honor the women who came before us, who were skilled and qualified and didn't get the same opportunities that we have today.", 3 | "segments": [ 4 | { 5 | "start": 0.12, 6 | "end": 7.62, 7 | "text": " Yeah, as much as it's worth celebrating the first spacewalk with an all-female team,", 8 | "confidence": 0.89, 9 | "words": [ 10 | { 11 | "text": "Yeah,", 12 | "start": 0.12, 13 | "end": 0.38, 14 | "confidence": 0.74 15 | }, 16 | { 17 | "text": "as", 18 | "start": 0.98, 19 | "end": 1.12, 20 | "confidence": 0.95 21 | }, 22 | { 23 | "text": "much", 24 | "start": 1.12, 25 | "end": 1.36, 26 | "confidence": 0.98 27 | }, 28 | { 29 | "text": "as", 30 | "start": 1.36, 31 | "end": 1.88, 32 | "confidence": 0.97 33 | }, 34 | { 35 | "text": "it's", 36 | "start": 1.88, 37 | "end": 2.36, 38 | "confidence": 0.98 39 | }, 40 | { 41 | "text": "worth", 42 | "start": 2.36, 43 | "end": 2.88, 44 | "confidence": 1.0 45 | }, 46 | { 47 | "text": "celebrating", 48 | "start": 2.88, 49 | "end": 3.66, 50 | "confidence": 0.99 51 | }, 52 | { 53 | "text": "the", 54 | "start": 3.66, 55 | "end": 4.78, 56 | "confidence": 0.96 57 | }, 58 | { 59 | "text": "first", 60 | "start": 4.78, 61 | "end": 5.38, 62 | "confidence": 0.99 63 | }, 64 | { 65 | "text": "spacewalk", 66 | "start": 5.38, 67 | "end": 6.0, 68 | "confidence": 0.75 69 | }, 70 | { 71 | "text": "with", 72 | "start": 6.0, 73 | "end": 6.72, 74 | "confidence": 0.61 75 | }, 76 | { 77 | "text": "an", 78 | "start": 6.72, 79 | "end": 6.88, 80 | "confidence": 0.99 81 | }, 82 | { 83 | "text": "all-female", 84 | "start": 6.88, 85 | "end": 7.3, 86 | "confidence": 0.85 87 | }, 88 | { 89 | "text": "team,", 90 | "start": 7.3, 91 | "end": 7.62, 92 | "confidence": 1.0 93 | } 94 | ] 95 | }, 96 | { 97 | "start": 8.54, 98 | "end": 12.36, 99 | "text": " I think many of us are looking forward to it just being normal.", 100 | "confidence": 1.0, 101 | "words": [ 102 | { 103 | "text": "I", 104 | "start": 8.54, 105 | "end": 8.68, 106 | "confidence": 1.0 107 | }, 108 | { 109 | "text": "think", 110 | "start": 8.68, 111 | "end": 8.9, 112 | "confidence": 1.0 113 | }, 114 | { 115 | "text": "many", 116 | "start": 8.9, 117 | "end": 9.16, 118 | "confidence": 0.99 119 | }, 120 | { 121 | "text": "of", 122 | "start": 9.16, 123 | "end": 9.38, 124 | "confidence": 1.0 125 | }, 126 | { 127 | "text": "us", 128 | "start": 9.38, 129 | "end": 9.86, 130 | "confidence": 1.0 131 | }, 132 | { 133 | "text": "are", 134 | "start": 9.86, 135 | "end": 10.3, 136 | "confidence": 1.0 137 | }, 138 | { 139 | "text": "looking", 140 | "start": 10.3, 141 | "end": 10.6, 142 | "confidence": 0.99 143 | }, 144 | { 145 | "text": "forward", 146 | "start": 10.6, 147 | "end": 10.82, 148 | "confidence": 1.0 149 | }, 150 | { 151 | "text": "to", 152 | "start": 10.82, 153 | "end": 10.96, 154 | "confidence": 0.99 155 | }, 156 | { 157 | "text": "it", 158 | "start": 10.96, 159 | "end": 11.18, 160 | "confidence": 0.99 161 | }, 162 | { 163 | "text": "just", 164 | "start": 11.18, 165 | "end": 11.4, 166 | "confidence": 0.99 167 | }, 168 | { 169 | "text": "being", 170 | "start": 11.4, 171 | "end": 11.86, 172 | "confidence": 1.0 173 | }, 174 | { 175 | "text": "normal.", 176 | "start": 11.86, 177 | "end": 12.36, 178 | "confidence": 1.0 179 | } 180 | ] 181 | }, 182 | { 183 | "start": 12.98, 184 | "end": 19.0, 185 | "text": " And I think if it signifies anything, it is to honor the women who came before us,", 186 | "confidence": 0.98, 187 | "words": [ 188 | { 189 | "text": "And", 190 | "start": 12.98, 191 | "end": 13.26, 192 | "confidence": 0.98 193 | }, 194 | { 195 | "text": "I", 196 | "start": 13.26, 197 | "end": 14.12, 198 | "confidence": 0.91 199 | }, 200 | { 201 | "text": "think", 202 | "start": 14.12, 203 | "end": 14.32, 204 | "confidence": 1.0 205 | }, 206 | { 207 | "text": "if", 208 | "start": 14.32, 209 | "end": 14.5, 210 | "confidence": 0.94 211 | }, 212 | { 213 | "text": "it", 214 | "start": 14.5, 215 | "end": 14.7, 216 | "confidence": 0.99 217 | }, 218 | { 219 | "text": "signifies", 220 | "start": 14.7, 221 | "end": 15.08, 222 | "confidence": 1.0 223 | }, 224 | { 225 | "text": "anything,", 226 | "start": 15.08, 227 | "end": 15.56, 228 | "confidence": 1.0 229 | }, 230 | { 231 | "text": "it", 232 | "start": 16.1, 233 | "end": 16.12, 234 | "confidence": 0.96 235 | }, 236 | { 237 | "text": "is", 238 | "start": 16.12, 239 | "end": 16.52, 240 | "confidence": 1.0 241 | }, 242 | { 243 | "text": "to", 244 | "start": 16.52, 245 | "end": 17.16, 246 | "confidence": 0.99 247 | }, 248 | { 249 | "text": "honor", 250 | "start": 17.16, 251 | "end": 17.42, 252 | "confidence": 1.0 253 | }, 254 | { 255 | "text": "the", 256 | "start": 17.42, 257 | "end": 17.84, 258 | "confidence": 1.0 259 | }, 260 | { 261 | "text": "women", 262 | "start": 17.84, 263 | "end": 18.06, 264 | "confidence": 0.94 265 | }, 266 | { 267 | "text": "who", 268 | "start": 18.06, 269 | "end": 18.2, 270 | "confidence": 0.98 271 | }, 272 | { 273 | "text": "came", 274 | "start": 18.2, 275 | "end": 18.46, 276 | "confidence": 1.0 277 | }, 278 | { 279 | "text": "before", 280 | "start": 18.46, 281 | "end": 18.76, 282 | "confidence": 1.0 283 | }, 284 | { 285 | "text": "us,", 286 | "start": 18.76, 287 | "end": 19.0, 288 | "confidence": 1.0 289 | } 290 | ] 291 | }, 292 | { 293 | "start": 19.82, 294 | "end": 25.32, 295 | "text": " who were skilled and qualified and didn't get the same opportunities that we have today.", 296 | "confidence": 0.96, 297 | "words": [ 298 | { 299 | "text": "who", 300 | "start": 19.82, 301 | "end": 19.98, 302 | "confidence": 1.0 303 | }, 304 | { 305 | "text": "were", 306 | "start": 19.98, 307 | "end": 20.52, 308 | "confidence": 0.99 309 | }, 310 | { 311 | "text": "skilled", 312 | "start": 20.52, 313 | "end": 20.9, 314 | "confidence": 1.0 315 | }, 316 | { 317 | "text": "and", 318 | "start": 20.9, 319 | "end": 21.22, 320 | "confidence": 0.93 321 | }, 322 | { 323 | "text": "qualified", 324 | "start": 21.22, 325 | "end": 21.8, 326 | "confidence": 1.0 327 | }, 328 | { 329 | "text": "and", 330 | "start": 21.8, 331 | "end": 22.64, 332 | "confidence": 0.76 333 | }, 334 | { 335 | "text": "didn't", 336 | "start": 22.64, 337 | "end": 22.96, 338 | "confidence": 0.99 339 | }, 340 | { 341 | "text": "get", 342 | "start": 22.96, 343 | "end": 23.38, 344 | "confidence": 1.0 345 | }, 346 | { 347 | "text": "the", 348 | "start": 23.38, 349 | "end": 23.74, 350 | "confidence": 0.99 351 | }, 352 | { 353 | "text": "same", 354 | "start": 23.74, 355 | "end": 23.94, 356 | "confidence": 1.0 357 | }, 358 | { 359 | "text": "opportunities", 360 | "start": 23.94, 361 | "end": 24.54, 362 | "confidence": 0.99 363 | }, 364 | { 365 | "text": "that", 366 | "start": 24.54, 367 | "end": 24.72, 368 | "confidence": 0.97 369 | }, 370 | { 371 | "text": "we", 372 | "start": 24.72, 373 | "end": 24.88, 374 | "confidence": 1.0 375 | }, 376 | { 377 | "text": "have", 378 | "start": 24.88, 379 | "end": 25.06, 380 | "confidence": 1.0 381 | }, 382 | { 383 | "text": "today.", 384 | "start": 25.06, 385 | "end": 25.32, 386 | "confidence": 0.87 387 | } 388 | ] 389 | } 390 | ], 391 | "language": "en" 392 | } 393 | --------------------------------------------------------------------------------