├── .eslintrc.json
├── .gitignore
├── .prettierrc
├── LICENSE
├── README.md
├── image
│   └── 20241006174459.png
├── package-lock.json
├── package.json
├── public
│   ├── index.html
│   ├── openai-logomark.svg
│   └── robots.txt
├── readme
│   └── realtime-console-demo.png
├── relay-server
│   ├── index.js
│   └── lib
│       └── relay.js
├── src
│   ├── App.scss
│   ├── App.tsx
│   ├── components
│   │   ├── Map.scss
│   │   ├── Map.tsx
│   │   ├── button
│   │   │   ├── Button.scss
│   │   │   └── Button.tsx
│   │   └── toggle
│   │       ├── Toggle.scss
│   │       └── Toggle.tsx
│   ├── index.css
│   ├── index.tsx
│   ├── lib
│   │   ├── realtime-api-beta
│   │   │   ├── dist
│   │   │   │   ├── index.d.ts
│   │   │   │   ├── index.d.ts.map
│   │   │   │   └── lib
│   │   │   │       ├── api.d.ts
│   │   │   │       ├── api.d.ts.map
│   │   │   │       ├── client.d.ts
│   │   │   │       ├── client.d.ts.map
│   │   │   │       ├── conversation.d.ts
│   │   │   │       ├── conversation.d.ts.map
│   │   │   │       ├── event_handler.d.ts
│   │   │   │       ├── event_handler.d.ts.map
│   │   │   │       ├── utils.d.ts
│   │   │   │       └── utils.d.ts.map
│   │   │   ├── index.js
│   │   │   └── lib
│   │   │       ├── api.js
│   │   │       ├── client.js
│   │   │       ├── conversation.js
│   │   │       ├── event_handler.js
│   │   │       └── utils.js
│   │   └── wavtools
│   │       ├── dist
│   │       │   ├── index.d.ts
│   │       │   ├── index.d.ts.map
│   │       │   └── lib
│   │       │       ├── analysis
│   │       │       │   ├── audio_analysis.d.ts
│   │       │       │   ├── audio_analysis.d.ts.map
│   │       │       │   ├── constants.d.ts
│   │       │       │   └── constants.d.ts.map
│   │       │       ├── wav_packer.d.ts
│   │       │       ├── wav_packer.d.ts.map
│   │       │       ├── wav_recorder.d.ts
│   │       │       ├── wav_recorder.d.ts.map
│   │       │       ├── wav_stream_player.d.ts
│   │       │       ├── wav_stream_player.d.ts.map
│   │       │       └── worklets
│   │       │           ├── audio_processor.d.ts
│   │       │           ├── audio_processor.d.ts.map
│   │       │           ├── stream_processor.d.ts
│   │       │           └── stream_processor.d.ts.map
│   │       ├── index.js
│   │       └── lib
│   │           ├── analysis
│   │           │   ├── audio_analysis.js
│   │           │   └── constants.js
│   │           ├── wav_packer.js
│   │           ├── wav_recorder.js
│   │           ├── wav_stream_player.js
│   │           └── worklets
│   │               ├── audio_processor.js
│   │               └── stream_processor.js
│   ├── logo.svg
│   ├── pages
│   │   ├── ConsolePage.scss
│   │   └── ConsolePage.tsx
│   ├── react-app-env.d.ts
│   ├── reportWebVitals.ts
│   ├── setupTests.ts
│   └── utils
│       ├── conversation_config.js
│       └── wav_renderer.ts
└── tsconfig.json
/.eslintrc.json:
--------------------------------------------------------------------------------
1 | {
2 | "parserOptions": {
3 | "sourceType": "module"
4 | },
5 | "env": {
6 | "es2022": true
7 | }
8 | }
9 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
2 |
3 | # dependencies
4 | /node_modules
5 | /.pnp
6 | .pnp.js
7 |
8 | # testing
9 | /coverage
10 |
11 | # production
12 | /build
13 |
14 | # packaging
15 | *.zip
16 | *.tar.gz
17 | *.tar
18 | *.tgz
19 | *.bla
20 |
21 | src/*.txt
22 |
23 | # misc
24 | .DS_Store
25 | .env
26 | .env.local
27 | .env.development.local
28 | .env.test.local
29 | .env.production.local
30 |
31 | npm-debug.log*
32 | yarn-debug.log*
33 | yarn-error.log*
34 |
--------------------------------------------------------------------------------
/.prettierrc:
--------------------------------------------------------------------------------
1 | {
2 | "tabWidth": 2,
3 | "useTabs": false,
4 | "singleQuote": true
5 | }
6 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 OpenAI
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # RealGPT
2 |
3 | A voice chat app based on a modified OpenAI Realtime Console.
4 |
5 | 1. Removed the map and memory settings
6 | 1. Added an api baseUrl setting
7 | 1. Optimized the mobile UI
8 |
9 | 
10 |
11 |
12 | https://github.com/user-attachments/assets/f174d332-505e-46b2-8480-c01ded6a31fa
13 |
14 |
15 |
16 | ## Realtime API proxy
17 |
18 | The OpenAI Realtime Console actually ships with its own relay proxy.
19 |
20 | 1. `git clone` the repo
21 | 2. Create a `.env` file in the repo root and add your key:
22 |
23 | ```env
24 | OPENAI_API_KEY=sk-...
25 | ```
26 | 3. Run `npm install && npm run relay` to start the service
27 |
28 | Then enter the relay address, e.g. `ws://localhost:8081`, into the api base field of the RealGPT settings screen.
29 |
30 |
31 | # OpenAI Realtime Console
32 |
33 | The OpenAI Realtime Console is intended as an inspector and interactive API reference
34 | for the OpenAI Realtime API. It comes packaged with two utility libraries,
35 | [openai/openai-realtime-api-beta](https://github.com/openai/openai-realtime-api-beta)
36 | that acts as a **Reference Client** (for browser and Node.js) and
37 | [`/src/lib/wavtools`](./src/lib/wavtools) which allows for simple audio
38 | management in the browser.
39 |
40 |
41 |
42 | # Starting the console
43 |
44 | This is a React project created using `create-react-app` that is bundled via Webpack.
45 | Install it by extracting the contents of this package and running:
46 |
47 | ```shell
48 | $ npm i
49 | ```
50 |
51 | Start your server with:
52 |
53 | ```shell
54 | $ npm start
55 | ```
56 |
57 | It should be available via `localhost:3000`.
58 |
59 | # Table of contents
60 |
61 | 1. [Using the console](#using-the-console)
62 | 1. [Using a server relay](#using-a-server-relay)
63 | 1. [Realtime API reference client](#realtime-api-reference-client)
64 | 1. [Sending streaming audio](#sending-streaming-audio)
65 | 1. [Adding and using tools](#adding-and-using-tools)
66 | 1. [Interrupting the model](#interrupting-the-model)
67 | 1. [Reference client events](#reference-client-events)
68 | 1. [Wavtools](#wavtools)
69 | 1. [WavRecorder quickstart](#wavrecorder-quickstart)
70 | 1. [WavStreamPlayer quickstart](#wavstreamplayer-quickstart)
71 | 1. [Acknowledgements and contact](#acknowledgements-and-contact)
72 |
73 | # Using the console
74 |
75 | The console requires an OpenAI API key (**user key** or **project key**) that has access to the
76 | Realtime API. You'll be prompted on startup to enter it. It will be saved via `localStorage` and can be
77 | changed at any time from the UI.
78 |
79 | To start a session you'll need to **connect**. This will require microphone access.
80 | You can then choose between **manual** (Push-to-talk) and **vad** (Voice Activity Detection)
81 | conversation modes, and switch between them at any time.
82 |
83 | There are two functions enabled:
84 |
85 | - `get_weather`: Ask for the weather anywhere and the model will do its best to pinpoint the
86 | location, show it on a map, and get the weather for that location. Note that it doesn't
87 | have location access, and coordinates are "guessed" from the model's training data so
88 | accuracy might not be perfect.
89 | - `set_memory`: You can ask the model to remember information for you, and it will store it in
90 | a JSON blob on the left.
91 |
92 | You can freely interrupt the model at any time in push-to-talk or VAD mode.
93 |
94 | ## Using a server relay
95 |
96 | If you would like to build a more robust implementation and play around with the reference
97 | client using your own server, we have included a Node.js [Relay Server](/relay-server/index.js).
98 |
99 | ```shell
100 | $ npm run relay
101 | ```
102 |
103 | It will start automatically on `localhost:8081`. **You will need to create a `.env` file**
104 | with `OPENAI_API_KEY=` set to your API key. Note that you should change the following code
105 | in [`ConsolePage.tsx`](/src/pages/ConsolePage.tsx):
106 |
107 | ```javascript
108 | /**
109 | * Change this if you want to connect to a local relay server!
110 | * This will require you to set OPENAI_API_KEY= in a `.env` file
111 | * You can run it with `npm run relay`, in parallel with `npm start`
112 | *
113 | * Simply switch the lines by commenting one and removing the other
114 | */
115 | // const USE_LOCAL_RELAY_SERVER_URL: string | undefined = 'http://localhost:8081';
116 | const USE_LOCAL_RELAY_SERVER_URL: string | undefined = void 0;
117 | ```
118 |
119 | This server is **only a simple message relay**, but it can be extended to:
120 |
121 | - Hide API credentials if you would like to ship an app to play with online
122 | - Handle certain calls you would like to keep secret (e.g. `instructions`) on
123 | the server directly
124 | - Restrict what types of events the client can receive and send
125 |
126 | You will have to implement these features yourself.
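For example, restricting client events could be an allow-list check in the relay's `messageHandler` (see [`relay.js`](/relay-server/lib/relay.js)). This is only a minimal sketch; the event types in the allow-list below are illustrative, not a vetted or complete set:

```javascript
// Hypothetical allow-list: only these client events are forwarded to OpenAI.
const ALLOWED_CLIENT_EVENTS = new Set([
  'input_audio_buffer.append',
  'input_audio_buffer.commit',
  'conversation.item.create',
  'response.create',
  'response.cancel',
]);

// Drop-in replacement for messageHandler inside RealtimeRelay.connectionHandler
const messageHandler = (data) => {
  try {
    const event = JSON.parse(data);
    if (!ALLOWED_CLIENT_EVENTS.has(event.type)) {
      this.log(`Blocked client event "${event.type}"`);
      return;
    }
    this.log(`Relaying "${event.type}" to OpenAI`);
    client.realtime.send(event.type, event);
  } catch (e) {
    this.log(`Error parsing event from client: ${data}`);
  }
};
```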
127 |
128 | # Realtime API reference client
129 |
130 | The latest reference client and documentation are available on GitHub at
131 | [openai/openai-realtime-api-beta](https://github.com/openai/openai-realtime-api-beta).
132 |
133 | You can use this client yourself in any React (front-end) or Node.js project.
134 | For full documentation, refer to the GitHub repository, but you can use the
135 | guide here as a primer to get started.
136 |
137 | ```javascript
138 | import { RealtimeClient } from '/src/lib/realtime-api-beta/index.js';
139 |
140 | const client = new RealtimeClient({ apiKey: process.env.OPENAI_API_KEY });
141 |
142 | // Can set parameters ahead of connecting
143 | client.updateSession({ instructions: 'You are a great, upbeat friend.' });
144 | client.updateSession({ voice: 'alloy' });
145 | client.updateSession({ turn_detection: 'server_vad' });
146 | client.updateSession({ input_audio_transcription: { model: 'whisper-1' } });
147 |
148 | // Set up event handling
149 | client.on('conversation.updated', ({ item, delta }) => {
150 | const items = client.conversation.getItems(); // can use this to render all items
151 | /* includes all changes to conversations, delta may be populated */
152 | });
153 |
154 | // Connect to Realtime API
155 | await client.connect();
156 |
157 | // Send an item and trigger a generation
158 | client.sendUserMessageContent([{ type: 'text', text: `How are you?` }]);
159 | ```
160 |
161 | ## Sending streaming audio
162 |
163 | To send streaming audio, use the `.appendInputAudio()` method. If you're in `turn_detection: 'disabled'` mode,
164 | then you need to use `.createResponse()` to tell the model to respond.
165 |
166 | ```javascript
167 | // Send user audio, must be Int16Array or ArrayBuffer
168 | // Default audio format is pcm16 with sample rate of 24,000 Hz
169 | // This populates 1s of noise in 0.1s chunks
170 | for (let i = 0; i < 10; i++) {
171 | const data = new Int16Array(2400);
172 | for (let n = 0; n < 2400; n++) {
173 | const value = Math.floor((Math.random() * 2 - 1) * 0x8000);
174 | data[n] = value;
175 | }
176 | client.appendInputAudio(data);
177 | }
178 | // Pending audio is committed and model is asked to generate
179 | client.createResponse();
180 | ```
181 |
182 | ## Adding and using tools
183 |
184 | Working with tools is easy. Just call `.addTool()` and set a callback as the second parameter.
185 | The callback will be executed with the parameters for the tool, and the result will be automatically
186 | sent back to the model.
187 |
188 | ```javascript
189 | // We can add tools as well, with callbacks specified
190 | client.addTool(
191 | {
192 | name: 'get_weather',
193 | description:
194 | 'Retrieves the weather for a given lat, lng coordinate pair. Specify a label for the location.',
195 | parameters: {
196 | type: 'object',
197 | properties: {
198 | lat: {
199 | type: 'number',
200 | description: 'Latitude',
201 | },
202 | lng: {
203 | type: 'number',
204 | description: 'Longitude',
205 | },
206 | location: {
207 | type: 'string',
208 | description: 'Name of the location',
209 | },
210 | },
211 | required: ['lat', 'lng', 'location'],
212 | },
213 | },
214 | async ({ lat, lng, location }) => {
215 | const result = await fetch(
216 | `https://api.open-meteo.com/v1/forecast?latitude=${lat}&longitude=${lng}&current=temperature_2m,wind_speed_10m`
217 | );
218 | const json = await result.json();
219 | return json;
220 | }
221 | );
222 | ```
223 |
224 | ## Interrupting the model
225 |
226 | You may want to manually interrupt the model, especially in `turn_detection: 'disabled'` mode.
227 | To do this, we can use:
228 |
229 | ```javascript
230 | // id is the id of the item currently being generated
231 | // sampleCount is the number of audio samples that have been heard by the listener
232 | client.cancelResponse(id, sampleCount);
233 | ```
234 |
235 | This method will cause the model to immediately cease generation, but also truncate the
236 | item being played by removing all audio after `sampleCount` and clearing the text
237 | response. By using this method you can interrupt the model and prevent it from "remembering"
238 | anything it has generated that is ahead of where the user's state is.
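In this console, the sample count typically comes from the audio player: `wavStreamPlayer.interrupt()` (see the [WavStreamPlayer quickstart](#wavstreamplayer-quickstart) below) reports the id and sample offset of the track currently playing. A minimal sketch, assuming each response's audio was queued with the item's id as its track id:

```javascript
// Halt playback, then ask the model to forget the audio that was never heard.
// Assumes audio was queued via wavStreamPlayer.add16BitPCM(audio, item.id).
const trackSampleOffset = await wavStreamPlayer.interrupt();
if (trackSampleOffset?.trackId) {
  const { trackId, offset } = trackSampleOffset;
  client.cancelResponse(trackId, offset);
}
```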
239 |
240 | ## Reference client events
241 |
242 | There are five main client events for application control flow in `RealtimeClient`.
243 | Note that this is only an overview of using the client; the full Realtime API
244 | event specification is considerably larger. If you need more control, check out the GitHub repository:
245 | [openai/openai-realtime-api-beta](https://github.com/openai/openai-realtime-api-beta).
246 |
247 | ```javascript
248 | // errors like connection failures
249 | client.on('error', (event) => {
250 | // do thing
251 | });
252 |
253 | // in VAD mode, the user starts speaking
254 | // we can use this to stop audio playback of a previous response if necessary
255 | client.on('conversation.interrupted', () => {
256 | /* do something */
257 | });
258 |
259 | // includes all changes to conversations
260 | // delta may be populated
261 | client.on('conversation.updated', ({ item, delta }) => {
262 | // get all items, e.g. if you need to update a chat window
263 | const items = client.conversation.getItems();
264 | switch (item.type) {
265 | case 'message':
266 | // system, user, or assistant message (item.role)
267 | break;
268 | case 'function_call':
269 | // always a function call from the model
270 | break;
271 | case 'function_call_output':
272 | // always a response from the user / application
273 | break;
274 | }
275 | if (delta) {
276 | // Only one of the following will be populated for any given event
277 | // delta.audio = Int16Array, audio added
278 | // delta.transcript = string, transcript added
279 | // delta.arguments = string, function arguments added
280 | }
281 | });
282 |
283 | // only triggered after item added to conversation
284 | client.on('conversation.item.appended', ({ item }) => {
285 | /* item status can be 'in_progress' or 'completed' */
286 | });
287 |
288 | // only triggered after item completed in conversation
289 | // will always be triggered after conversation.item.appended
290 | client.on('conversation.item.completed', ({ item }) => {
291 | /* item status will always be 'completed' */
292 | });
293 | ```
294 |
295 | # Wavtools
296 |
297 | Wavtools provides easy management of PCM16 audio streams in the browser, for both
298 | recording and playback.
299 |
300 | ## WavRecorder Quickstart
301 |
302 | ```javascript
303 | import { WavRecorder } from '/src/lib/wavtools/index.js';
304 |
305 | const wavRecorder = new WavRecorder({ sampleRate: 24000 });
306 | wavRecorder.getStatus(); // "ended"
307 |
308 | // request permissions, connect microphone
309 | await wavRecorder.begin();
310 | wavRecorder.getStatus(); // "paused"
311 |
312 | // Start recording
313 | // This callback will be triggered in chunks of 8192 samples by default
314 | // { mono, raw } are Int16Array (PCM16) mono & full channel data
315 | await wavRecorder.record((data) => {
316 | const { mono, raw } = data;
317 | });
318 | wavRecorder.getStatus(); // "recording"
319 |
320 | // Stop recording
321 | await wavRecorder.pause();
322 | wavRecorder.getStatus(); // "paused"
323 |
324 | // outputs "audio/wav" audio file
325 | const audio = await wavRecorder.save();
326 |
327 | // clears current audio buffer and starts recording
328 | await wavRecorder.clear();
329 | await wavRecorder.record();
330 |
331 | // get data for visualization
332 | const frequencyData = wavRecorder.getFrequencies();
333 |
334 | // Stop recording, disconnects microphone, output file
335 | await wavRecorder.pause();
336 | const finalAudio = await wavRecorder.end();
337 |
338 | // Listen for device change; e.g. if somebody disconnects a microphone
339 | // deviceList is array of MediaDeviceInfo[] + `default` property
340 | wavRecorder.listenForDeviceChange((deviceList) => {});
341 | ```
342 |
343 | ## WavStreamPlayer Quickstart
344 |
345 | ```javascript
346 | import { WavStreamPlayer } from '/src/lib/wavtools/index.js';
347 |
348 | const wavStreamPlayer = new WavStreamPlayer({ sampleRate: 24000 });
349 |
350 | // Connect to audio output
351 | await wavStreamPlayer.connect();
352 |
353 | // Create 1s of empty PCM16 audio
354 | const audio = new Int16Array(24000);
355 | // Queue 3s of audio, will start playing immediately
356 | wavStreamPlayer.add16BitPCM(audio, 'my-track');
357 | wavStreamPlayer.add16BitPCM(audio, 'my-track');
358 | wavStreamPlayer.add16BitPCM(audio, 'my-track');
359 |
360 | // get data for visualization
361 | const frequencyData = wavStreamPlayer.getFrequencies();
362 |
363 | // Interrupt the audio (halt playback) at any time
364 | // To restart, need to call .add16BitPCM() again
365 | const trackOffset = await wavStreamPlayer.interrupt();
366 | trackOffset.trackId; // "my-track"
367 | trackOffset.offset; // sample number
368 | trackOffset.currentTime; // time in track
369 | ```
370 |
371 | # Acknowledgements and contact
372 |
373 | Thanks for checking out the Realtime Console. We hope you have fun with the Realtime API.
374 | Special thanks to the whole Realtime API team for making this possible. Please feel free
375 | to reach out, ask questions, or give feedback by creating an issue on the repository.
376 | You can also reach out and let us know what you think directly!
377 |
378 | - OpenAI Developers / [@OpenAIDevs](https://x.com/OpenAIDevs)
379 | - Jordan Sitkin / API / [@dustmason](https://x.com/dustmason)
380 | - Mark Hudnall / API / [@landakram](https://x.com/landakram)
381 | - Peter Bakkum / API / [@pbbakkum](https://x.com/pbbakkum)
382 | - Atty Eleti / API / [@athyuttamre](https://x.com/athyuttamre)
383 | - Jason Clark / API / [@onebitToo](https://x.com/onebitToo)
384 | - Karolis Kosas / Design / [@karoliskosas](https://x.com/karoliskosas)
385 | - Keith Horwood / API + DX / [@keithwhor](https://x.com/keithwhor)
386 | - Romain Huet / DX / [@romainhuet](https://x.com/romainhuet)
387 | - Katia Gil Guzman / DX / [@kagigz](https://x.com/kagigz)
388 | - Ilan Bigio / DX / [@ilanbigio](https://x.com/ilanbigio)
389 | - Kevin Whinnery / DX / [@kevinwhinnery](https://x.com/kevinwhinnery)
390 |
--------------------------------------------------------------------------------
/image/20241006174459.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/easychen/realgpt/d2943085d1dc3787b23c5af07147423df9327102/image/20241006174459.png
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "openai-realtime-console",
3 | "version": "0.0.0",
4 | "type": "module",
5 | "private": true,
6 | "dependencies": {
7 | "@testing-library/jest-dom": "^5.17.0",
8 | "@testing-library/react": "^13.4.0",
9 | "@testing-library/user-event": "^13.5.0",
10 | "@types/jest": "^27.5.2",
11 | "@types/leaflet": "^1.9.12",
12 | "@types/node": "^16.18.108",
13 | "@types/react": "^18.3.5",
14 | "@types/react-dom": "^18.3.0",
15 | "leaflet": "^1.9.4",
16 | "react": "^18.3.1",
17 | "react-dom": "^18.3.1",
18 | "react-feather": "^2.0.10",
19 | "react-leaflet": "^4.2.1",
20 | "react-scripts": "^5.0.1",
21 | "sass": "^1.78.0",
22 | "save": "^2.9.0",
23 | "typescript": "^4.9.5",
24 | "web-vitals": "^2.1.4",
25 | "ws": "^8.18.0"
26 | },
27 | "scripts": {
28 | "start": "react-scripts start",
29 | "build": "react-scripts build",
30 | "test": "react-scripts test",
31 | "eject": "react-scripts eject",
32 | "zip": "zip -r realtime-api-console.zip . -x 'node_modules' 'node_modules/*' 'node_modules/**' '.git' '.git/*' '.git/**' '.DS_Store' '*/.DS_Store' 'package-lock.json' '*.zip' '*.tar.gz' '*.tar' '.env'",
33 | "relay": "nodemon ./relay-server/index.js"
34 | },
35 | "eslintConfig": {
36 | "extends": [
37 | "react-app",
38 | "react-app/jest"
39 | ]
40 | },
41 | "browserslist": {
42 | "production": [
43 | ">0.2%",
44 | "not dead",
45 | "not op_mini all"
46 | ],
47 | "development": [
48 | "last 1 chrome version",
49 | "last 1 firefox version",
50 | "last 1 safari version"
51 | ]
52 | },
53 | "devDependencies": {
54 | "@babel/plugin-proposal-private-property-in-object": "^7.21.11",
55 | "nodemon": "^3.1.7"
56 | }
57 | }
58 |
--------------------------------------------------------------------------------
/public/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 | realtime console
8 |
9 |
13 |
14 |
20 |
25 |
26 |
27 | You need to enable JavaScript to run this app.
28 |
29 |
39 |
40 |
41 |
--------------------------------------------------------------------------------
/public/openai-logomark.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/public/robots.txt:
--------------------------------------------------------------------------------
1 | # https://www.robotstxt.org/robotstxt.html
2 | User-agent: *
3 | Disallow:
4 |
--------------------------------------------------------------------------------
/readme/realtime-console-demo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/easychen/realgpt/d2943085d1dc3787b23c5af07147423df9327102/readme/realtime-console-demo.png
--------------------------------------------------------------------------------
/relay-server/index.js:
--------------------------------------------------------------------------------
1 | import { RealtimeRelay } from './lib/relay.js';
2 | import dotenv from 'dotenv';
3 | dotenv.config({ override: true });
4 |
5 | const OPENAI_API_KEY = process.env.OPENAI_API_KEY;
6 |
7 | if (!OPENAI_API_KEY) {
8 | console.error(
9 | `Environment variable "OPENAI_API_KEY" is required.\n` +
10 | `Please set it in your .env file.`
11 | );
12 | process.exit(1);
13 | }
14 |
15 | const PORT = parseInt(process.env.PORT) || 8081;
16 |
17 | const relay = new RealtimeRelay(OPENAI_API_KEY);
18 | relay.listen(PORT);
19 |
--------------------------------------------------------------------------------
/relay-server/lib/relay.js:
--------------------------------------------------------------------------------
1 | import { WebSocketServer } from 'ws';
2 | import { RealtimeClient } from '../../src/lib/realtime-api-beta/index.js';
3 |
4 | export class RealtimeRelay {
5 | constructor(apiKey) {
6 | this.apiKey = apiKey;
7 | this.sockets = new WeakMap();
8 | this.wss = null;
9 | }
10 |
11 | listen(port) {
12 | this.wss = new WebSocketServer({ port });
13 | this.wss.on('connection', this.connectionHandler.bind(this));
14 | this.log(`Listening on ws://localhost:${port}`);
15 | }
16 |
17 | async connectionHandler(ws, req) {
18 | if (!req.url) {
19 | this.log('No URL provided, closing connection.');
20 | ws.close();
21 | return;
22 | }
23 |
24 | const url = new URL(req.url, `http://${req.headers.host}`);
25 | const pathname = url.pathname;
26 |
27 | if (pathname !== '/') {
28 | this.log(`Invalid pathname: "${pathname}"`);
29 | ws.close();
30 | return;
31 | }
32 |
33 | // Instantiate new client
34 | this.log(`Connecting with key "${this.apiKey.slice(0, 3)}..."`);
35 | const client = new RealtimeClient({ apiKey: this.apiKey });
36 |
37 | // Relay: OpenAI Realtime API Event -> Browser Event
38 | client.realtime.on('server.*', (event) => {
39 | this.log(`Relaying "${event.type}" to Client`);
40 | ws.send(JSON.stringify(event));
41 | });
42 | client.realtime.on('close', () => ws.close());
43 |
44 | // Relay: Browser Event -> OpenAI Realtime API Event
45 | // We need to queue data waiting for the OpenAI connection
46 | const messageQueue = [];
47 | const messageHandler = (data) => {
48 | try {
49 | const event = JSON.parse(data);
50 | this.log(`Relaying "${event.type}" to OpenAI`);
51 | client.realtime.send(event.type, event);
52 | } catch (e) {
53 | console.error(e.message);
54 | this.log(`Error parsing event from client: ${data}`);
55 | }
56 | };
57 | ws.on('message', (data) => {
58 | if (!client.isConnected()) {
59 | messageQueue.push(data);
60 | } else {
61 | messageHandler(data);
62 | }
63 | });
64 | ws.on('close', () => client.disconnect());
65 |
66 | // Connect to OpenAI Realtime API
67 | try {
68 | this.log(`Connecting to OpenAI...`);
69 | await client.connect();
70 | } catch (e) {
71 | this.log(`Error connecting to OpenAI: ${e.message}`);
72 | ws.close();
73 | return;
74 | }
75 | this.log(`Connected to OpenAI successfully!`);
76 | while (messageQueue.length) {
77 | messageHandler(messageQueue.shift());
78 | }
79 | }
80 |
81 | log(...args) {
82 | console.log(`[RealtimeRelay]`, ...args);
83 | }
84 | }
85 |
--------------------------------------------------------------------------------
/src/App.scss:
--------------------------------------------------------------------------------
1 | [data-component='App'] {
2 | height: 100%;
3 | width: 100%;
4 | position: relative;
5 | }
6 |
--------------------------------------------------------------------------------
/src/App.tsx:
--------------------------------------------------------------------------------
1 | import { ConsolePage } from './pages/ConsolePage';
2 | import './App.scss';
3 |
4 | function App() {
5 | return (
6 |   <div data-component="App">
7 |     <ConsolePage />
8 |   </div>
9 | );
10 | }
11 |
12 | export default App;
13 |
--------------------------------------------------------------------------------
/src/components/Map.scss:
--------------------------------------------------------------------------------
1 | [data-component='Map'] {
2 | position: absolute;
3 | width: 100%;
4 | height: 100%;
5 | .leaflet-container {
6 | height: 100%;
7 | width: 100%;
8 | }
9 | }
10 |
--------------------------------------------------------------------------------
/src/components/Map.tsx:
--------------------------------------------------------------------------------
1 | import { MapContainer, TileLayer, Marker, Popup, useMap } from 'react-leaflet';
2 | import { LatLngTuple } from 'leaflet';
3 | import './Map.scss';
4 |
5 | function ChangeView({ center, zoom }: { center: LatLngTuple; zoom: number }) {
6 | const map = useMap();
7 | map.setView(center, zoom);
8 | return null;
9 | }
10 |
11 | export function Map({
12 | center,
13 | location = 'My Location',
14 | }: {
15 | center: LatLngTuple;
16 | location?: string;
17 | }) {
18 | return (
19 |
20 |
27 |
28 |
29 |
30 | {location}
31 |
32 |
33 |
34 | );
35 | }
36 |
--------------------------------------------------------------------------------
/src/components/button/Button.scss:
--------------------------------------------------------------------------------
1 | [data-component='Button'] {
2 | display: flex;
3 | align-items: center;
4 | gap: 8px;
5 | font-family: 'Roboto Mono', monospace;
6 | font-size: 12px;
7 | font-optical-sizing: auto;
8 | font-weight: 400;
9 | font-style: normal;
10 | border: none;
11 | background-color: #ececf1;
12 | color: #101010;
13 | border-radius: 1000px;
14 | padding: 8px 24px;
15 | min-height: 42px;
16 | transition: transform 0.1s ease-in-out, background-color 0.1s ease-in-out;
17 | outline: none;
18 |
19 | &.button-style-action {
20 | background-color: #101010;
21 | color: #ececf1;
22 | &:hover:not([disabled]) {
23 | background-color: #404040;
24 | }
25 | }
26 |
27 | &.button-style-alert {
28 | background-color: #f00;
29 | color: #ececf1;
30 | &:hover:not([disabled]) {
31 | background-color: #f00;
32 | }
33 | }
34 |
35 | &.button-style-flush {
36 | background-color: rgba(255, 255, 255, 0);
37 | }
38 |
39 | &[disabled] {
40 | color: #999;
41 | }
42 |
43 | &:not([disabled]) {
44 | cursor: pointer;
45 | }
46 |
47 | &:hover:not([disabled]) {
48 | background-color: #d8d8d8;
49 | }
50 |
51 | &:active:not([disabled]) {
52 | transform: translateY(1px);
53 | }
54 |
55 | .icon {
56 | display: flex;
57 | &.icon-start {
58 | margin-left: -8px;
59 | }
60 | &.icon-end {
61 | margin-right: -8px;
62 | }
63 | svg {
64 | width: 16px;
65 | height: 16px;
66 | }
67 | }
68 |
69 | &.icon-red .icon {
70 | color: #cc0000;
71 | }
72 | &.icon-green .icon {
73 | color: #009900;
74 | }
75 | &.icon-grey .icon {
76 | color: #909090;
77 | }
78 | &.icon-fill {
79 | svg {
80 | fill: currentColor;
81 | }
82 | }
83 | }
84 |
--------------------------------------------------------------------------------
/src/components/button/Button.tsx:
--------------------------------------------------------------------------------
1 | import React from 'react';
2 | import './Button.scss';
3 |
4 | import { Icon } from 'react-feather';
5 |
6 | interface ButtonProps extends React.ButtonHTMLAttributes<HTMLButtonElement> {
7 | label?: string;
8 | icon?: Icon;
9 | iconPosition?: 'start' | 'end';
10 | iconColor?: 'red' | 'green' | 'grey';
11 | iconFill?: boolean;
12 | buttonStyle?: 'regular' | 'action' | 'alert' | 'flush';
13 | }
14 |
15 | export function Button({
16 | label = 'Okay',
17 | icon = void 0,
18 | iconPosition = 'start',
19 | iconColor = void 0,
20 | iconFill = false,
21 | buttonStyle = 'regular',
22 | ...rest
23 | }: ButtonProps) {
24 | const StartIcon = iconPosition === 'start' ? icon : null;
25 | const EndIcon = iconPosition === 'end' ? icon : null;
26 | const classList = [];
27 | if (iconColor) {
28 | classList.push(`icon-${iconColor}`);
29 | }
30 | if (iconFill) {
31 | classList.push(`icon-fill`);
32 | }
33 | classList.push(`button-style-${buttonStyle}`);
34 |
35 | return (
36 |   <button data-component="Button" className={classList.join(' ')} {...rest}>
37 |     {StartIcon && (
38 |       <span className="icon icon-start">
39 |         <StartIcon />
40 |       </span>
41 |     )}
42 |     {label}
43 |     {EndIcon && (
44 |       <span className="icon icon-end">
45 |         <EndIcon />
46 |       </span>
47 |     )}
48 |   </button>
49 | );
50 | }
51 |
--------------------------------------------------------------------------------
/src/components/toggle/Toggle.scss:
--------------------------------------------------------------------------------
1 | [data-component='Toggle'] {
2 | position: relative;
3 | display: flex;
4 | align-items: center;
5 | gap: 8px;
6 | cursor: pointer;
7 | overflow: hidden;
8 |
9 | background-color: #ececf1;
10 | color: #101010;
11 | height: 40px;
12 | border-radius: 1000px;
13 |
14 | &:hover {
15 | background-color: #d8d8d8;
16 | }
17 |
18 | div.label {
19 | position: relative;
20 | color: #666;
21 | transition: color 0.1s ease-in-out;
22 | padding: 0px 16px;
23 | z-index: 2;
24 | user-select: none;
25 | }
26 |
27 | div.label.right {
28 | margin-left: -8px;
29 | }
30 |
31 | .toggle-background {
32 | background-color: #101010;
33 | position: absolute;
34 | top: 0px;
35 | left: 0px;
36 | width: auto;
37 | bottom: 0px;
38 | z-index: 1;
39 | border-radius: 1000px;
40 | transition: left 0.1s ease-in-out, width 0.1s ease-in-out;
41 | }
42 |
43 | &[data-enabled='true'] {
44 | div.label.right {
45 | color: #fff;
46 | }
47 | }
48 |
49 | &[data-enabled='false'] {
50 | div.label.left {
51 | color: #fff;
52 | }
53 | }
54 | }
55 |
--------------------------------------------------------------------------------
/src/components/toggle/Toggle.tsx:
--------------------------------------------------------------------------------
1 | import { useState, useEffect, useRef } from 'react';
2 |
3 | import './Toggle.scss';
4 |
5 | export function Toggle({
6 | defaultValue = false,
7 | values,
8 | labels,
9 | onChange = () => {},
10 | }: {
11 | defaultValue?: string | boolean;
12 | values?: string[];
13 | labels?: string[];
14 | onChange?: (isEnabled: boolean, value: string) => void;
15 | }) {
16 | if (typeof defaultValue === 'string') {
17 | defaultValue = !!Math.max(0, (values || []).indexOf(defaultValue));
18 | }
19 |
20 | const leftRef = useRef<HTMLDivElement>(null);
21 | const rightRef = useRef<HTMLDivElement>(null);
22 | const bgRef = useRef<HTMLDivElement>(null);
23 | const [value, setValue] = useState(defaultValue);
24 |
25 | const toggleValue = () => {
26 | const v = !value;
27 | const index = +v;
28 | setValue(v);
29 | onChange(v, (values || [])[index]);
30 | };
31 |
32 | useEffect(() => {
33 | const leftEl = leftRef.current;
34 | const rightEl = rightRef.current;
35 | const bgEl = bgRef.current;
36 | if (leftEl && rightEl && bgEl) {
37 | if (value) {
38 | bgEl.style.left = rightEl.offsetLeft + 'px';
39 | bgEl.style.width = rightEl.offsetWidth + 'px';
40 | } else {
41 | bgEl.style.left = '';
42 | bgEl.style.width = leftEl.offsetWidth + 'px';
43 | }
44 | }
45 | }, [value]);
46 |
47 | return (
48 |   <div
49 |     data-component="Toggle"
50 |     onClick={toggleValue}
51 |     data-enabled={value.toString()}
52 |   >
53 |     {labels && (
54 |       <div className="label left" ref={leftRef}>
55 |         {labels[0]}
56 |       </div>
57 |     )}
58 |     {labels && (
59 |       <div className="label right" ref={rightRef}>
60 |         {labels[1]}
61 |       </div>
62 |     )}
63 |     <div className="toggle-background" ref={bgRef} />
64 |   </div>
65 | );
66 | }
67 |
--------------------------------------------------------------------------------
/src/index.css:
--------------------------------------------------------------------------------
1 | html,
2 | body {
3 | padding: 0px;
4 | margin: 0px;
5 | position: relative;
6 | width: 100%;
7 | height: 100%;
8 | font-family: 'Assistant', sans-serif;
9 | font-optical-sizing: auto;
10 | font-weight: 400;
11 | font-style: normal;
12 | color: #18181b;
13 | -webkit-font-smoothing: antialiased;
14 | -moz-osx-font-smoothing: grayscale;
15 | }
16 |
17 | #root {
18 | position: relative;
19 | width: 100%;
20 | height: 100%;
21 | }
22 |
--------------------------------------------------------------------------------
/src/index.tsx:
--------------------------------------------------------------------------------
1 | import React from 'react';
2 | import ReactDOM from 'react-dom/client';
3 | import './index.css';
4 | import App from './App';
5 | import reportWebVitals from './reportWebVitals';
6 |
7 | const root = ReactDOM.createRoot(
8 | document.getElementById('root') as HTMLElement
9 | );
10 | root.render(
11 |   <React.StrictMode>
12 |     <App />
13 |   </React.StrictMode>
14 | );
15 |
16 | // If you want to start measuring performance in your app, pass a function
17 | // to log results (for example: reportWebVitals(console.log))
18 | // or send to an analytics endpoint. Learn more: https://bit.ly/CRA-vitals
19 | reportWebVitals();
20 |
--------------------------------------------------------------------------------
/src/lib/realtime-api-beta/dist/index.d.ts:
--------------------------------------------------------------------------------
1 | import { RealtimeAPI } from './lib/api.js';
2 | import { RealtimeConversation } from './lib/conversation.js';
3 | import { RealtimeClient } from './lib/client.js';
4 | import { RealtimeUtils } from './lib/utils.js';
5 | export { RealtimeAPI, RealtimeConversation, RealtimeClient, RealtimeUtils };
6 | //# sourceMappingURL=index.d.ts.map
--------------------------------------------------------------------------------
/src/lib/realtime-api-beta/dist/index.d.ts.map:
--------------------------------------------------------------------------------
1 | {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../index.js"],"names":[],"mappings":"4BAC4B,cAAc;qCACL,uBAAuB;+BAC7B,iBAAiB;8BAHlB,gBAAgB"}
--------------------------------------------------------------------------------
/src/lib/realtime-api-beta/dist/lib/api.d.ts:
--------------------------------------------------------------------------------
1 | export class RealtimeAPI extends RealtimeEventHandler {
2 | /**
3 | * Create a new RealtimeAPI instance
4 | * @param {{url?: string, apiKey?: string, dangerouslyAllowAPIKeyInBrowser?: boolean, debug?: boolean}} [settings]
5 | * @returns {RealtimeAPI}
6 | */
7 | constructor({ url, apiKey, dangerouslyAllowAPIKeyInBrowser, debug }?: {
8 | url?: string;
9 | apiKey?: string;
10 | dangerouslyAllowAPIKeyInBrowser?: boolean;
11 | debug?: boolean;
12 | });
13 | defaultUrl: string;
14 | url: string;
15 | apiKey: string;
16 | debug: boolean;
17 | ws: any;
18 | /**
19 | * Tells us whether or not the WebSocket is connected
20 | * @returns {boolean}
21 | */
22 | isConnected(): boolean;
23 | /**
24 | * Writes WebSocket logs to console
25 | * @param {...any} args
26 | * @returns {true}
27 | */
28 | log(...args: any[]): true;
29 | /**
30 | * Connects to Realtime API Websocket Server
31 | * @param {{model?: string}} [settings]
32 | * @returns {Promise}
33 | */
34 | connect({ model }?: {
35 | model?: string;
36 | }): Promise;
37 | /**
38 | * Disconnects from Realtime API server
39 | * @param {WebSocket} [ws]
40 | * @returns {true}
41 | */
42 | disconnect(ws?: WebSocket): true;
43 | /**
44 | * Receives an event from WebSocket and dispatches as "server.{eventName}" and "server.*" events
45 | * @param {string} eventName
46 | * @param {{[key: string]: any}} event
47 | * @returns {true}
48 | */
49 | receive(eventName: string, event: {
50 | [key: string]: any;
51 | }): true;
52 | /**
53 | * Sends an event to WebSocket and dispatches as "client.{eventName}" and "client.*" events
54 | * @param {string} eventName
55 | * @param {{[key: string]: any}} event
56 | * @returns {true}
57 | */
58 | send(eventName: string, data: any): true;
59 | }
60 | import { RealtimeEventHandler } from './event_handler.js';
61 | //# sourceMappingURL=api.d.ts.map
--------------------------------------------------------------------------------
/src/lib/realtime-api-beta/dist/lib/api.d.ts.map:
--------------------------------------------------------------------------------
1 | {"version":3,"file":"api.d.ts","sourceRoot":"","sources":["../../lib/api.js"],"names":[],"mappings":"AAGA;IACE;;;;OAIG;IACH,sEAHW;QAAC,GAAG,CAAC,EAAE,MAAM,CAAC;QAAC,MAAM,CAAC,EAAE,MAAM,CAAC;QAAC,+BAA+B,CAAC,EAAE,OAAO,CAAC;QAAC,KAAK,CAAC,EAAE,OAAO,CAAA;KAAC,EAiBrG;IAZC,mBAAoD;IACpD,YAAiC;IACjC,eAA4B;IAC5B,eAAoB;IACpB,QAAc;IAUhB;;;OAGG;IACH,eAFa,OAAO,CAInB;IAED;;;;OAIG;IACH,aAHe,GAAG,EAAA,GACL,IAAI,CAehB;IAED;;;;OAIG;IACH,oBAHW;QAAC,KAAK,CAAC,EAAE,MAAM,CAAA;KAAC,GACd,OAAO,CAAC,IAAI,CAAC,CAkGzB;IAED;;;;OAIG;IACH,gBAHW,SAAS,GACP,IAAI,CAQhB;IAED;;;;;OAKG;IACH,mBAJW,MAAM,SACN;QAAC,CAAC,GAAG,EAAE,MAAM,GAAG,GAAG,CAAA;KAAC,GAClB,IAAI,CAOhB;IAED;;;;;OAKG;IACH,gBAJW,MAAM,cAEJ,IAAI,CAoBhB;CACF;qCA/MoC,oBAAoB"}
--------------------------------------------------------------------------------
/src/lib/realtime-api-beta/dist/lib/client.d.ts:
--------------------------------------------------------------------------------
1 | /**
2 | * Valid audio formats
3 | * @typedef {"pcm16"|"g711-ulaw"|"g711-alaw"} AudioFormatType
4 | */
5 | /**
6 | * @typedef {Object} AudioTranscriptionType
7 | * @property {boolean} [enabled]
8 | * @property {"whisper-1"} model
9 | */
10 | /**
11 | * @typedef {Object} TurnDetectionServerVadType
12 | * @property {"server_vad"} type
13 | * @property {number} [threshold]
14 | * @property {number} [prefix_padding_ms]
15 | * @property {number} [silence_duration_ms]
16 | */
17 | /**
18 | * Tool definitions
19 | * @typedef {Object} ToolDefinitionType
20 | * @property {"function"} [type]
21 | * @property {string} name
22 | * @property {string} description
23 | * @property {{[key: string]: any}} parameters
24 | */
25 | /**
26 | * @typedef {Object} SessionResourceType
27 | * @property {string} [model]
28 | * @property {string[]} [modalities]
29 | * @property {string} [instructions]
30 | * @property {"alloy"|"shimmer"|"echo"} [voice]
31 | * @property {AudioFormatType} [input_audio_format]
32 | * @property {AudioFormatType} [output_audio_format]
33 | * @property {AudioTranscriptionType|null} [input_audio_transcription]
34 | * @property {TurnDetectionServerVadType|null} [turn_detection]
35 | * @property {ToolDefinitionType[]} [tools]
36 | * @property {"auto"|"none"|"required"|{type:"function",name:string}} [tool_choice]
37 | * @property {number} [temperature]
38 | * @property {number|"inf"} [max_response_output_tokens]
39 | */
40 | /**
41 | * @typedef {"in_progress"|"completed"|"incomplete"} ItemStatusType
42 | */
43 | /**
44 | * @typedef {Object} InputTextContentType
45 | * @property {"input_text"} type
46 | * @property {string} text
47 | */
48 | /**
49 | * @typedef {Object} InputAudioContentType
50 | * @property {"input_audio"} type
51 | * @property {string} [audio] base64-encoded audio data
52 | * @property {string|null} [transcript]
53 | */
54 | /**
55 | * @typedef {Object} TextContentType
56 | * @property {"text"} type
57 | * @property {string} text
58 | */
59 | /**
60 | * @typedef {Object} AudioContentType
61 | * @property {"audio"} type
62 | * @property {string} [audio] base64-encoded audio data
63 | * @property {string|null} [transcript]
64 | */
65 | /**
66 | * @typedef {Object} SystemItemType
67 | * @property {string|null} [previous_item_id]
68 | * @property {"message"} type
69 | * @property {ItemStatusType} status
70 | * @property {"system"} role
71 | * @property {Array} content
72 | */
73 | /**
74 | * @typedef {Object} UserItemType
75 | * @property {string|null} [previous_item_id]
76 | * @property {"message"} type
77 | * @property {ItemStatusType} status
78 | * @property {"system"} role
79 | * @property {Array} content
80 | */
81 | /**
82 | * @typedef {Object} AssistantItemType
83 | * @property {string|null} [previous_item_id]
84 | * @property {"message"} type
85 | * @property {ItemStatusType} status
86 | * @property {"assistant"} role
87 | * @property {Array} content
88 | */
89 | /**
90 | * @typedef {Object} FunctionCallItemType
91 | * @property {string|null} [previous_item_id]
92 | * @property {"function_call"} type
93 | * @property {ItemStatusType} status
94 | * @property {string} call_id
95 | * @property {string} name
96 | * @property {string} arguments
97 | */
98 | /**
99 | * @typedef {Object} FunctionCallOutputItemType
100 | * @property {string|null} [previous_item_id]
101 | * @property {"function_call_output"} type
102 | * @property {string} call_id
103 | * @property {string} output
104 | */
105 | /**
106 | * @typedef {Object} FormattedToolType
107 | * @property {"function"} type
108 | * @property {string} name
109 | * @property {string} call_id
110 | * @property {string} arguments
111 | */
112 | /**
113 | * @typedef {Object} FormattedPropertyType
114 | * @property {Int16Array} [audio]
115 | * @property {string} [text]
116 | * @property {string} [transcript]
117 | * @property {FormattedToolType} [tool]
118 | * @property {string} [output]
119 | * @property {any} [file]
120 | */
121 | /**
122 | * @typedef {Object} FormattedItemType
123 | * @property {string} id
124 | * @property {string} object
125 | * @property {"user"|"assistant"|"system"} [role]
126 | * @property {FormattedPropertyType} formatted
127 | */
128 | /**
129 | * @typedef {SystemItemType|UserItemType|AssistantItemType|FunctionCallItemType|FunctionCallOutputItemType} BaseItemType
130 | */
131 | /**
132 | * @typedef {FormattedItemType & BaseItemType} ItemType
133 | */
134 | /**
135 | * @typedef {Object} IncompleteResponseStatusType
136 | * @property {"incomplete"} type
137 | * @property {"interruption"|"max_output_tokens"|"content_filter"} reason
138 | */
139 | /**
140 | * @typedef {Object} FailedResponseStatusType
141 | * @property {"failed"} type
142 | * @property {{code: string, message: string}|null} error
143 | */
144 | /**
145 | * @typedef {Object} UsageType
146 | * @property {number} total_tokens
147 | * @property {number} input_tokens
148 | * @property {number} output_tokens
149 | */
150 | /**
151 | * @typedef {Object} ResponseResourceType
152 | * @property {"in_progress"|"completed"|"incomplete"|"cancelled"|"failed"} status
153 | * @property {IncompleteResponseStatusType|FailedResponseStatusType|null} status_details
154 | * @property {ItemType[]} output
155 | * @property {UsageType|null} usage
156 | */
157 | /**
158 | * RealtimeClient Class
159 | * @class
160 | */
161 | export class RealtimeClient extends RealtimeEventHandler {
162 | /**
163 | * Create a new RealtimeClient instance
164 | * @param {{url?: string, apiKey?: string, dangerouslyAllowAPIKeyInBrowser?: boolean, debug?: boolean}} [settings]
165 | */
166 | constructor({ url, apiKey, dangerouslyAllowAPIKeyInBrowser, debug }?: {
167 | url?: string;
168 | apiKey?: string;
169 | dangerouslyAllowAPIKeyInBrowser?: boolean;
170 | debug?: boolean;
171 | });
172 | defaultSessionConfig: {
173 | modalities: string[];
174 | instructions: string;
175 | voice: string;
176 | input_audio_format: string;
177 | output_audio_format: string;
178 | input_audio_transcription: any;
179 | turn_detection: any;
180 | tools: any[];
181 | tool_choice: string;
182 | temperature: number;
183 | max_response_output_tokens: number;
184 | };
185 | sessionConfig: {};
186 | transcriptionModels: {
187 | model: string;
188 | }[];
189 | defaultServerVadConfig: {
190 | type: string;
191 | threshold: number;
192 | prefix_padding_ms: number;
193 | silence_duration_ms: number;
194 | };
195 | realtime: RealtimeAPI;
196 | conversation: RealtimeConversation;
197 | /**
198 | * Resets sessionConfig and conversationConfig to default
199 | * @private
200 | * @returns {true}
201 | */
202 | private _resetConfig;
203 | sessionCreated: boolean;
204 | tools: {};
205 | inputAudioBuffer: any;
206 | /**
207 | * Sets up event handlers for a fully-functional application control flow
208 | * @private
209 | * @returns {true}
210 | */
211 | private _addAPIEventHandlers;
212 | /**
213 | * Tells us whether the realtime socket is connected and the session has started
214 | * @returns {boolean}
215 | */
216 | isConnected(): boolean;
217 | /**
218 | * Resets the client instance entirely: disconnects and clears active config
219 | * @returns {true}
220 | */
221 | reset(): true;
222 | /**
223 | * Connects to the Realtime WebSocket API
224 | * Updates session config and conversation config
225 | * @returns {Promise}
226 | */
227 | connect(): Promise;
228 | /**
229 | * Waits for a session.created event to be executed before proceeding
230 | * @returns {Promise}
231 | */
232 | waitForSessionCreated(): Promise;
233 | /**
234 | * Disconnects from the Realtime API and clears the conversation history
235 | */
236 | disconnect(): void;
237 | /**
238 | * Gets the active turn detection mode
239 | * @returns {"server_vad"|null}
240 | */
241 | getTurnDetectionType(): "server_vad" | null;
242 | /**
243 | * Add a tool and handler
244 | * @param {ToolDefinitionType} definition
245 | * @param {function} handler
246 | * @returns {{definition: ToolDefinitionType, handler: function}}
247 | */
248 | addTool(definition: ToolDefinitionType, handler: Function): {
249 | definition: ToolDefinitionType;
250 | handler: Function;
251 | };
252 | /**
253 | * Removes a tool
254 | * @param {string} name
255 | * @returns {true}
256 | */
257 | removeTool(name: string): true;
258 | /**
259 | * Deletes an item
260 | * @param {string} id
261 | * @returns {true}
262 | */
263 | deleteItem(id: string): true;
264 | /**
265 | * Updates session configuration
266 | * If the client is not yet connected, will save details and instantiate upon connection
267 | * @param {SessionResourceType} [sessionConfig]
268 | */
269 | updateSession({ modalities, instructions, voice, input_audio_format, output_audio_format, input_audio_transcription, turn_detection, tools, tool_choice, temperature, max_response_output_tokens, }?: SessionResourceType): boolean;
270 | /**
271 | * Sends user message content and generates a response
272 | * @param {Array} content
273 | * @returns {true}
274 | */
275 | sendUserMessageContent(content?: Array): true;
276 | /**
277 | * Appends user audio to the existing audio buffer
278 | * @param {Int16Array|ArrayBuffer} arrayBuffer
279 | * @returns {true}
280 | */
281 | appendInputAudio(arrayBuffer: Int16Array | ArrayBuffer): true;
282 | /**
283 | * Forces a model response generation
284 | * @returns {true}
285 | */
286 | createResponse(): true;
287 | /**
288 | * Cancels the ongoing server generation and truncates ongoing generation, if applicable
289 | * If no id provided, will simply call `cancel_generation` command
290 | * @param {string} id The id of the message to cancel
291 | * @param {number} [sampleCount] The number of samples to truncate past for the ongoing generation
292 | * @returns {{item: (AssistantItemType | null)}}
293 | */
294 | cancelResponse(id: string, sampleCount?: number): {
295 | item: (AssistantItemType | null);
296 | };
297 | /**
298 | * Utility for waiting for the next `conversation.item.appended` event to be triggered by the server
299 | * @returns {Promise<{item: ItemType}>}
300 | */
301 | waitForNextItem(): Promise<{
302 | item: ItemType;
303 | }>;
304 | /**
305 | * Utility for waiting for the next `conversation.item.completed` event to be triggered by the server
306 | * @returns {Promise<{item: ItemType}>}
307 | */
308 | waitForNextCompletedItem(): Promise<{
309 | item: ItemType;
310 | }>;
311 | }
312 | /**
313 | * Valid audio formats
314 | */
315 | export type AudioFormatType = "pcm16" | "g711-ulaw" | "g711-alaw";
316 | export type AudioTranscriptionType = {
317 | enabled?: boolean;
318 | model: "whisper-1";
319 | };
320 | export type TurnDetectionServerVadType = {
321 | type: "server_vad";
322 | threshold?: number;
323 | prefix_padding_ms?: number;
324 | silence_duration_ms?: number;
325 | };
326 | /**
327 | * Tool definitions
328 | */
329 | export type ToolDefinitionType = {
330 | type?: "function";
331 | name: string;
332 | description: string;
333 | parameters: {
334 | [key: string]: any;
335 | };
336 | };
337 | export type SessionResourceType = {
338 | model?: string;
339 | modalities?: string[];
340 | instructions?: string;
341 | voice?: "alloy" | "shimmer" | "echo";
342 | input_audio_format?: AudioFormatType;
343 | output_audio_format?: AudioFormatType;
344 | input_audio_transcription?: AudioTranscriptionType | null;
345 | turn_detection?: TurnDetectionServerVadType | null;
346 | tools?: ToolDefinitionType[];
347 | tool_choice?: "auto" | "none" | "required" | {
348 | type: "function";
349 | name: string;
350 | };
351 | temperature?: number;
352 | max_response_output_tokens?: number | "inf";
353 | };
354 | export type ItemStatusType = "in_progress" | "completed" | "incomplete";
355 | export type InputTextContentType = {
356 | type: "input_text";
357 | text: string;
358 | };
359 | export type InputAudioContentType = {
360 | type: "input_audio";
361 | /**
362 | * base64-encoded audio data
363 | */
364 | audio?: string;
365 | transcript?: string | null;
366 | };
367 | export type TextContentType = {
368 | type: "text";
369 | text: string;
370 | };
371 | export type AudioContentType = {
372 | type: "audio";
373 | /**
374 | * base64-encoded audio data
375 | */
376 | audio?: string;
377 | transcript?: string | null;
378 | };
379 | export type SystemItemType = {
380 | previous_item_id?: string | null;
381 | type: "message";
382 | status: ItemStatusType;
383 | role: "system";
384 | content: Array;
385 | };
386 | export type UserItemType = {
387 | previous_item_id?: string | null;
388 | type: "message";
389 | status: ItemStatusType;
390 | role: "system";
391 | content: Array;
392 | };
393 | export type AssistantItemType = {
394 | previous_item_id?: string | null;
395 | type: "message";
396 | status: ItemStatusType;
397 | role: "assistant";
398 | content: Array;
399 | };
400 | export type FunctionCallItemType = {
401 | previous_item_id?: string | null;
402 | type: "function_call";
403 | status: ItemStatusType;
404 | call_id: string;
405 | name: string;
406 | arguments: string;
407 | };
408 | export type FunctionCallOutputItemType = {
409 | previous_item_id?: string | null;
410 | type: "function_call_output";
411 | call_id: string;
412 | output: string;
413 | };
414 | export type FormattedToolType = {
415 | type: "function";
416 | name: string;
417 | call_id: string;
418 | arguments: string;
419 | };
420 | export type FormattedPropertyType = {
421 | audio?: Int16Array;
422 | text?: string;
423 | transcript?: string;
424 | tool?: FormattedToolType;
425 | output?: string;
426 | file?: any;
427 | };
428 | export type FormattedItemType = {
429 | id: string;
430 | object: string;
431 | role?: "user" | "assistant" | "system";
432 | formatted: FormattedPropertyType;
433 | };
434 | export type BaseItemType = SystemItemType | UserItemType | AssistantItemType | FunctionCallItemType | FunctionCallOutputItemType;
435 | export type ItemType = FormattedItemType & BaseItemType;
436 | export type IncompleteResponseStatusType = {
437 | type: "incomplete";
438 | reason: "interruption" | "max_output_tokens" | "content_filter";
439 | };
440 | export type FailedResponseStatusType = {
441 | type: "failed";
442 | error: {
443 | code: string;
444 | message: string;
445 | } | null;
446 | };
447 | export type UsageType = {
448 | total_tokens: number;
449 | input_tokens: number;
450 | output_tokens: number;
451 | };
452 | export type ResponseResourceType = {
453 | status: "in_progress" | "completed" | "incomplete" | "cancelled" | "failed";
454 | status_details: IncompleteResponseStatusType | FailedResponseStatusType | null;
455 | output: ItemType[];
456 | usage: UsageType | null;
457 | };
458 | import { RealtimeEventHandler } from './event_handler.js';
459 | import { RealtimeAPI } from './api.js';
460 | import { RealtimeConversation } from './conversation.js';
461 | //# sourceMappingURL=client.d.ts.map
--------------------------------------------------------------------------------
/src/lib/realtime-api-beta/dist/lib/client.d.ts.map:
--------------------------------------------------------------------------------
1 | {"version":3,"file":"client.d.ts","sourceRoot":"","sources":["../../lib/client.js"],"names":[],"mappings":"AAKA;;;GAGG;AAEH;;;;GAIG;AAEH;;;;;;GAMG;AAEH;;;;;;;GAOG;AAEH;;;;;;;;;;;;;;GAcG;AAEH;;GAEG;AAEH;;;;GAIG;AAEH;;;;;GAKG;AAEH;;;;GAIG;AAEH;;;;;GAKG;AAEH;;;;;;;GAOG;AAEH;;;;;;;GAOG;AAEH;;;;;;;GAOG;AAEH;;;;;;;;GAQG;AAEH;;;;;;GAMG;AAEH;;;;;;GAMG;AAEH;;;;;;;;GAQG;AAEH;;;;;;GAMG;AAEH;;GAEG;AAEH;;GAEG;AAEH;;;;GAIG;AAEH;;;;GAIG;AAEH;;;;;GAKG;AAEH;;;;;;GAMG;AAEH;;;GAGG;AACH;IACE;;;OAGG;IACH,sEAFW;QAAC,GAAG,CAAC,EAAE,MAAM,CAAC;QAAC,MAAM,CAAC,EAAE,MAAM,CAAC;QAAC,+BAA+B,CAAC,EAAE,OAAO,CAAC;QAAC,KAAK,CAAC,EAAE,OAAO,CAAA;KAAC,EAsCrG;IAlCC;;;;;;;;;;;;MAYC;IACD,kBAAuB;IACvB;;QAIC;IACD;;;;;MAKC;IACD,sBAKE;IACF,mCAA8C;IAKhD;;;;OAIG;IACH,qBAMC;IALC,wBAA2B;IAC3B,UAAe;IAEf,sBAAyC;IAI3C;;;;OAIG;IACH,6BAkHC;IAED;;;OAGG;IACH,eAFa,OAAO,CAInB;IAED;;;OAGG;IACH,SAFa,IAAI,CAShB;IAED;;;;OAIG;IACH,WAFa,OAAO,CAAC,IAAI,CAAC,CASzB;IAED;;;OAGG;IACH,yBAFa,OAAO,CAAC,IAAI,CAAC,CAUzB;IAED;;OAEG;IACH,mBAIC;IAED;;;OAGG;IACH,wBAFa,YAAY,GAAC,IAAI,CAI7B;IAED;;;;;OAKG;IACH,oBAJW,kBAAkB,sBAEhB;QAAC,UAAU,EAAE,kBAAkB,CAAC;QAAC,OAAO,WAAU;KAAC,CAkB/D;IAED;;;;OAIG;IACH,iBAHW,MAAM,GACJ,IAAI,CAQhB;IAED;;;;OAIG;IACH,eAHW,MAAM,GACJ,IAAI,CAKhB;IAED;;;;OAIG;IACH,sMAFW,mBAAmB,WA4D7B;IAED;;;;OAIG;IACH,iCAHW,KAAK,CAAC,oBAAoB,GAAC,qBAAqB,CAAC,GAC/C,IAAI,CAqBhB;IAED;;;;OAIG;IACH,8BAHW,UAAU,GAAC,WAAW,GACpB,IAAI,CAahB;IAED;;;OAGG;IACH,kBAFa,IAAI,CAahB;IAED;;;;;;OAMG;IACH,mBAJW,MAAM,gBACN,MAAM,GACJ;QAAC,IAAI,EAAE,CAAC,iBAAiB,GAAG,IAAI,CAAC,CAAA;KAAC,CAgC9C;IAED;;;OAGG;IACH,mBAFa,OAAO,CAAC;QAAC,IAAI,EAAE,QAAQ,CAAA;KAAC,CAAC,CAMrC;IAED;;;OAGG;IACH,4BAFa,OAAO,CAAC;QAAC,IAAI,EAAE,QAAQ,CAAA;KAAC,CAAC,CAMrC;CACF;;;;8BAhpBY,OAAO,GAAC,WAAW,GAAC,WAAW;;cAK9B,OAAO;WACP,WAAW;;;UAKX,YAAY;gBACZ,MAAM;wBACN,MAAM;0BACN,MAAM;;;;;;WAMN,UAAU;UACV,MAAM;iBACN,MAAM;gBACN;QAAC,CAAC,GAAG,EAAE,MAAM,GAAG,GAAG,CAAA;KAAC;;;YAKpB,MAAM;iBACN,MAAM,EAAE;mBACR,MAAM;YACN,OAAO,GAAC,SAAS,GAAC,MAAM;yBACxB,eAAe;0BACf,eAAe;gCACf,sBAAsB,GAAC,IAAI;qBAC3B,0BAA0B,GAAC,IAAI;YAC/B,kBAAkB,EAAE;kBACpB,MAAM,GAAC,MAAM,GAAC,UAAU,GAAC;QAAC,IAAI,EAAC,UAAU,CAAC;QAAA,IAAI,EAAC,MAAM,CAAA;KAAC;kBACtD,MAAM;iCACN,MAAM,GAAC,KAAK;;6BAIb,aAAa,GAAC,WAAW,GAAC,YAAY;;UAKrC,YAAY;UACZ,MAAM;;;UAKN,aAAa;;;;YACb,MAAM;iBACN,MAAM,GAAC,IAAI;;;UAKX,MAAM;UACN,MAAM;;;UAKN,OAAO;;;;YACP,MAAM;iBACN,MAAM,GAAC,IAAI;;;uBAKX,MAAM,GAAC,IAAI;UACX,SAAS;YACT,cAAc;UACd,QAAQ;aACR,KAAK,CAAC,oBAAoB,CAAC;;;uBAK3B,MAAM,GAAC,IAAI;UACX,SAAS;YACT,cAAc;UACd,QAAQ;aACR,KAAK,CAAC,oBAAoB,GAAC,qBAAqB,CAAC;;;uBAKjD,MAAM,GAAC,IAAI;UACX,SAAS;YACT,cAAc;UACd,WAAW;aACX,KAAK,CAAC,eAAe,GAAC,gBAAgB,CAAC;;;uBAKvC,MAAM,GAAC,IAAI;UACX,eAAe;YACf,cAAc;aACd,MAAM;UACN,MAAM;eACN,MAAM;;;uBAKN,MAAM,GAAC,IAAI;UACX,sBAAsB;aACtB,MAAM;YACN,MAAM;;;UAKN,UAAU;UACV,MAAM;aACN,MAAM;eACN,MAAM;;;YAKN,UAAU;WACV,MAAM;iBACN,MAAM;WACN,iBAAiB;aACjB,MAAM;WACN,GAAG;;;QAKH,MAAM;YACN,MAAM;WACN,MAAM,GAAC,WAAW,GAAC,QAAQ;eAC3B,qBAAqB;;2BAItB,cAAc,GAAC,YAAY,GAAC,iBAAiB,GAAC,oBAAoB,GAAC,0BAA0B;uBAI7F,iBAAiB,GAAG,YAAY;;UAK/B,YAAY;YACZ,cAAc,GAAC,mBAAmB,GAAC,gBAAgB;;;UAKnD,QAAQ;WACR;QAAC,IAAI,EAAE,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAA;KAAC,GAAC,IAAI;;;kBAKpC,MAAM;kBACN,MAAM;mBACN,MAAM;;;YAKN,aAAa,GAAC,WAAW,GAAC,YAAY,GAAC,WAAW,GAAC,QAAQ;oBAC3D,4BAA4B,GAAC,wBAAwB,GAAC,IAAI;YAC1D,QAAQ,EAAE;WACV,SAAS,GAAC,IAAI;;qCAtLS,oBAAoB;4BAC7B,UAAU;qCACD,mBAAmB"}
--------------------------------------------------------------------------------
/src/lib/realtime-api-beta/dist/lib/conversation.d.ts:
--------------------------------------------------------------------------------
1 | /**
2 |  * Contains text and audio information about an item
3 | * Can also be used as a delta
4 | * @typedef {Object} ItemContentDeltaType
5 | * @property {string} [text]
6 | * @property {Int16Array} [audio]
7 | * @property {string} [arguments]
8 | * @property {string} [transcript]
9 | */
10 | /**
11 | * RealtimeConversation holds conversation history
12 | * and performs event validation for RealtimeAPI
13 | * @class
14 | */
15 | export class RealtimeConversation {
16 | defaultFrequency: number;
17 | EventProcessors: {
18 | 'conversation.item.created': (event: any) => {
19 | item: any;
20 | delta: any;
21 | };
22 | 'conversation.item.truncated': (event: any) => {
23 | item: any;
24 | delta: any;
25 | };
26 | 'conversation.item.deleted': (event: any) => {
27 | item: any;
28 | delta: any;
29 | };
30 | 'conversation.item.input_audio_transcription.completed': (event: any) => {
31 | item: any;
32 | delta: {
33 | transcript: any;
34 | };
35 | };
36 | 'input_audio_buffer.speech_started': (event: any) => {
37 | item: any;
38 | delta: any;
39 | };
40 | 'input_audio_buffer.speech_stopped': (event: any, inputAudioBuffer: any) => {
41 | item: any;
42 | delta: any;
43 | };
44 | 'response.created': (event: any) => {
45 | item: any;
46 | delta: any;
47 | };
48 | 'response.output_item.added': (event: any) => {
49 | item: any;
50 | delta: any;
51 | };
52 | 'response.output_item.done': (event: any) => {
53 | item: any;
54 | delta: any;
55 | };
56 | 'response.content_part.added': (event: any) => {
57 | item: any;
58 | delta: any;
59 | };
60 | 'response.audio_transcript.delta': (event: any) => {
61 | item: any;
62 | delta: {
63 | transcript: any;
64 | };
65 | };
66 | 'response.audio.delta': (event: any) => {
67 | item: any;
68 | delta: {
69 | audio: Int16Array;
70 | };
71 | };
72 | 'response.text.delta': (event: any) => {
73 | item: any;
74 | delta: {
75 | text: any;
76 | };
77 | };
78 | 'response.function_call_arguments.delta': (event: any) => {
79 | item: any;
80 | delta: {
81 | arguments: any;
82 | };
83 | };
84 | };
85 | queuedInputAudio: Int16Array;
86 | /**
87 | * Clears the conversation history and resets to default
88 | * @returns {true}
89 | */
90 | clear(): true;
91 | itemLookup: {};
92 | items: any[];
93 | responseLookup: {};
94 | responses: any[];
95 | queuedSpeechItems: {};
96 | queuedTranscriptItems: {};
97 | /**
98 | * Queue input audio for manual speech event
99 | * @param {Int16Array} inputAudio
100 | * @returns {Int16Array}
101 | */
102 | queueInputAudio(inputAudio: Int16Array): Int16Array;
103 | /**
104 | * Process an event from the WebSocket server and compose items
105 | * @param {Object} event
106 | * @param {...any} args
107 |      * @returns {{item: import('./client.js').ItemType | null, delta: ItemContentDeltaType | null}}
108 | */
109 |     processEvent(event: any, ...args: any[]): { item: import("./client.js").ItemType | null; delta: ItemContentDeltaType | null; };
110 | /**
111 |      * Retrieves an item by id
112 | * @param {string} id
113 | * @returns {import('./client.js').ItemType}
114 | */
115 | getItem(id: string): import("./client.js").ItemType;
116 | /**
117 | * Retrieves all items in the conversation
118 | * @returns {import('./client.js').ItemType[]}
119 | */
120 | getItems(): import("./client.js").ItemType[];
121 | }
122 | /**
123 |  * Contains text and audio information about an item
124 | * Can also be used as a delta
125 | */
126 | export type ItemContentDeltaType = {
127 | text?: string;
128 | audio?: Int16Array;
129 | arguments?: string;
130 | transcript?: string;
131 | };
132 | //# sourceMappingURL=conversation.d.ts.map
--------------------------------------------------------------------------------
/src/lib/realtime-api-beta/dist/lib/conversation.d.ts.map:
--------------------------------------------------------------------------------
1 | {"version":3,"file":"conversation.d.ts","sourceRoot":"","sources":["../../lib/conversation.js"],"names":[],"mappings":"AAEA;;;;;;;;GAQG;AAEH;;;;GAIG;AACH;IACE,yBAA0B;IAE1B;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;MAuNE;IAlLQ,6BAA4B;IA4LtC;;;OAGG;IACH,SAFa,IAAI,CAWhB;IARC,eAAoB;IACpB,aAAe;IACf,mBAAwB;IACxB,iBAAmB;IACnB,sBAA2B;IAC3B,0BAA+B;IAKjC;;;;OAIG;IACH,4BAHW,UAAU,GACR,UAAU,CAKtB;IAED;;;;;OAKG;IACH,kCAHe,GAAG,EAAA,GACL,IAAI,CAkBhB;IAED;;;;OAIG;IACH,YAHW,MAAM,GACJ,OAAO,aAAa,EAAE,QAAQ,CAI1C;IAED;;;OAGG;IACH,YAFa,OAAO,aAAa,EAAE,QAAQ,EAAE,CAI5C;CACF;;;;;;WAhTa,MAAM;YACN,UAAU;gBACV,MAAM;iBACN,MAAM"}
--------------------------------------------------------------------------------
/src/lib/realtime-api-beta/dist/lib/event_handler.d.ts:
--------------------------------------------------------------------------------
1 | /**
2 | * Inherited class for RealtimeAPI and RealtimeClient
3 | * Adds basic event handling
4 | * @class
5 | */
6 | export class RealtimeEventHandler {
7 | eventHandlers: {};
8 | nextEventHandlers: {};
9 | /**
10 | * Clears all event handlers
11 | * @returns {true}
12 | */
13 | clearEventHandlers(): true;
14 | /**
15 | * Listen to specific events
16 | * @param {string} eventName The name of the event to listen to
17 | * @param {EventHandlerCallbackType} callback Code to execute on event
18 | * @returns {EventHandlerCallbackType}
19 | */
20 | on(eventName: string, callback: EventHandlerCallbackType): EventHandlerCallbackType;
21 | /**
22 | * Listen for the next event of a specified type
23 | * @param {string} eventName The name of the event to listen to
24 | * @param {EventHandlerCallbackType} callback Code to execute on event
25 | * @returns {EventHandlerCallbackType}
26 | */
27 | onNext(eventName: string, callback: EventHandlerCallbackType): EventHandlerCallbackType;
28 | /**
29 | * Turns off event listening for specific events
30 | * Calling without a callback will remove all listeners for the event
31 | * @param {string} eventName
32 | * @param {EventHandlerCallbackType} [callback]
33 | * @returns {true}
34 | */
35 | off(eventName: string, callback?: EventHandlerCallbackType): true;
36 | /**
37 | * Turns off event listening for the next event of a specific type
38 | * Calling without a callback will remove all listeners for the next event
39 | * @param {string} eventName
40 | * @param {EventHandlerCallbackType} [callback]
41 | * @returns {true}
42 | */
43 | offNext(eventName: string, callback?: EventHandlerCallbackType): true;
44 | /**
45 | * Waits for next event of a specific type and returns the payload
46 | * @param {string} eventName
47 | * @param {number|null} [timeout]
48 | * @returns {Promise<{[key: string]: any}|null>}
49 | */
50 | waitForNext(eventName: string, timeout?: number | null): Promise<{
51 | [key: string]: any;
52 | } | null>;
53 | /**
54 | * Executes all events in the order they were added, with .on() event handlers executing before .onNext() handlers
55 | * @param {string} eventName
56 | * @param {any} event
57 | * @returns {true}
58 | */
59 | dispatch(eventName: string, event: any): true;
60 | }
61 | //# sourceMappingURL=event_handler.d.ts.map
--------------------------------------------------------------------------------
/src/lib/realtime-api-beta/dist/lib/event_handler.d.ts.map:
--------------------------------------------------------------------------------
1 | {"version":3,"file":"event_handler.d.ts","sourceRoot":"","sources":["../../lib/event_handler.js"],"names":[],"mappings":"AAOA;;;;GAIG;AACH;IAMI,kBAAuB;IACvB,sBAA2B;IAG7B;;;OAGG;IACH,sBAFa,IAAI,CAMhB;IAED;;;;;OAKG;IACH,cAJW,MAAM,YACN,wBAAwB,GACtB,wBAAwB,CAMpC;IAED;;;;;OAKG;IACH,kBAJW,MAAM,YACN,wBAAwB,GACtB,wBAAwB,CAKpC;IAED;;;;;;OAMG;IACH,eAJW,MAAM,aACN,wBAAwB,GACtB,IAAI,CAgBhB;IAED;;;;;;OAMG;IACH,mBAJW,MAAM,aACN,wBAAwB,GACtB,IAAI,CAgBhB;IAED;;;;;OAKG;IACH,uBAJW,MAAM,YACN,MAAM,GAAC,IAAI,GACT,OAAO,CAAC;QAAC,CAAC,GAAG,EAAE,MAAM,GAAG,GAAG,CAAA;KAAC,GAAC,IAAI,CAAC,CAgB9C;IAED;;;;;OAKG;IACH,oBAJW,MAAM,SACN,GAAG,GACD,IAAI,CAahB;CACF"}
--------------------------------------------------------------------------------
/src/lib/realtime-api-beta/dist/lib/utils.d.ts:
--------------------------------------------------------------------------------
1 | /**
2 | * Basic utilities for the RealtimeAPI
3 | * @class
4 | */
5 | export class RealtimeUtils {
6 | /**
7 | * Converts Float32Array of amplitude data to ArrayBuffer in Int16Array format
8 | * @param {Float32Array} float32Array
9 | * @returns {ArrayBuffer}
10 | */
11 | static floatTo16BitPCM(float32Array: Float32Array): ArrayBuffer;
12 | /**
13 | * Converts a base64 string to an ArrayBuffer
14 | * @param {string} base64
15 | * @returns {ArrayBuffer}
16 | */
17 | static base64ToArrayBuffer(base64: string): ArrayBuffer;
18 | /**
19 | * Converts an ArrayBuffer, Int16Array or Float32Array to a base64 string
20 | * @param {ArrayBuffer|Int16Array|Float32Array} arrayBuffer
21 | * @returns {string}
22 | */
23 | static arrayBufferToBase64(arrayBuffer: ArrayBuffer | Int16Array | Float32Array): string;
24 | /**
25 | * Merge two Int16Arrays from Int16Arrays or ArrayBuffers
26 | * @param {ArrayBuffer|Int16Array} left
27 | * @param {ArrayBuffer|Int16Array} right
28 | * @returns {Int16Array}
29 | */
30 | static mergeInt16Arrays(left: ArrayBuffer | Int16Array, right: ArrayBuffer | Int16Array): Int16Array;
31 | /**
32 | * Generates an id to send with events and messages
33 | * @param {string} prefix
34 | * @param {number} [length]
35 | * @returns {string}
36 | */
37 | static generateId(prefix: string, length?: number): string;
38 | }
39 | //# sourceMappingURL=utils.d.ts.map
--------------------------------------------------------------------------------
/src/lib/realtime-api-beta/dist/lib/utils.d.ts.map:
--------------------------------------------------------------------------------
1 | {"version":3,"file":"utils.d.ts","sourceRoot":"","sources":["../../lib/utils.js"],"names":[],"mappings":"AAGA;;;GAGG;AACH;IACE;;;;OAIG;IACH,qCAHW,YAAY,GACV,WAAW,CAWvB;IAED;;;;OAIG;IACH,mCAHW,MAAM,GACJ,WAAW,CAUvB;IAED;;;;OAIG;IACH,wCAHW,WAAW,GAAC,UAAU,GAAC,YAAY,GACjC,MAAM,CAgBlB;IAED;;;;;OAKG;IACH,8BAJW,WAAW,GAAC,UAAU,SACtB,WAAW,GAAC,UAAU,GACpB,UAAU,CAoBtB;IAED;;;;;OAKG;IACH,0BAJW,MAAM,WACN,MAAM,GACJ,MAAM,CAUlB;CACF"}
--------------------------------------------------------------------------------
/src/lib/realtime-api-beta/index.js:
--------------------------------------------------------------------------------
1 | import { RealtimeUtils } from './lib/utils.js';
2 | import { RealtimeAPI } from './lib/api.js';
3 | import { RealtimeConversation } from './lib/conversation.js';
4 | import { RealtimeClient } from './lib/client.js';
5 |
6 | export { RealtimeAPI, RealtimeConversation, RealtimeClient, RealtimeUtils };
7 |
--------------------------------------------------------------------------------
/src/lib/realtime-api-beta/lib/api.js:
--------------------------------------------------------------------------------
1 | import { RealtimeEventHandler } from './event_handler.js';
2 | import { RealtimeUtils } from './utils.js';
3 |
4 | export class RealtimeAPI extends RealtimeEventHandler {
5 | /**
6 | * Create a new RealtimeAPI instance
7 | * @param {{url?: string, apiKey?: string, apiBase?: string, dangerouslyAllowAPIKeyInBrowser?: boolean, debug?: boolean}} [settings]
8 | * @returns {RealtimeAPI}
9 | */
10 | constructor({ url, apiKey, apiBase, dangerouslyAllowAPIKeyInBrowser, debug } = {}) {
11 | super();
12 | this.defaultUrl = 'wss://api.openai.com/v1/realtime';
13 | this.url = url || apiBase || this.defaultUrl;
14 | this.apiKey = apiKey || null;
15 | this.debug = !!debug;
16 | this.ws = null;
17 | if (globalThis.WebSocket && this.apiKey) {
18 | if (!dangerouslyAllowAPIKeyInBrowser) {
19 | throw new Error(
20 | `Can not provide API key in the browser without "dangerouslyAllowAPIKeyInBrowser" set to true`
21 | );
22 | }
23 | }
24 | }
25 |
26 | /**
27 | * Tells us whether or not the WebSocket is connected
28 | * @returns {boolean}
29 | */
30 | isConnected() {
31 | return !!this.ws;
32 | }
33 |
34 | /**
35 | * Writes WebSocket logs to console
36 | * @param {...any} args
37 | * @returns {true}
38 | */
39 | log(...args) {
40 | const date = new Date().toISOString();
41 | const logs = [`[Websocket/${date}]`].concat(args).map((arg) => {
42 | if (typeof arg === 'object' && arg !== null) {
43 | return JSON.stringify(arg, null, 2);
44 | } else {
45 | return arg;
46 | }
47 | });
48 | if (this.debug) {
49 | console.log(...logs);
50 | }
51 | return true;
52 | }
53 |
54 | /**
55 | * Connects to Realtime API Websocket Server
56 | * @param {{model?: string}} [settings]
57 | * @returns {Promise}
58 | */
59 | async connect({ model } = { model: 'gpt-4o-realtime-preview-2024-10-01' }) {
60 | if (!this.apiKey && this.url === this.defaultUrl) {
61 | console.warn(`No apiKey provided for connection to "${this.url}"`);
62 | }
63 | if (this.isConnected()) {
64 | throw new Error(`Already connected`);
65 | }
66 | if (globalThis.WebSocket) {
67 | /**
68 | * Web browser
69 | */
70 | if (this.apiKey) {
71 | console.warn(
72 | 'Warning: Connecting using API key in the browser, this is not recommended'
73 | );
74 | }
75 | const WebSocket = globalThis.WebSocket;
76 | const ws = new WebSocket(`${this.url}${model ? `?model=${model}` : ''}`, [
77 | 'realtime',
78 | `openai-insecure-api-key.${this.apiKey}`,
79 | 'openai-beta.realtime-v1',
80 | ]);
81 | ws.addEventListener('message', (event) => {
82 | const message = JSON.parse(event.data);
83 | this.receive(message.type, message);
84 | });
85 | return new Promise((resolve, reject) => {
86 | const connectionErrorHandler = () => {
87 | this.disconnect(ws);
88 | reject(new Error(`Could not connect to "${this.url}"`));
89 | };
90 | ws.addEventListener('error', connectionErrorHandler);
91 | ws.addEventListener('open', () => {
92 | this.log(`Connected to "${this.url}"`);
93 | ws.removeEventListener('error', connectionErrorHandler);
94 | ws.addEventListener('error', () => {
95 | this.disconnect(ws);
96 | this.log(`Error, disconnected from "${this.url}"`);
97 | this.dispatch('close', { error: true });
98 | });
99 | ws.addEventListener('close', () => {
100 | this.disconnect(ws);
101 | this.log(`Disconnected from "${this.url}"`);
102 | this.dispatch('close', { error: false });
103 | });
104 | this.ws = ws;
105 | resolve(true);
106 | });
107 | });
108 | } else {
109 | /**
110 | * Node.js
111 | */
112 | const moduleName = 'ws';
113 | const wsModule = await import(/* webpackIgnore: true */ moduleName);
114 | const WebSocket = wsModule.default;
115 | const ws = new WebSocket(
116 |         `${this.url}${model ? `?model=${model}` : ''}`,
117 | [],
118 | {
119 | finishRequest: (request) => {
120 | // Auth
121 | request.setHeader('Authorization', `Bearer ${this.apiKey}`);
122 | request.setHeader('OpenAI-Beta', 'realtime=v1');
123 | request.end();
124 | },
125 | }
126 | );
127 | ws.on('message', (data) => {
128 | const message = JSON.parse(data.toString());
129 | this.receive(message.type, message);
130 | });
131 | return new Promise((resolve, reject) => {
132 | const connectionErrorHandler = () => {
133 | this.disconnect(ws);
134 | reject(new Error(`Could not connect to "${this.url}"`));
135 | };
136 | ws.on('error', connectionErrorHandler);
137 | ws.on('open', () => {
138 | this.log(`Connected to "${this.url}"`);
139 | ws.removeListener('error', connectionErrorHandler);
140 | ws.on('error', () => {
141 | this.disconnect(ws);
142 | this.log(`Error, disconnected from "${this.url}"`);
143 | this.dispatch('close', { error: true });
144 | });
145 | ws.on('close', () => {
146 | this.disconnect(ws);
147 | this.log(`Disconnected from "${this.url}"`);
148 | this.dispatch('close', { error: false });
149 | });
150 | this.ws = ws;
151 | resolve(true);
152 | });
153 | });
154 | }
155 | }
156 |
157 | /**
158 | * Disconnects from Realtime API server
159 | * @param {WebSocket} [ws]
160 | * @returns {true}
161 | */
162 | disconnect(ws) {
163 | if (!ws || this.ws === ws) {
164 | this.ws && this.ws.close();
165 | this.ws = null;
166 | return true;
167 | }
168 | }
169 |
170 | /**
171 | * Receives an event from WebSocket and dispatches as "server.{eventName}" and "server.*" events
172 | * @param {string} eventName
173 | * @param {{[key: string]: any}} event
174 | * @returns {true}
175 | */
176 | receive(eventName, event) {
177 | this.log(`received:`, eventName, event);
178 | this.dispatch(`server.${eventName}`, event);
179 | this.dispatch('server.*', event);
180 | return true;
181 | }
182 |
183 | /**
184 | * Sends an event to WebSocket and dispatches as "client.{eventName}" and "client.*" events
185 | * @param {string} eventName
186 |    * @param {{[key: string]: any}} data
187 | * @returns {true}
188 | */
189 | send(eventName, data) {
190 | if (!this.isConnected()) {
191 | throw new Error(`RealtimeAPI is not connected`);
192 | }
193 | data = data || {};
194 | if (typeof data !== 'object') {
195 | throw new Error(`data must be an object`);
196 | }
197 | const event = {
198 | event_id: RealtimeUtils.generateId('evt_'),
199 | type: eventName,
200 | ...data,
201 | };
202 | this.dispatch(`client.${eventName}`, event);
203 | this.dispatch('client.*', event);
204 | this.log(`sent:`, eventName, event);
205 | this.ws.send(JSON.stringify(event));
206 | return true;
207 | }
208 | }
209 |
--------------------------------------------------------------------------------
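
A minimal sketch of driving the RealtimeAPI transport in lib/api.js above from a standalone Node.js script. The import path and the OPENAI_API_KEY environment variable are assumptions, not part of the repository, and the runtime is assumed to have no global WebSocket so the ws-based branch of connect() is taken.

// example-api.js (hypothetical, run as an ES module)
import { RealtimeAPI } from './src/lib/realtime-api-beta/lib/api.js';

const api = new RealtimeAPI({ apiKey: process.env.OPENAI_API_KEY, debug: true });

// Incoming server events are re-dispatched as "server.{type}" and "server.*"
api.on('server.session.created', (event) => {
  console.log('session created:', event.session?.id);
});

await api.connect();          // resolves once the WebSocket is open
api.send('response.create');  // wrapped as { event_id, type: 'response.create' }
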
/src/lib/realtime-api-beta/lib/client.js:
--------------------------------------------------------------------------------
1 | import { RealtimeEventHandler } from './event_handler.js';
2 | import { RealtimeAPI } from './api.js';
3 | import { RealtimeConversation } from './conversation.js';
4 | import { RealtimeUtils } from './utils.js';
5 |
6 | /**
7 | * Valid audio formats
8 | * @typedef {"pcm16"|"g711-ulaw"|"g711-alaw"} AudioFormatType
9 | */
10 |
11 | /**
12 | * @typedef {Object} AudioTranscriptionType
13 | * @property {boolean} [enabled]
14 | * @property {"whisper-1"} model
15 | */
16 |
17 | /**
18 | * @typedef {Object} TurnDetectionServerVadType
19 | * @property {"server_vad"} type
20 | * @property {number} [threshold]
21 | * @property {number} [prefix_padding_ms]
22 | * @property {number} [silence_duration_ms]
23 | */
24 |
25 | /**
26 | * Tool definitions
27 | * @typedef {Object} ToolDefinitionType
28 | * @property {"function"} [type]
29 | * @property {string} name
30 | * @property {string} description
31 | * @property {{[key: string]: any}} parameters
32 | */
33 |
34 | /**
35 | * @typedef {Object} SessionResourceType
36 | * @property {string} [model]
37 | * @property {string[]} [modalities]
38 | * @property {string} [instructions]
39 | * @property {"alloy"|"shimmer"|"echo"} [voice]
40 | * @property {AudioFormatType} [input_audio_format]
41 | * @property {AudioFormatType} [output_audio_format]
42 | * @property {AudioTranscriptionType|null} [input_audio_transcription]
43 | * @property {TurnDetectionServerVadType|null} [turn_detection]
44 | * @property {ToolDefinitionType[]} [tools]
45 | * @property {"auto"|"none"|"required"|{type:"function",name:string}} [tool_choice]
46 | * @property {number} [temperature]
47 | * @property {number|"inf"} [max_response_output_tokens]
48 | */
49 |
50 | /**
51 | * @typedef {"in_progress"|"completed"|"incomplete"} ItemStatusType
52 | */
53 |
54 | /**
55 | * @typedef {Object} InputTextContentType
56 | * @property {"input_text"} type
57 | * @property {string} text
58 | */
59 |
60 | /**
61 | * @typedef {Object} InputAudioContentType
62 | * @property {"input_audio"} type
63 | * @property {string} [audio] base64-encoded audio data
64 | * @property {string|null} [transcript]
65 | */
66 |
67 | /**
68 | * @typedef {Object} TextContentType
69 | * @property {"text"} type
70 | * @property {string} text
71 | */
72 |
73 | /**
74 | * @typedef {Object} AudioContentType
75 | * @property {"audio"} type
76 | * @property {string} [audio] base64-encoded audio data
77 | * @property {string|null} [transcript]
78 | */
79 |
80 | /**
81 | * @typedef {Object} SystemItemType
82 | * @property {string|null} [previous_item_id]
83 | * @property {"message"} type
84 | * @property {ItemStatusType} status
85 | * @property {"system"} role
86 | * @property {Array} content
87 | */
88 |
89 | /**
90 | * @typedef {Object} UserItemType
91 | * @property {string|null} [previous_item_id]
92 | * @property {"message"} type
93 | * @property {ItemStatusType} status
94 |  * @property {"user"} role
95 | * @property {Array} content
96 | */
97 |
98 | /**
99 | * @typedef {Object} AssistantItemType
100 | * @property {string|null} [previous_item_id]
101 | * @property {"message"} type
102 | * @property {ItemStatusType} status
103 | * @property {"assistant"} role
104 | * @property {Array} content
105 | */
106 |
107 | /**
108 | * @typedef {Object} FunctionCallItemType
109 | * @property {string|null} [previous_item_id]
110 | * @property {"function_call"} type
111 | * @property {ItemStatusType} status
112 | * @property {string} call_id
113 | * @property {string} name
114 | * @property {string} arguments
115 | */
116 |
117 | /**
118 | * @typedef {Object} FunctionCallOutputItemType
119 | * @property {string|null} [previous_item_id]
120 | * @property {"function_call_output"} type
121 | * @property {string} call_id
122 | * @property {string} output
123 | */
124 |
125 | /**
126 | * @typedef {Object} FormattedToolType
127 | * @property {"function"} type
128 | * @property {string} name
129 | * @property {string} call_id
130 | * @property {string} arguments
131 | */
132 |
133 | /**
134 | * @typedef {Object} FormattedPropertyType
135 | * @property {Int16Array} [audio]
136 | * @property {string} [text]
137 | * @property {string} [transcript]
138 | * @property {FormattedToolType} [tool]
139 | * @property {string} [output]
140 | * @property {any} [file]
141 | */
142 |
143 | /**
144 | * @typedef {Object} FormattedItemType
145 | * @property {string} id
146 | * @property {string} object
147 | * @property {"user"|"assistant"|"system"} [role]
148 | * @property {FormattedPropertyType} formatted
149 | */
150 |
151 | /**
152 | * @typedef {SystemItemType|UserItemType|AssistantItemType|FunctionCallItemType|FunctionCallOutputItemType} BaseItemType
153 | */
154 |
155 | /**
156 | * @typedef {FormattedItemType & BaseItemType} ItemType
157 | */
158 |
159 | /**
160 | * @typedef {Object} IncompleteResponseStatusType
161 | * @property {"incomplete"} type
162 | * @property {"interruption"|"max_output_tokens"|"content_filter"} reason
163 | */
164 |
165 | /**
166 | * @typedef {Object} FailedResponseStatusType
167 | * @property {"failed"} type
168 | * @property {{code: string, message: string}|null} error
169 | */
170 |
171 | /**
172 | * @typedef {Object} UsageType
173 | * @property {number} total_tokens
174 | * @property {number} input_tokens
175 | * @property {number} output_tokens
176 | */
177 |
178 | /**
179 | * @typedef {Object} ResponseResourceType
180 | * @property {"in_progress"|"completed"|"incomplete"|"cancelled"|"failed"} status
181 | * @property {IncompleteResponseStatusType|FailedResponseStatusType|null} status_details
182 | * @property {ItemType[]} output
183 | * @property {UsageType|null} usage
184 | */
185 |
186 | /**
187 | * RealtimeClient Class
188 | * @class
189 | */
190 | export class RealtimeClient extends RealtimeEventHandler {
191 | /**
192 | * Create a new RealtimeClient instance
193 | * @param {{url?: string, apiBase?: string, apiKey?: string, dangerouslyAllowAPIKeyInBrowser?: boolean, debug?: boolean}} [settings]
194 | */
195 | constructor({ url, apiBase, apiKey, dangerouslyAllowAPIKeyInBrowser, debug } = {}) {
196 | super();
197 | this.defaultSessionConfig = {
198 | modalities: ['text', 'audio'],
199 | instructions: '',
200 | voice: 'alloy',
201 | input_audio_format: 'pcm16',
202 | output_audio_format: 'pcm16',
203 | input_audio_transcription: null,
204 | turn_detection: null,
205 | tools: [],
206 | tool_choice: 'auto',
207 | temperature: 0.8,
208 | max_response_output_tokens: 4096,
209 | };
210 | this.sessionConfig = {};
211 | this.transcriptionModels = [
212 | {
213 | model: 'whisper-1',
214 | },
215 | ];
216 | this.defaultServerVadConfig = {
217 | type: 'server_vad',
218 | threshold: 0.5, // 0.0 to 1.0,
219 | prefix_padding_ms: 300, // How much audio to include in the audio stream before the speech starts.
220 | silence_duration_ms: 200, // How long to wait to mark the speech as stopped.
221 | };
222 | this.realtime = new RealtimeAPI({
223 | url,
224 | apiKey,
225 | apiBase,
226 | dangerouslyAllowAPIKeyInBrowser,
227 | debug,
228 | });
229 | this.conversation = new RealtimeConversation();
230 | this._resetConfig();
231 | this._addAPIEventHandlers();
232 | }
233 |
234 | /**
235 | * Resets sessionConfig and conversationConfig to default
236 | * @private
237 | * @returns {true}
238 | */
239 | _resetConfig() {
240 | this.sessionCreated = false;
241 | this.tools = {};
242 | this.sessionConfig = JSON.parse(JSON.stringify(this.defaultSessionConfig));
243 | this.inputAudioBuffer = new Int16Array(0);
244 | return true;
245 | }
246 |
247 | /**
248 | * Sets up event handlers for a fully-functional application control flow
249 | * @private
250 | * @returns {true}
251 | */
252 | _addAPIEventHandlers() {
253 | // Event Logging handlers
254 | this.realtime.on('client.*', (event) => {
255 | const realtimeEvent = {
256 | time: new Date().toISOString(),
257 | source: 'client',
258 | event: event,
259 | };
260 | this.dispatch('realtime.event', realtimeEvent);
261 | });
262 | this.realtime.on('server.*', (event) => {
263 | const realtimeEvent = {
264 | time: new Date().toISOString(),
265 | source: 'server',
266 | event: event,
267 | };
268 | this.dispatch('realtime.event', realtimeEvent);
269 | });
270 |
271 | // Handles session created event, can optionally wait for it
272 | this.realtime.on(
273 | 'server.session.created',
274 | () => (this.sessionCreated = true),
275 | );
276 |
277 | // Setup for application control flow
278 | const handler = (event, ...args) => {
279 | const { item, delta } = this.conversation.processEvent(event, ...args);
280 | return { item, delta };
281 | };
282 | const handlerWithDispatch = (event, ...args) => {
283 | const { item, delta } = handler(event, ...args);
284 | if (item) {
285 | // FIXME: If statement is only here because item.input_audio_transcription.completed
286 | // can fire before `item.created`, resulting in empty item.
287 | // This happens in VAD mode with empty audio
288 | this.dispatch('conversation.updated', { item, delta });
289 | }
290 | return { item, delta };
291 | };
292 | const callTool = async (tool) => {
293 | try {
294 | const jsonArguments = JSON.parse(tool.arguments);
295 | const toolConfig = this.tools[tool.name];
296 | if (!toolConfig) {
297 | throw new Error(`Tool "${tool.name}" has not been added`);
298 | }
299 | const result = await toolConfig.handler(jsonArguments);
300 | this.realtime.send('conversation.item.create', {
301 | item: {
302 | type: 'function_call_output',
303 | call_id: tool.call_id,
304 | output: JSON.stringify(result),
305 | },
306 | });
307 | } catch (e) {
308 | this.realtime.send('conversation.item.create', {
309 | item: {
310 | type: 'function_call_output',
311 | call_id: tool.call_id,
312 | output: JSON.stringify({ error: e.message }),
313 | },
314 | });
315 | }
316 | this.createResponse();
317 | };
318 |
319 | // Handlers to update internal conversation state
320 | this.realtime.on('server.response.created', handler);
321 | this.realtime.on('server.response.output_item.added', handler);
322 | this.realtime.on('server.response.content_part.added', handler);
323 | this.realtime.on('server.input_audio_buffer.speech_started', (event) => {
324 | handler(event);
325 | this.dispatch('conversation.interrupted');
326 | });
327 | this.realtime.on('server.input_audio_buffer.speech_stopped', (event) =>
328 | handler(event, this.inputAudioBuffer),
329 | );
330 |
331 | // Handlers to update application state
332 | this.realtime.on('server.conversation.item.created', (event) => {
333 | const { item } = handlerWithDispatch(event);
334 | this.dispatch('conversation.item.appended', { item });
335 | if (item.status === 'completed') {
336 | this.dispatch('conversation.item.completed', { item });
337 | }
338 | });
339 | this.realtime.on('server.conversation.item.truncated', handlerWithDispatch);
340 | this.realtime.on('server.conversation.item.deleted', handlerWithDispatch);
341 | this.realtime.on(
342 | 'server.conversation.item.input_audio_transcription.completed',
343 | handlerWithDispatch,
344 | );
345 | this.realtime.on(
346 | 'server.response.audio_transcript.delta',
347 | handlerWithDispatch,
348 | );
349 | this.realtime.on('server.response.audio.delta', handlerWithDispatch);
350 | this.realtime.on('server.response.text.delta', handlerWithDispatch);
351 | this.realtime.on(
352 | 'server.response.function_call_arguments.delta',
353 | handlerWithDispatch,
354 | );
355 | this.realtime.on('server.response.output_item.done', async (event) => {
356 | const { item } = handlerWithDispatch(event);
357 | if (item.status === 'completed') {
358 | this.dispatch('conversation.item.completed', { item });
359 | }
360 | if (item.formatted.tool) {
361 | callTool(item.formatted.tool);
362 | }
363 | });
364 |
365 | return true;
366 | }
367 |
368 | /**
369 |    * Tells us whether the realtime WebSocket is connected
370 | * @returns {boolean}
371 | */
372 | isConnected() {
373 | return this.realtime.isConnected();
374 | }
375 |
376 | /**
377 | * Resets the client instance entirely: disconnects and clears active config
378 | * @returns {true}
379 | */
380 | reset() {
381 | this.disconnect();
382 | this.clearEventHandlers();
383 | this.realtime.clearEventHandlers();
384 | this._resetConfig();
385 | this._addAPIEventHandlers();
386 | return true;
387 | }
388 |
389 | /**
390 | * Connects to the Realtime WebSocket API
391 | * Updates session config and conversation config
392 | * @returns {Promise}
393 | */
394 | async connect() {
395 | if (this.isConnected()) {
396 | throw new Error(`Already connected, use .disconnect() first`);
397 | }
398 | await this.realtime.connect();
399 | this.updateSession();
400 | return true;
401 | }
402 |
403 | /**
404 |    * Waits for a session.created event to be received before proceeding
405 | * @returns {Promise}
406 | */
407 | async waitForSessionCreated() {
408 | if (!this.isConnected()) {
409 | throw new Error(`Not connected, use .connect() first`);
410 | }
411 | while (!this.sessionCreated) {
412 | await new Promise((r) => setTimeout(() => r(), 1));
413 | }
414 | return true;
415 | }
416 |
417 | /**
418 | * Disconnects from the Realtime API and clears the conversation history
419 | */
420 | disconnect() {
421 | this.sessionCreated = false;
422 | this.conversation.clear();
423 | this.realtime.isConnected() && this.realtime.disconnect();
424 | }
425 |
426 | /**
427 | * Gets the active turn detection mode
428 | * @returns {"server_vad"|null}
429 | */
430 | getTurnDetectionType() {
431 | return this.sessionConfig.turn_detection?.type || null;
432 | }
433 |
434 | /**
435 | * Add a tool and handler
436 | * @param {ToolDefinitionType} definition
437 | * @param {function} handler
438 | * @returns {{definition: ToolDefinitionType, handler: function}}
439 | */
440 | addTool(definition, handler) {
441 | if (!definition?.name) {
442 | throw new Error(`Missing tool name in definition`);
443 | }
444 | const name = definition?.name;
445 | if (this.tools[name]) {
446 | throw new Error(
447 | `Tool "${name}" already added. Please use .removeTool("${name}") before trying to add again.`,
448 | );
449 | }
450 | if (typeof handler !== 'function') {
451 | throw new Error(`Tool "${name}" handler must be a function`);
452 | }
453 | this.tools[name] = { definition, handler };
454 | this.updateSession();
455 | return this.tools[name];
456 | }
457 |
458 | /**
459 | * Removes a tool
460 | * @param {string} name
461 | * @returns {true}
462 | */
463 | removeTool(name) {
464 | if (!this.tools[name]) {
465 | throw new Error(`Tool "${name}" does not exist, can not be removed.`);
466 | }
467 | delete this.tools[name];
468 | return true;
469 | }
470 |
471 | /**
472 | * Deletes an item
473 | * @param {string} id
474 | * @returns {true}
475 | */
476 | deleteItem(id) {
477 | this.realtime.send('conversation.item.delete', { item_id: id });
478 | return true;
479 | }
480 |
481 | /**
482 | * Updates session configuration
483 | * If the client is not yet connected, will save details and instantiate upon connection
484 | * @param {SessionResourceType} [sessionConfig]
485 | */
486 | updateSession({
487 | modalities = void 0,
488 | instructions = void 0,
489 | voice = void 0,
490 | input_audio_format = void 0,
491 | output_audio_format = void 0,
492 | input_audio_transcription = void 0,
493 | turn_detection = void 0,
494 | tools = void 0,
495 | tool_choice = void 0,
496 | temperature = void 0,
497 | max_response_output_tokens = void 0,
498 | } = {}) {
499 | modalities !== void 0 && (this.sessionConfig.modalities = modalities);
500 | instructions !== void 0 && (this.sessionConfig.instructions = instructions);
501 | voice !== void 0 && (this.sessionConfig.voice = voice);
502 | input_audio_format !== void 0 &&
503 | (this.sessionConfig.input_audio_format = input_audio_format);
504 | output_audio_format !== void 0 &&
505 | (this.sessionConfig.output_audio_format = output_audio_format);
506 | input_audio_transcription !== void 0 &&
507 | (this.sessionConfig.input_audio_transcription =
508 | input_audio_transcription);
509 | turn_detection !== void 0 &&
510 | (this.sessionConfig.turn_detection = turn_detection);
511 | tools !== void 0 && (this.sessionConfig.tools = tools);
512 | tool_choice !== void 0 && (this.sessionConfig.tool_choice = tool_choice);
513 | temperature !== void 0 && (this.sessionConfig.temperature = temperature);
514 | max_response_output_tokens !== void 0 &&
515 | (this.sessionConfig.max_response_output_tokens =
516 | max_response_output_tokens);
517 | // Load tools from tool definitions + already loaded tools
518 | const useTools = [].concat(
519 | (tools || []).map((toolDefinition) => {
520 | const definition = {
521 | type: 'function',
522 | ...toolDefinition,
523 | };
524 | if (this.tools[definition?.name]) {
525 | throw new Error(
526 | `Tool "${definition?.name}" has already been defined`,
527 | );
528 | }
529 | return definition;
530 | }),
531 | Object.keys(this.tools).map((key) => {
532 | return {
533 | type: 'function',
534 | ...this.tools[key].definition,
535 | };
536 | }),
537 | );
538 | const session = { ...this.sessionConfig };
539 | session.tools = useTools;
540 | if (this.realtime.isConnected()) {
541 | this.realtime.send('session.update', { session });
542 | }
543 | return true;
544 | }
545 |
546 | /**
547 | * Sends user message content and generates a response
548 | * @param {Array} content
549 | * @returns {true}
550 | */
551 | sendUserMessageContent(content = []) {
552 | if (content.length) {
553 | for (const c of content) {
554 | if (c.type === 'input_audio') {
555 | if (c.audio instanceof ArrayBuffer || c.audio instanceof Int16Array) {
556 | c.audio = RealtimeUtils.arrayBufferToBase64(c.audio);
557 | }
558 | }
559 | }
560 | this.realtime.send('conversation.item.create', {
561 | item: {
562 | type: 'message',
563 | role: 'user',
564 | content,
565 | },
566 | });
567 | }
568 | this.createResponse();
569 | return true;
570 | }
571 |
572 | /**
573 | * Appends user audio to the existing audio buffer
574 | * @param {Int16Array|ArrayBuffer} arrayBuffer
575 | * @returns {true}
576 | */
577 | appendInputAudio(arrayBuffer) {
578 | if (arrayBuffer.byteLength > 0) {
579 | this.realtime.send('input_audio_buffer.append', {
580 | audio: RealtimeUtils.arrayBufferToBase64(arrayBuffer),
581 | });
582 | this.inputAudioBuffer = RealtimeUtils.mergeInt16Arrays(
583 | this.inputAudioBuffer,
584 | arrayBuffer,
585 | );
586 | }
587 | return true;
588 | }
589 |
590 | /**
591 | * Forces a model response generation
592 | * @returns {true}
593 | */
594 | createResponse() {
595 | if (
596 | this.getTurnDetectionType() === null &&
597 | this.inputAudioBuffer.byteLength > 0
598 | ) {
599 | this.realtime.send('input_audio_buffer.commit');
600 | this.conversation.queueInputAudio(this.inputAudioBuffer);
601 | this.inputAudioBuffer = new Int16Array(0);
602 | }
603 | this.realtime.send('response.create');
604 | return true;
605 | }
606 |
607 | /**
608 |    * Cancels the ongoing server generation and, if applicable, truncates the generated audio
609 |    * If no id is provided, simply sends a `response.cancel` event
610 | * @param {string} id The id of the message to cancel
611 | * @param {number} [sampleCount] The number of samples to truncate past for the ongoing generation
612 | * @returns {{item: (AssistantItemType | null)}}
613 | */
614 | cancelResponse(id, sampleCount = 0) {
615 | if (!id) {
616 | this.realtime.send('response.cancel');
617 | return { item: null };
618 | } else if (id) {
619 | const item = this.conversation.getItem(id);
620 | if (!item) {
621 | throw new Error(`Could not find item "${id}"`);
622 | }
623 | if (item.type !== 'message') {
624 | throw new Error(`Can only cancelResponse messages with type "message"`);
625 | } else if (item.role !== 'assistant') {
626 | throw new Error(
627 | `Can only cancelResponse messages with role "assistant"`,
628 | );
629 | }
630 | this.realtime.send('response.cancel');
631 | const audioIndex = item.content.findIndex((c) => c.type === 'audio');
632 | if (audioIndex === -1) {
633 | throw new Error(`Could not find audio on item to cancel`);
634 | }
635 | this.realtime.send('conversation.item.truncate', {
636 | item_id: id,
637 | content_index: audioIndex,
638 | audio_end_ms: Math.floor(
639 | (sampleCount / this.conversation.defaultFrequency) * 1000,
640 | ),
641 | });
642 | return { item };
643 | }
644 | }
645 |
646 | /**
647 | * Utility for waiting for the next `conversation.item.appended` event to be triggered by the server
648 | * @returns {Promise<{item: ItemType}>}
649 | */
650 | async waitForNextItem() {
651 | const event = await this.waitForNext('conversation.item.appended');
652 | const { item } = event;
653 | return { item };
654 | }
655 |
656 | /**
657 | * Utility for waiting for the next `conversation.item.completed` event to be triggered by the server
658 | * @returns {Promise<{item: ItemType}>}
659 | */
660 | async waitForNextCompletedItem() {
661 | const event = await this.waitForNext('conversation.item.completed');
662 | const { item } = event;
663 | return { item };
664 | }
665 | }
666 |
--------------------------------------------------------------------------------
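
A sketch of the intended control flow for the RealtimeClient above. The relay URL and the get_time tool are illustrative assumptions, not part of the repository.

import { RealtimeClient } from './src/lib/realtime-api-beta/index.js';

const client = new RealtimeClient({ url: 'ws://localhost:8081' }); // hypothetical relay URL

// Tools registered here are merged into session.tools by updateSession()
client.addTool(
  {
    name: 'get_time',
    description: 'Returns the current time as an ISO string',
    parameters: { type: 'object', properties: {} },
  },
  async () => ({ time: new Date().toISOString() }),
);

// Streaming updates arrive as { item, delta } pairs
client.on('conversation.updated', ({ item, delta }) => {
  if (delta?.transcript) process.stdout.write(delta.transcript);
});

await client.connect();
await client.waitForSessionCreated();
client.sendUserMessageContent([{ type: 'input_text', text: 'What time is it?' }]);
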
/src/lib/realtime-api-beta/lib/conversation.js:
--------------------------------------------------------------------------------
1 | import { RealtimeUtils } from './utils.js';
2 |
3 | /**
4 |  * Contains text and audio information about an item
5 | * Can also be used as a delta
6 | * @typedef {Object} ItemContentDeltaType
7 | * @property {string} [text]
8 | * @property {Int16Array} [audio]
9 | * @property {string} [arguments]
10 | * @property {string} [transcript]
11 | */
12 |
13 | /**
14 | * RealtimeConversation holds conversation history
15 | * and performs event validation for RealtimeAPI
16 | * @class
17 | */
18 | export class RealtimeConversation {
19 | defaultFrequency = 24_000; // 24,000 Hz
20 |
21 | EventProcessors = {
22 | 'conversation.item.created': (event) => {
23 | const { item } = event;
24 | // deep copy values
25 | const newItem = JSON.parse(JSON.stringify(item));
26 | if (!this.itemLookup[newItem.id]) {
27 | this.itemLookup[newItem.id] = newItem;
28 | this.items.push(newItem);
29 | }
30 | newItem.formatted = {};
31 | newItem.formatted.audio = new Int16Array(0);
32 | newItem.formatted.text = '';
33 | newItem.formatted.transcript = '';
34 | // If we have a speech item, can populate audio
35 | if (this.queuedSpeechItems[newItem.id]) {
36 | newItem.formatted.audio = this.queuedSpeechItems[newItem.id].audio;
37 | delete this.queuedSpeechItems[newItem.id]; // free up some memory
38 | }
39 | // Populate formatted text if it comes out on creation
40 | if (newItem.content) {
41 | const textContent = newItem.content.filter((c) =>
42 | ['text', 'input_text'].includes(c.type),
43 | );
44 | for (const content of textContent) {
45 | newItem.formatted.text += content.text;
46 | }
47 | }
48 | // If we have a transcript item, can pre-populate transcript
49 | if (this.queuedTranscriptItems[newItem.id]) {
50 |         newItem.formatted.transcript = this.queuedTranscriptItems[newItem.id].transcript;
51 | delete this.queuedTranscriptItems[newItem.id];
52 | }
53 | if (newItem.type === 'message') {
54 | if (newItem.role === 'user') {
55 | newItem.status = 'completed';
56 | if (this.queuedInputAudio) {
57 | newItem.formatted.audio = this.queuedInputAudio;
58 | this.queuedInputAudio = null;
59 | }
60 | } else {
61 | newItem.status = 'in_progress';
62 | }
63 | } else if (newItem.type === 'function_call') {
64 | newItem.formatted.tool = {
65 | type: 'function',
66 | name: newItem.name,
67 | call_id: newItem.call_id,
68 | arguments: '',
69 | };
70 | newItem.status = 'in_progress';
71 | } else if (newItem.type === 'function_call_output') {
72 | newItem.status = 'completed';
73 | newItem.formatted.output = newItem.output;
74 | }
75 | return { item: newItem, delta: null };
76 | },
77 | 'conversation.item.truncated': (event) => {
78 | const { item_id, audio_end_ms } = event;
79 | const item = this.itemLookup[item_id];
80 | if (!item) {
81 | throw new Error(`item.truncated: Item "${item_id}" not found`);
82 | }
83 | const endIndex = Math.floor(
84 | (audio_end_ms * this.defaultFrequency) / 1000,
85 | );
86 | item.formatted.transcript = '';
87 | item.formatted.audio = item.formatted.audio.slice(0, endIndex);
88 | return { item, delta: null };
89 | },
90 | 'conversation.item.deleted': (event) => {
91 | const { item_id } = event;
92 | const item = this.itemLookup[item_id];
93 | if (!item) {
94 | throw new Error(`item.deleted: Item "${item_id}" not found`);
95 | }
96 | delete this.itemLookup[item.id];
97 | const index = this.items.indexOf(item);
98 | if (index > -1) {
99 | this.items.splice(index, 1);
100 | }
101 | return { item, delta: null };
102 | },
103 | 'conversation.item.input_audio_transcription.completed': (event) => {
104 | const { item_id, content_index, transcript } = event;
105 | const item = this.itemLookup[item_id];
106 | // We use a single space to represent an empty transcript for .formatted values
107 | // Otherwise it looks like no transcript provided
108 | const formattedTranscript = transcript || ' ';
109 | if (!item) {
110 | // We can receive transcripts in VAD mode before item.created
111 | // This happens specifically when audio is empty
112 | this.queuedTranscriptItems[item_id] = {
113 | transcript: formattedTranscript,
114 | };
115 | return { item: null, delta: null };
116 | } else {
117 | item.content[content_index].transcript = transcript;
118 | item.formatted.transcript = formattedTranscript;
119 | return { item, delta: { transcript } };
120 | }
121 | },
122 | 'input_audio_buffer.speech_started': (event) => {
123 | const { item_id, audio_start_ms } = event;
124 | this.queuedSpeechItems[item_id] = { audio_start_ms };
125 | return { item: null, delta: null };
126 | },
127 | 'input_audio_buffer.speech_stopped': (event, inputAudioBuffer) => {
128 | const { item_id, audio_end_ms } = event;
129 | const speech = this.queuedSpeechItems[item_id];
130 | speech.audio_end_ms = audio_end_ms;
131 | if (inputAudioBuffer) {
132 | const startIndex = Math.floor(
133 | (speech.audio_start_ms * this.defaultFrequency) / 1000,
134 | );
135 | const endIndex = Math.floor(
136 | (speech.audio_end_ms * this.defaultFrequency) / 1000,
137 | );
138 | speech.audio = inputAudioBuffer.slice(startIndex, endIndex);
139 | }
140 | return { item: null, delta: null };
141 | },
142 | 'response.created': (event) => {
143 | const { response } = event;
144 | if (!this.responseLookup[response.id]) {
145 | this.responseLookup[response.id] = response;
146 | this.responses.push(response);
147 | }
148 | return { item: null, delta: null };
149 | },
150 | 'response.output_item.added': (event) => {
151 | const { response_id, item } = event;
152 | const response = this.responseLookup[response_id];
153 | if (!response) {
154 | throw new Error(
155 | `response.output_item.added: Response "${response_id}" not found`,
156 | );
157 | }
158 | response.output.push(item.id);
159 | return { item: null, delta: null };
160 | },
161 | 'response.output_item.done': (event) => {
162 | const { item } = event;
163 | if (!item) {
164 | throw new Error(`response.output_item.done: Missing "item"`);
165 | }
166 | const foundItem = this.itemLookup[item.id];
167 | if (!foundItem) {
168 | throw new Error(
169 | `response.output_item.done: Item "${item.id}" not found`,
170 | );
171 | }
172 | foundItem.status = item.status;
173 | return { item: foundItem, delta: null };
174 | },
175 | 'response.content_part.added': (event) => {
176 | const { item_id, part } = event;
177 | const item = this.itemLookup[item_id];
178 | if (!item) {
179 | throw new Error(
180 | `response.content_part.added: Item "${item_id}" not found`,
181 | );
182 | }
183 | item.content.push(part);
184 | return { item, delta: null };
185 | },
186 | 'response.audio_transcript.delta': (event) => {
187 | const { item_id, content_index, delta } = event;
188 | const item = this.itemLookup[item_id];
189 | if (!item) {
190 | throw new Error(
191 | `response.audio_transcript.delta: Item "${item_id}" not found`,
192 | );
193 | }
194 | item.content[content_index].transcript += delta;
195 | item.formatted.transcript += delta;
196 | return { item, delta: { transcript: delta } };
197 | },
198 | 'response.audio.delta': (event) => {
199 | const { item_id, content_index, delta } = event;
200 | const item = this.itemLookup[item_id];
201 | if (!item) {
202 | throw new Error(`response.audio.delta: Item "${item_id}" not found`);
203 | }
204 |         // This never gets rendered; we care about the file data instead
205 | // item.content[content_index].audio += delta;
206 | const arrayBuffer = RealtimeUtils.base64ToArrayBuffer(delta);
207 | const appendValues = new Int16Array(arrayBuffer);
208 | item.formatted.audio = RealtimeUtils.mergeInt16Arrays(
209 | item.formatted.audio,
210 | appendValues,
211 | );
212 | return { item, delta: { audio: appendValues } };
213 | },
214 | 'response.text.delta': (event) => {
215 | const { item_id, content_index, delta } = event;
216 | const item = this.itemLookup[item_id];
217 | if (!item) {
218 | throw new Error(`response.text.delta: Item "${item_id}" not found`);
219 | }
220 | item.content[content_index].text += delta;
221 | item.formatted.text += delta;
222 | return { item, delta: { text: delta } };
223 | },
224 | 'response.function_call_arguments.delta': (event) => {
225 | const { item_id, delta } = event;
226 | const item = this.itemLookup[item_id];
227 | if (!item) {
228 | throw new Error(
229 | `response.function_call_arguments.delta: Item "${item_id}" not found`,
230 | );
231 | }
232 | item.arguments += delta;
233 | item.formatted.tool.arguments += delta;
234 | return { item, delta: { arguments: delta } };
235 | },
236 | };
237 |
238 | /**
239 | * Create a new RealtimeConversation instance
240 | * @returns {RealtimeConversation}
241 | */
242 | constructor() {
243 | this.clear();
244 | }
245 |
246 | /**
247 | * Clears the conversation history and resets to default
248 | * @returns {true}
249 | */
250 | clear() {
251 | this.itemLookup = {};
252 | this.items = [];
253 | this.responseLookup = {};
254 | this.responses = [];
255 | this.queuedSpeechItems = {};
256 | this.queuedTranscriptItems = {};
257 | this.queuedInputAudio = null;
258 | return true;
259 | }
260 |
261 | /**
262 | * Queue input audio for manual speech event
263 | * @param {Int16Array} inputAudio
264 | * @returns {Int16Array}
265 | */
266 | queueInputAudio(inputAudio) {
267 | this.queuedInputAudio = inputAudio;
268 | return inputAudio;
269 | }
270 |
271 | /**
272 | * Process an event from the WebSocket server and compose items
273 | * @param {Object} event
274 | * @param {...any} args
275 |    * @returns {{item: import('./client.js').ItemType | null, delta: ItemContentDeltaType | null}}
276 | */
277 | processEvent(event, ...args) {
278 | if (!event.event_id) {
279 | console.error(event);
280 | throw new Error(`Missing "event_id" on event`);
281 | }
282 | if (!event.type) {
283 | console.error(event);
284 | throw new Error(`Missing "type" on event`);
285 | }
286 | const eventProcessor = this.EventProcessors[event.type];
287 | if (!eventProcessor) {
288 | throw new Error(
289 | `Missing conversation event processor for "${event.type}"`,
290 | );
291 | }
292 | return eventProcessor.call(this, event, ...args);
293 | }
294 |
295 | /**
296 |    * Retrieves an item by id
297 | * @param {string} id
298 | * @returns {import('./client.js').ItemType}
299 | */
300 | getItem(id) {
301 | return this.itemLookup[id] || null;
302 | }
303 |
304 | /**
305 | * Retrieves all items in the conversation
306 | * @returns {import('./client.js').ItemType[]}
307 | */
308 | getItems() {
309 | return this.items.slice();
310 | }
311 | }
312 |
--------------------------------------------------------------------------------
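
A small sketch of feeding a server event through the RealtimeConversation above; the event payload and ids are illustrative.

import { RealtimeConversation } from './src/lib/realtime-api-beta/lib/conversation.js';

const conversation = new RealtimeConversation();

// Every event must carry event_id and type; unknown types throw
const { item } = conversation.processEvent({
  event_id: 'evt_example123', // illustrative id
  type: 'conversation.item.created',
  item: {
    id: 'item_001',
    type: 'message',
    role: 'user',
    content: [{ type: 'input_text', text: 'Hello there' }],
  },
});

console.log(item.status);             // "completed" (user messages complete immediately)
console.log(item.formatted.text);     // "Hello there"
console.log(conversation.getItems()); // [ item ]
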
/src/lib/realtime-api-beta/lib/event_handler.js:
--------------------------------------------------------------------------------
1 | /**
2 | * EventHandler callback
3 |  * @typedef {(event: {[key: string]: any}) => void} EventHandlerCallbackType
4 | */
5 |
6 | const sleep = (t) => new Promise((r) => setTimeout(() => r(), t));
7 |
8 | /**
9 | * Inherited class for RealtimeAPI and RealtimeClient
10 | * Adds basic event handling
11 | * @class
12 | */
13 | export class RealtimeEventHandler {
14 | /**
15 | * Create a new RealtimeEventHandler instance
16 | * @returns {RealtimeEventHandler}
17 | */
18 | constructor() {
19 | this.eventHandlers = {};
20 | this.nextEventHandlers = {};
21 | }
22 |
23 | /**
24 | * Clears all event handlers
25 | * @returns {true}
26 | */
27 | clearEventHandlers() {
28 | this.eventHandlers = {};
29 | this.nextEventHandlers = {};
30 | return true;
31 | }
32 |
33 | /**
34 | * Listen to specific events
35 | * @param {string} eventName The name of the event to listen to
36 | * @param {EventHandlerCallbackType} callback Code to execute on event
37 | * @returns {EventHandlerCallbackType}
38 | */
39 | on(eventName, callback) {
40 | this.eventHandlers[eventName] = this.eventHandlers[eventName] || [];
41 | this.eventHandlers[eventName].push(callback);
42 |     return callback;
43 | }
44 |
45 | /**
46 | * Listen for the next event of a specified type
47 | * @param {string} eventName The name of the event to listen to
48 | * @param {EventHandlerCallbackType} callback Code to execute on event
49 | * @returns {EventHandlerCallbackType}
50 | */
51 | onNext(eventName, callback) {
52 | this.nextEventHandlers[eventName] = this.nextEventHandlers[eventName] || [];
53 | this.nextEventHandlers[eventName].push(callback);
54 | }
55 |
56 | /**
57 | * Turns off event listening for specific events
58 | * Calling without a callback will remove all listeners for the event
59 | * @param {string} eventName
60 | * @param {EventHandlerCallbackType} [callback]
61 | * @returns {true}
62 | */
63 | off(eventName, callback) {
64 | const handlers = this.eventHandlers[eventName] || [];
65 | if (callback) {
66 | const index = handlers.indexOf(callback);
67 | if (index === -1) {
68 | throw new Error(
69 | `Could not turn off specified event listener for "${eventName}": not found as a listener`,
70 | );
71 | }
72 | handlers.splice(index, 1);
73 | } else {
74 | delete this.eventHandlers[eventName];
75 | }
76 | return true;
77 | }
78 |
79 | /**
80 | * Turns off event listening for the next event of a specific type
81 | * Calling without a callback will remove all listeners for the next event
82 | * @param {string} eventName
83 | * @param {EventHandlerCallbackType} [callback]
84 | * @returns {true}
85 | */
86 | offNext(eventName, callback) {
87 | const nextHandlers = this.nextEventHandlers[eventName] || [];
88 | if (callback) {
89 | const index = nextHandlers.indexOf(callback);
90 | if (index === -1) {
91 | throw new Error(
92 | `Could not turn off specified next event listener for "${eventName}": not found as a listener`,
93 | );
94 | }
95 | nextHandlers.splice(index, 1);
96 | } else {
97 | delete this.nextEventHandlers[eventName];
98 | }
99 | return true;
100 | }
101 |
102 | /**
103 | * Waits for next event of a specific type and returns the payload
104 | * @param {string} eventName
105 | * @param {number|null} [timeout]
106 | * @returns {Promise<{[key: string]: any}|null>}
107 | */
108 | async waitForNext(eventName, timeout = null) {
109 | const t0 = Date.now();
110 | let nextEvent;
111 | this.onNext(eventName, (event) => (nextEvent = event));
112 | while (!nextEvent) {
113 | if (timeout) {
114 | const t1 = Date.now();
115 | if (t1 - t0 > timeout) {
116 | return null;
117 | }
118 | }
119 | await sleep(1);
120 | }
121 | return nextEvent;
122 | }
123 |
124 | /**
125 | * Executes all events in the order they were added, with .on() event handlers executing before .onNext() handlers
126 | * @param {string} eventName
127 | * @param {any} event
128 | * @returns {true}
129 | */
130 | dispatch(eventName, event) {
131 | const handlers = [].concat(this.eventHandlers[eventName] || []);
132 | for (const handler of handlers) {
133 | handler(event);
134 | }
135 | const nextHandlers = [].concat(this.nextEventHandlers[eventName] || []);
136 | for (const nextHandler of nextHandlers) {
137 | nextHandler(event);
138 | }
139 | delete this.nextEventHandlers[eventName];
140 | return true;
141 | }
142 | }
143 |
--------------------------------------------------------------------------------
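
A minimal sketch of the event-handler semantics above: .on() handlers persist across dispatches, while .onNext() handlers and waitForNext() fire at most once.

import { RealtimeEventHandler } from './src/lib/realtime-api-beta/lib/event_handler.js';

const emitter = new RealtimeEventHandler();

emitter.on('ping', (event) => console.log('every ping:', event.n));
emitter.onNext('ping', (event) => console.log('first ping only:', event.n));

emitter.dispatch('ping', { n: 1 }); // both handlers fire
emitter.dispatch('ping', { n: 2 }); // only the persistent .on() handler fires

// Resolves with the next matching payload, or null once the timeout (ms) elapses
const payload = await emitter.waitForNext('pong', 50);
console.log(payload); // null, since nothing dispatched "pong"
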
/src/lib/realtime-api-beta/lib/utils.js:
--------------------------------------------------------------------------------
1 | const atob = globalThis.atob;
2 | const btoa = globalThis.btoa;
3 |
4 | /**
5 | * Basic utilities for the RealtimeAPI
6 | * @class
7 | */
8 | export class RealtimeUtils {
9 | /**
10 | * Converts Float32Array of amplitude data to ArrayBuffer in Int16Array format
11 | * @param {Float32Array} float32Array
12 | * @returns {ArrayBuffer}
13 | */
14 | static floatTo16BitPCM(float32Array) {
15 | const buffer = new ArrayBuffer(float32Array.length * 2);
16 | const view = new DataView(buffer);
17 | let offset = 0;
18 | for (let i = 0; i < float32Array.length; i++, offset += 2) {
19 | let s = Math.max(-1, Math.min(1, float32Array[i]));
20 | view.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7fff, true);
21 | }
22 | return buffer;
23 | }
24 |
25 | /**
26 | * Converts a base64 string to an ArrayBuffer
27 | * @param {string} base64
28 | * @returns {ArrayBuffer}
29 | */
30 | static base64ToArrayBuffer(base64) {
31 | const binaryString = atob(base64);
32 | const len = binaryString.length;
33 | const bytes = new Uint8Array(len);
34 | for (let i = 0; i < len; i++) {
35 | bytes[i] = binaryString.charCodeAt(i);
36 | }
37 | return bytes.buffer;
38 | }
39 |
40 | /**
41 | * Converts an ArrayBuffer, Int16Array or Float32Array to a base64 string
42 | * @param {ArrayBuffer|Int16Array|Float32Array} arrayBuffer
43 | * @returns {string}
44 | */
45 | static arrayBufferToBase64(arrayBuffer) {
46 | if (arrayBuffer instanceof Float32Array) {
47 | arrayBuffer = this.floatTo16BitPCM(arrayBuffer);
48 | } else if (arrayBuffer instanceof Int16Array) {
49 | arrayBuffer = arrayBuffer.buffer;
50 | }
51 | let binary = '';
52 | let bytes = new Uint8Array(arrayBuffer);
53 | const chunkSize = 0x8000; // 32KB chunk size
54 | for (let i = 0; i < bytes.length; i += chunkSize) {
55 | let chunk = bytes.subarray(i, i + chunkSize);
56 | binary += String.fromCharCode.apply(null, chunk);
57 | }
58 | return btoa(binary);
59 | }
60 |
61 | /**
62 | * Merge two Int16Arrays from Int16Arrays or ArrayBuffers
63 | * @param {ArrayBuffer|Int16Array} left
64 | * @param {ArrayBuffer|Int16Array} right
65 | * @returns {Int16Array}
66 | */
67 | static mergeInt16Arrays(left, right) {
68 | if (left instanceof ArrayBuffer) {
69 | left = new Int16Array(left);
70 | }
71 | if (right instanceof ArrayBuffer) {
72 | right = new Int16Array(right);
73 | }
74 | if (!(left instanceof Int16Array) || !(right instanceof Int16Array)) {
75 | throw new Error(`Both items must be Int16Array`);
76 | }
77 | const newValues = new Int16Array(left.length + right.length);
78 | for (let i = 0; i < left.length; i++) {
79 | newValues[i] = left[i];
80 | }
81 | for (let j = 0; j < right.length; j++) {
82 | newValues[left.length + j] = right[j];
83 | }
84 | return newValues;
85 | }
86 |
87 | /**
88 | * Generates an id to send with events and messages
89 | * @param {string} prefix
90 | * @param {number} [length]
91 | * @returns {string}
92 | */
93 | static generateId(prefix, length = 21) {
94 | // base58; non-repeating chars
95 | const chars = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz';
96 | const str = Array(length - prefix.length)
97 | .fill(0)
98 | .map((_) => chars[Math.floor(Math.random() * chars.length)])
99 | .join('');
100 | return `${prefix}${str}`;
101 | }
102 | }
103 |
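104 | // Usage sketch (illustrative only, not part of the library): round-trips a short
105 | // Float32Array of samples through the PCM16/base64 helpers above; the sample
106 | // values are made up for demonstration.
107 | //
108 | // const samples = new Float32Array([0, 0.25, -0.25, 1, -1]);
109 | // const b64 = RealtimeUtils.arrayBufferToBase64(samples); // Float32 -> Int16 PCM -> base64
110 | // const pcm = new Int16Array(RealtimeUtils.base64ToArrayBuffer(b64)); // back to Int16 samples
111 | // const eventId = RealtimeUtils.generateId('evt_'); // 21-char id, e.g. "evt_..."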
--------------------------------------------------------------------------------
/src/lib/wavtools/dist/index.d.ts:
--------------------------------------------------------------------------------
1 | import { AudioAnalysis } from './lib/analysis/audio_analysis.js';
2 | import { WavPacker } from './lib/wav_packer.js';
3 | import { WavStreamPlayer } from './lib/wav_stream_player.js';
4 | import { WavRecorder } from './lib/wav_recorder.js';
5 | export { AudioAnalysis, WavPacker, WavStreamPlayer, WavRecorder };
6 | //# sourceMappingURL=index.d.ts.map
--------------------------------------------------------------------------------
/src/lib/wavtools/dist/index.d.ts.map:
--------------------------------------------------------------------------------
1 | {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../index.js"],"names":[],"mappings":"8BAC8B,kCAAkC;0BADtC,qBAAqB;gCAEf,4BAA4B;4BAChC,uBAAuB"}
--------------------------------------------------------------------------------
/src/lib/wavtools/dist/lib/analysis/audio_analysis.d.ts:
--------------------------------------------------------------------------------
1 | /**
2 | * Output of AudioAnalysis for the frequency domain of the audio
3 | * @typedef {Object} AudioAnalysisOutputType
4 | * @property {Float32Array} values Amplitude of this frequency between {0, 1} inclusive
5 | * @property {number[]} frequencies Raw frequency bucket values
6 | * @property {string[]} labels Labels for the frequency bucket values
7 | */
8 | /**
9 | * Analyzes audio for visual output
10 | * @class
11 | */
12 | export class AudioAnalysis {
13 | /**
14 | * Retrieves frequency domain data from an AnalyserNode adjusted to a decibel range
15 | * returns human-readable formatting and labels
16 | * @param {AnalyserNode} analyser
17 | * @param {number} sampleRate
18 | * @param {Float32Array} [fftResult]
19 | * @param {"frequency"|"music"|"voice"} [analysisType]
20 | * @param {number} [minDecibels] default -100
21 | * @param {number} [maxDecibels] default -30
22 | * @returns {AudioAnalysisOutputType}
23 | */
24 | static getFrequencies(analyser: AnalyserNode, sampleRate: number, fftResult?: Float32Array, analysisType?: "frequency" | "music" | "voice", minDecibels?: number, maxDecibels?: number): AudioAnalysisOutputType;
25 | /**
26 | * Creates a new AudioAnalysis instance for an HTMLAudioElement
27 | * @param {HTMLAudioElement} audioElement
28 | * @param {AudioBuffer|null} [audioBuffer] If provided, will cache all frequency domain data from the buffer
29 | * @returns {AudioAnalysis}
30 | */
31 | constructor(audioElement: HTMLAudioElement, audioBuffer?: AudioBuffer | null);
32 | fftResults: any[];
33 | audio: HTMLAudioElement;
34 | context: any;
35 | analyser: any;
36 | sampleRate: any;
37 | audioBuffer: any;
38 | /**
39 | * Gets the current frequency domain data from the playing audio track
40 | * @param {"frequency"|"music"|"voice"} [analysisType]
41 | * @param {number} [minDecibels] default -100
42 | * @param {number} [maxDecibels] default -30
43 | * @returns {AudioAnalysisOutputType}
44 | */
45 | getFrequencies(analysisType?: "frequency" | "music" | "voice", minDecibels?: number, maxDecibels?: number): AudioAnalysisOutputType;
46 | /**
47 | * Resume the internal AudioContext if it was suspended due to the lack of
48 | * user interaction when the AudioAnalysis was instantiated.
49 | * @returns {Promise}
50 | */
51 | resumeIfSuspended(): Promise<true>;
52 | }
53 | /**
54 | * Output of AudioAnalysis for the frequency domain of the audio
55 | */
56 | export type AudioAnalysisOutputType = {
57 | /**
58 | * Amplitude of this frequency between {0, 1} inclusive
59 | */
60 | values: Float32Array;
61 | /**
62 | * Raw frequency bucket values
63 | */
64 | frequencies: number[];
65 | /**
66 | * Labels for the frequency bucket values
67 | */
68 | labels: string[];
69 | };
70 | //# sourceMappingURL=audio_analysis.d.ts.map
--------------------------------------------------------------------------------
/src/lib/wavtools/dist/lib/analysis/audio_analysis.d.ts.map:
--------------------------------------------------------------------------------
1 | {"version":3,"file":"audio_analysis.d.ts","sourceRoot":"","sources":["../../../lib/analysis/audio_analysis.js"],"names":[],"mappings":"AAOA;;;;;;GAMG;AAEH;;;GAGG;AACH;IACE;;;;;;;;;;OAUG;IACH,gCARW,YAAY,cACZ,MAAM,cACN,YAAY,iBACZ,WAAW,GAAC,OAAO,GAAC,OAAO,gBAC3B,MAAM,gBACN,MAAM,GACJ,uBAAuB,CAwDnC;IAED;;;;;OAKG;IACH,0BAJW,gBAAgB,gBAChB,WAAW,GAAC,IAAI,EAkE1B;IA9DC,kBAAoB;IA2ClB,wBAAyB;IACzB,aAAkC;IAClC,cAAwB;IACxB,gBAA4B;IAC5B,iBAA8B;IAiBlC;;;;;;OAMG;IACH,8BALW,WAAW,GAAC,OAAO,GAAC,OAAO,gBAC3B,MAAM,gBACN,MAAM,GACJ,uBAAuB,CAwBnC;IAED;;;;OAIG;IACH,qBAFa,OAAO,CAAC,IAAI,CAAC,CAOzB;CACF;;;;;;;;YA9La,YAAY;;;;iBACZ,MAAM,EAAE;;;;YACR,MAAM,EAAE"}
--------------------------------------------------------------------------------
/src/lib/wavtools/dist/lib/analysis/constants.d.ts:
--------------------------------------------------------------------------------
1 | /**
2 | * All note frequencies from 1st to 8th octave
3 | * in format "A#8" (A#, 8th octave)
4 | */
5 | export const noteFrequencies: any[];
6 | export const noteFrequencyLabels: any[];
7 | export const voiceFrequencies: any[];
8 | export const voiceFrequencyLabels: any[];
9 | //# sourceMappingURL=constants.d.ts.map
--------------------------------------------------------------------------------
/src/lib/wavtools/dist/lib/analysis/constants.d.ts.map:
--------------------------------------------------------------------------------
1 | {"version":3,"file":"constants.d.ts","sourceRoot":"","sources":["../../../lib/analysis/constants.js"],"names":[],"mappings":"AA6BA;;;GAGG;AACH,oCAAkC;AAClC,wCAAsC;AActC,qCAKG;AACH,yCAKG"}
--------------------------------------------------------------------------------
/src/lib/wavtools/dist/lib/wav_packer.d.ts:
--------------------------------------------------------------------------------
1 | /**
2 | * Raw wav audio file contents
3 | * @typedef {Object} WavPackerAudioType
4 | * @property {Blob} blob
5 | * @property {string} url
6 | * @property {number} channelCount
7 | * @property {number} sampleRate
8 | * @property {number} duration
9 | */
10 | /**
11 | * Utility class for assembling PCM16 "audio/wav" data
12 | * @class
13 | */
14 | export class WavPacker {
15 | /**
16 | * Converts Float32Array of amplitude data to ArrayBuffer in Int16Array format
17 | * @param {Float32Array} float32Array
18 | * @returns {ArrayBuffer}
19 | */
20 | static floatTo16BitPCM(float32Array: Float32Array): ArrayBuffer;
21 | /**
22 | * Concatenates two ArrayBuffers
23 | * @param {ArrayBuffer} leftBuffer
24 | * @param {ArrayBuffer} rightBuffer
25 | * @returns {ArrayBuffer}
26 | */
27 | static mergeBuffers(leftBuffer: ArrayBuffer, rightBuffer: ArrayBuffer): ArrayBuffer;
28 | /**
29 | * Packs data into an Int16 format
30 | * @private
31 | * @param {number} size 0 = 1x Int16, 1 = 2x Int16
32 | * @param {number} arg value to pack
33 | * @returns
34 | */
35 | private _packData;
36 | /**
37 | * Packs audio into "audio/wav" Blob
38 | * @param {number} sampleRate
39 | * @param {{bitsPerSample: number, channels: Array, data: Int16Array}} audio
40 | * @returns {WavPackerAudioType}
41 | */
42 | pack(sampleRate: number, audio: {
43 | bitsPerSample: number;
44 | channels: Array;
45 | data: Int16Array;
46 | }): WavPackerAudioType;
47 | }
48 | /**
49 | * Raw wav audio file contents
50 | */
51 | export type WavPackerAudioType = {
52 | blob: Blob;
53 | url: string;
54 | channelCount: number;
55 | sampleRate: number;
56 | duration: number;
57 | };
58 | //# sourceMappingURL=wav_packer.d.ts.map
--------------------------------------------------------------------------------
/src/lib/wavtools/dist/lib/wav_packer.d.ts.map:
--------------------------------------------------------------------------------
1 | {"version":3,"file":"wav_packer.d.ts","sourceRoot":"","sources":["../../lib/wav_packer.js"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH;;;GAGG;AACH;IACE;;;;OAIG;IACH,qCAHW,YAAY,GACV,WAAW,CAWvB;IAED;;;;;OAKG;IACH,gCAJW,WAAW,eACX,WAAW,GACT,WAAW,CASvB;IAED;;;;;;OAMG;IACH,kBAKC;IAED;;;;;OAKG;IACH,iBAJW,MAAM,SACN;QAAC,aAAa,EAAE,MAAM,CAAC;QAAC,QAAQ,EAAE,KAAK,CAAC,YAAY,CAAC,CAAC;QAAC,IAAI,EAAE,UAAU,CAAA;KAAC,GACtE,kBAAkB,CA6C9B;CACF;;;;;UA3Ga,IAAI;SACJ,MAAM;kBACN,MAAM;gBACN,MAAM;cACN,MAAM"}
--------------------------------------------------------------------------------
/src/lib/wavtools/dist/lib/wav_recorder.d.ts:
--------------------------------------------------------------------------------
1 | /**
2 | * Decodes audio into a wav file
3 | * @typedef {Object} DecodedAudioType
4 | * @property {Blob} blob
5 | * @property {string} url
6 | * @property {Float32Array} values
7 | * @property {AudioBuffer} audioBuffer
8 | */
9 | /**
10 | * Records live stream of user audio as PCM16 "audio/wav" data
11 | * @class
12 | */
13 | export class WavRecorder {
14 | /**
15 | * Decodes audio data from multiple formats to a Blob, url, Float32Array and AudioBuffer
16 | * @param {Blob|Float32Array|Int16Array|ArrayBuffer|number[]} audioData
17 | * @param {number} sampleRate
18 | * @param {number} fromSampleRate
19 | * @returns {Promise}
20 | */
21 | static decode(audioData: Blob | Float32Array | Int16Array | ArrayBuffer | number[], sampleRate?: number, fromSampleRate?: number): Promise<DecodedAudioType>;
22 | /**
23 | * Create a new WavRecorder instance
24 | * @param {{sampleRate?: number, outputToSpeakers?: boolean, debug?: boolean}} [options]
25 | * @returns {WavRecorder}
26 | */
27 | constructor({ sampleRate, outputToSpeakers, debug, }?: {
28 | sampleRate?: number;
29 | outputToSpeakers?: boolean;
30 | debug?: boolean;
31 | });
32 | scriptSrc: any;
33 | sampleRate: number;
34 | outputToSpeakers: boolean;
35 | debug: boolean;
36 | _deviceChangeCallback: () => Promise<void>;
37 | _devices: any[];
38 | stream: any;
39 | processor: any;
40 | source: any;
41 | node: any;
42 | recording: boolean;
43 | _lastEventId: number;
44 | eventReceipts: {};
45 | eventTimeout: number;
46 | _chunkProcessor: () => void;
47 | _chunkProcessorBuffer: {
48 | raw: ArrayBuffer;
49 | mono: ArrayBuffer;
50 | };
51 | /**
52 | * Logs data in debug mode
53 | * @param {...any} arguments
54 | * @returns {true}
55 | */
56 | log(...args: any[]): true;
57 | /**
58 | * Retrieves the current sampleRate for the recorder
59 | * @returns {number}
60 | */
61 | getSampleRate(): number;
62 | /**
63 | * Retrieves the current status of the recording
64 | * @returns {"ended"|"paused"|"recording"}
65 | */
66 | getStatus(): "ended" | "paused" | "recording";
67 | /**
68 | * Sends an event to the AudioWorklet
69 | * @private
70 | * @param {string} name
71 | * @param {{[key: string]: any}} data
72 | * @param {AudioWorkletNode} [_processor]
73 | * @returns {Promise<{[key: string]: any}>}
74 | */
75 | private _event;
76 | /**
77 | * Sets device change callback, remove if callback provided is `null`
78 | * @param {(Array): void|null} callback
79 | * @returns {true}
80 | */
81 | listenForDeviceChange(callback: any): true;
82 | /**
83 | * Manually request permission to use the microphone
84 | * @returns {Promise}
85 | */
86 | requestPermission(): Promise<true>;
87 | /**
88 | * List all eligible devices for recording, will request permission to use microphone
89 | * @returns {Promise<Array<MediaDeviceInfo & {default: boolean}>>}
90 | */
91 | listDevices(): Promise<Array<MediaDeviceInfo & {
92 | default: boolean;
93 | }>>;
94 | /**
95 | * Begins a recording session and requests microphone permissions if not already granted
96 | * Microphone recording indicator will appear on browser tab but status will be "paused"
97 | * @param {string} [deviceId] if no device provided, default device will be used
98 | * @returns {Promise}
99 | */
100 | begin(deviceId?: string): Promise<true>;
101 | analyser: any;
102 | /**
103 | * Gets the current frequency domain data from the recording track
104 | * @param {"frequency"|"music"|"voice"} [analysisType]
105 | * @param {number} [minDecibels] default -100
106 | * @param {number} [maxDecibels] default -30
107 | * @returns {import('./analysis/audio_analysis.js').AudioAnalysisOutputType}
108 | */
109 | getFrequencies(analysisType?: "frequency" | "music" | "voice", minDecibels?: number, maxDecibels?: number): import("./analysis/audio_analysis.js").AudioAnalysisOutputType;
110 | /**
111 | * Pauses the recording
112 | * Keeps microphone stream open but halts storage of audio
113 | * @returns {Promise}
114 | */
115 | pause(): Promise<true>;
116 | /**
117 | * Start recording stream and storing to memory from the connected audio source
118 | * @param {(data: { mono: Int16Array; raw: Int16Array }) => any} [chunkProcessor]
119 | * @param {number} [chunkSize] chunkProcessor will not be triggered until this size threshold is met in mono audio
120 | * @returns {Promise}
121 | */
122 | record(chunkProcessor?: (data: {
123 | mono: Int16Array;
124 | raw: Int16Array;
125 | }) => any, chunkSize?: number): Promise<true>;
126 | _chunkProcessorSize: number;
127 | /**
128 | * Clears the audio buffer, empties stored recording
129 | * @returns {Promise}
130 | */
131 | clear(): Promise<true>;
132 | /**
133 | * Reads the current audio stream data
134 | * @returns {Promise<{meanValues: Float32Array, channels: Array}>}
135 | */
136 | read(): Promise<{
137 | meanValues: Float32Array;
138 | channels: Array;
139 | }>;
140 | /**
141 | * Saves the current audio stream to a file
142 | * @param {boolean} [force] Force saving while still recording
143 | * @returns {Promise}
144 | */
145 | save(force?: boolean): Promise<import("./wav_packer.js").WavPackerAudioType>;
146 | /**
147 | * Ends the current recording session and saves the result
148 | * @returns {Promise}
149 | */
150 | end(): Promise<import("./wav_packer.js").WavPackerAudioType>;
151 | /**
152 | * Performs a full cleanup of WavRecorder instance
153 | * Stops actively listening via microphone and removes existing listeners
154 | * @returns {Promise}
155 | */
156 | quit(): Promise<true>;
157 | }
158 | /**
159 | * Decodes audio into a wav file
160 | */
161 | export type DecodedAudioType = {
162 | blob: Blob;
163 | url: string;
164 | values: Float32Array;
165 | audioBuffer: AudioBuffer;
166 | };
167 | //# sourceMappingURL=wav_recorder.d.ts.map
--------------------------------------------------------------------------------
/src/lib/wavtools/dist/lib/wav_recorder.d.ts.map:
--------------------------------------------------------------------------------
1 | {"version":3,"file":"wav_recorder.d.ts","sourceRoot":"","sources":["../../lib/wav_recorder.js"],"names":[],"mappings":"AAIA;;;;;;;GAOG;AAEH;;;GAGG;AACH;IAsCE;;;;;;OAMG;IACH,yBALW,IAAI,GAAC,YAAY,GAAC,UAAU,GAAC,WAAW,GAAC,MAAM,EAAE,eACjD,MAAM,mBACN,MAAM,GACJ,OAAO,CAAC,gBAAgB,CAAC,CAqErC;IA/GD;;;;OAIG;IACH,uDAHW;QAAC,UAAU,CAAC,EAAE,MAAM,CAAC;QAAC,gBAAgB,CAAC,EAAE,OAAO,CAAC;QAAC,KAAK,CAAC,EAAE,OAAO,CAAA;KAAC,EAiC5E;IAxBC,eAAkC;IAElC,mBAA4B;IAC5B,0BAAwC;IACxC,eAAoB;IACpB,2CAAiC;IACjC,gBAAkB;IAElB,YAAkB;IAClB,eAAqB;IACrB,YAAkB;IAClB,UAAgB;IAChB,mBAAsB;IAEtB,qBAAqB;IACrB,kBAAuB;IACvB,qBAAwB;IAExB,4BAA+B;IAE/B;;;MAGC;IA+EH;;;;OAIG;IACH,qBAFa,IAAI,CAOhB;IAED;;;OAGG;IACH,iBAFa,MAAM,CAIlB;IAED;;;OAGG;IACH,aAFa,OAAO,GAAC,QAAQ,GAAC,WAAW,CAUxC;IAED;;;;;;;OAOG;IACH,eAqBC;IAED;;;;OAIG;IACH,sCAFa,IAAI,CAmChB;IAED;;;OAGG;IACH,qBAFa,OAAO,CAAC,IAAI,CAAC,CAoBzB;IAED;;;OAGG;IACH,eAFa,OAAO,CAAC,KAAK,CAAC,eAAe,GAAG;QAAC,OAAO,EAAE,OAAO,CAAA;KAAC,CAAC,CAAC,CA8BhE;IAED;;;;;OAKG;IACH,iBAHW,MAAM,GACJ,OAAO,CAAC,IAAI,CAAC,CAkFzB;IAHC,cAAwB;IAK1B;;;;;;OAMG;IACH,8BALW,WAAW,GAAC,OAAO,GAAC,OAAO,gBAC3B,MAAM,gBACN,MAAM,GACJ,OAAO,8BAA8B,EAAE,uBAAuB,CAkB1E;IAED;;;;OAIG;IACH,SAFa,OAAO,CAAC,IAAI,CAAC,CAezB;IAED;;;;;OAKG;IACH,wBAJW,CAAC,IAAI,EAAE;QAAE,IAAI,EAAE,UAAU,CAAC;QAAC,GAAG,EAAE,UAAU,CAAA;KAAE,KAAK,GAAG,cACpD,MAAM,GACJ,OAAO,CAAC,IAAI,CAAC,CAoBzB;IATC,4BAAoC;IAWtC;;;OAGG;IACH,SAFa,OAAO,CAAC,IAAI,CAAC,CAQzB;IAED;;;OAGG;IACH,QAFa,OAAO,CAAC;QAAC,UAAU,EAAE,YAAY,CAAC;QAAC,QAAQ,EAAE,KAAK,CAAC,YAAY,CAAC,CAAA;KAAC,CAAC,CAS9E;IAED;;;;OAIG;IACH,aAHW,OAAO,GACL,OAAO,CAAC,OAAO,iBAAiB,EAAE,kBAAkB,CAAC,CAgBjE;IAED;;;OAGG;IACH,OAFa,OAAO,CAAC,OAAO,iBAAiB,EAAE,kBAAkB,CAAC,CA8BjE;IAED;;;;OAIG;IACH,QAFa,OAAO,CAAC,IAAI,CAAC,CAQzB;CACF;;;;;UA1hBa,IAAI;SACJ,MAAM;YACN,YAAY;iBACZ,WAAW"}
--------------------------------------------------------------------------------
/src/lib/wavtools/dist/lib/wav_stream_player.d.ts:
--------------------------------------------------------------------------------
1 | /**
2 | * Plays audio streams received in raw PCM16 chunks from the browser
3 | * @class
4 | */
5 | export class WavStreamPlayer {
6 | /**
7 | * Creates a new WavStreamPlayer instance
8 | * @param {{sampleRate?: number}} options
9 | * @returns {WavStreamPlayer}
10 | */
11 | constructor({ sampleRate }?: {
12 | sampleRate?: number;
13 | });
14 | scriptSrc: any;
15 | sampleRate: number;
16 | context: any;
17 | stream: any;
18 | analyser: any;
19 | trackSampleOffsets: {};
20 | interruptedTrackIds: {};
21 | /**
22 | * Connects the audio context and enables output to speakers
23 | * @returns {Promise}
24 | */
25 | connect(): Promise<true>;
26 | /**
27 | * Gets the current frequency domain data from the playing track
28 | * @param {"frequency"|"music"|"voice"} [analysisType]
29 | * @param {number} [minDecibels] default -100
30 | * @param {number} [maxDecibels] default -30
31 | * @returns {import('./analysis/audio_analysis.js').AudioAnalysisOutputType}
32 | */
33 | getFrequencies(analysisType?: "frequency" | "music" | "voice", minDecibels?: number, maxDecibels?: number): import("./analysis/audio_analysis.js").AudioAnalysisOutputType;
34 | /**
35 | * Starts audio streaming
36 | * @private
37 | * @returns {Promise}
38 | */
39 | private _start;
40 | /**
41 | * Adds 16BitPCM data to the currently playing audio stream
42 | * You can add chunks beyond the current play point and they will be queued for play
43 | * @param {ArrayBuffer|Int16Array} arrayBuffer
44 | * @param {string} [trackId]
45 | * @returns {Int16Array}
46 | */
47 | add16BitPCM(arrayBuffer: ArrayBuffer | Int16Array, trackId?: string): Int16Array;
48 | /**
49 | * Gets the offset (sample count) of the currently playing stream
50 | * @param {boolean} [interrupt]
51 | * @returns {{trackId: string|null, offset: number, currentTime: number}}
52 | */
53 | getTrackSampleOffset(interrupt?: boolean): {
54 | trackId: string | null;
55 | offset: number;
56 | currentTime: number;
57 | };
58 | /**
59 | * Strips the current stream and returns the sample offset of the audio
60 | * @param {boolean} [interrupt]
61 | * @returns {{trackId: string|null, offset: number, currentTime: number}}
62 | */
63 | interrupt(): {
64 | trackId: string | null;
65 | offset: number;
66 | currentTime: number;
67 | };
68 | }
69 | //# sourceMappingURL=wav_stream_player.d.ts.map
--------------------------------------------------------------------------------
/src/lib/wavtools/dist/lib/wav_stream_player.d.ts.map:
--------------------------------------------------------------------------------
1 | {"version":3,"file":"wav_stream_player.d.ts","sourceRoot":"","sources":["../../lib/wav_stream_player.js"],"names":[],"mappings":"AAGA;;;GAGG;AACH;IACE;;;;OAIG;IACH,6BAHW;QAAC,UAAU,CAAC,EAAE,MAAM,CAAA;KAAC,EAW/B;IAPC,eAAmC;IACnC,mBAA4B;IAC5B,aAAmB;IACnB,YAAkB;IAClB,cAAoB;IACpB,uBAA4B;IAC5B,wBAA6B;IAG/B;;;OAGG;IACH,WAFa,OAAO,CAAC,IAAI,CAAC,CAkBzB;IAED;;;;;;OAMG;IACH,8BALW,WAAW,GAAC,OAAO,GAAC,OAAO,gBAC3B,MAAM,gBACN,MAAM,GACJ,OAAO,8BAA8B,EAAE,uBAAuB,CAkB1E;IAED;;;;OAIG;IACH,eAkBC;IAED;;;;;;OAMG;IACH,yBAJW,WAAW,GAAC,UAAU,YACtB,MAAM,GACJ,UAAU,CAqBtB;IAED;;;;OAIG;IACH,iCAHW,OAAO,GACL;QAAC,OAAO,EAAE,MAAM,GAAC,IAAI,CAAC;QAAC,MAAM,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAA;KAAC,CAqBvE;IAED;;;;OAIG;IACH,aAFa;QAAC,OAAO,EAAE,MAAM,GAAC,IAAI,CAAC;QAAC,MAAM,EAAE,MAAM,CAAC;QAAC,WAAW,EAAE,MAAM,CAAA;KAAC,CAIvE;CACF"}
--------------------------------------------------------------------------------
/src/lib/wavtools/dist/lib/worklets/audio_processor.d.ts:
--------------------------------------------------------------------------------
1 | export const AudioProcessorSrc: any;
2 | //# sourceMappingURL=audio_processor.d.ts.map
--------------------------------------------------------------------------------
/src/lib/wavtools/dist/lib/worklets/audio_processor.d.ts.map:
--------------------------------------------------------------------------------
1 | {"version":3,"file":"audio_processor.d.ts","sourceRoot":"","sources":["../../../lib/worklets/audio_processor.js"],"names":[],"mappings":"AAqNA,oCAAqC"}
--------------------------------------------------------------------------------
/src/lib/wavtools/dist/lib/worklets/stream_processor.d.ts:
--------------------------------------------------------------------------------
1 | export const StreamProcessorWorklet: "\nclass StreamProcessor extends AudioWorkletProcessor {\n constructor() {\n super();\n this.hasStarted = false;\n this.hasInterrupted = false;\n this.outputBuffers = [];\n this.bufferLength = 128;\n this.write = { buffer: new Float32Array(this.bufferLength), trackId: null };\n this.writeOffset = 0;\n this.trackSampleOffsets = {};\n this.port.onmessage = (event) => {\n if (event.data) {\n const payload = event.data;\n if (payload.event === 'write') {\n const int16Array = payload.buffer;\n const float32Array = new Float32Array(int16Array.length);\n for (let i = 0; i < int16Array.length; i++) {\n float32Array[i] = int16Array[i] / 0x8000; // Convert Int16 to Float32\n }\n this.writeData(float32Array, payload.trackId);\n } else if (\n payload.event === 'offset' ||\n payload.event === 'interrupt'\n ) {\n const requestId = payload.requestId;\n const trackId = this.write.trackId;\n const offset = this.trackSampleOffsets[trackId] || 0;\n this.port.postMessage({\n event: 'offset',\n requestId,\n trackId,\n offset,\n });\n if (payload.event === 'interrupt') {\n this.hasInterrupted = true;\n }\n } else {\n throw new Error(`Unhandled event \"${payload.event}\"`);\n }\n }\n };\n }\n\n writeData(float32Array, trackId = null) {\n let { buffer } = this.write;\n let offset = this.writeOffset;\n for (let i = 0; i < float32Array.length; i++) {\n buffer[offset++] = float32Array[i];\n if (offset >= buffer.length) {\n this.outputBuffers.push(this.write);\n this.write = { buffer: new Float32Array(this.bufferLength), trackId };\n buffer = this.write.buffer;\n offset = 0;\n }\n }\n this.writeOffset = offset;\n return true;\n }\n\n process(inputs, outputs, parameters) {\n const output = outputs[0];\n const outputChannelData = output[0];\n const outputBuffers = this.outputBuffers;\n if (this.hasInterrupted) {\n this.port.postMessage({ event: 'stop' });\n return false;\n } else if (outputBuffers.length) {\n this.hasStarted = true;\n const { buffer, trackId } = outputBuffers.shift();\n for (let i = 0; i < outputChannelData.length; i++) {\n outputChannelData[i] = buffer[i] || 0;\n }\n if (trackId) {\n this.trackSampleOffsets[trackId] =\n this.trackSampleOffsets[trackId] || 0;\n this.trackSampleOffsets[trackId] += buffer.length;\n }\n return true;\n } else if (this.hasStarted) {\n this.port.postMessage({ event: 'stop' });\n return false;\n } else {\n return true;\n }\n }\n}\n\nregisterProcessor('stream_processor', StreamProcessor);\n";
2 | export const StreamProcessorSrc: any;
3 | //# sourceMappingURL=stream_processor.d.ts.map
--------------------------------------------------------------------------------
/src/lib/wavtools/dist/lib/worklets/stream_processor.d.ts.map:
--------------------------------------------------------------------------------
1 | {"version":3,"file":"stream_processor.d.ts","sourceRoot":"","sources":["../../../lib/worklets/stream_processor.js"],"names":[],"mappings":"AAAA,q4FAyFE;AAMF,qCAAsC"}
--------------------------------------------------------------------------------
/src/lib/wavtools/index.js:
--------------------------------------------------------------------------------
1 | import { WavPacker } from './lib/wav_packer.js';
2 | import { AudioAnalysis } from './lib/analysis/audio_analysis.js';
3 | import { WavStreamPlayer } from './lib/wav_stream_player.js';
4 | import { WavRecorder } from './lib/wav_recorder.js';
5 |
6 | export { AudioAnalysis, WavPacker, WavStreamPlayer, WavRecorder };
7 |
--------------------------------------------------------------------------------
/src/lib/wavtools/lib/analysis/audio_analysis.js:
--------------------------------------------------------------------------------
1 | import {
2 | noteFrequencies,
3 | noteFrequencyLabels,
4 | voiceFrequencies,
5 | voiceFrequencyLabels,
6 | } from './constants.js';
7 |
8 | /**
9 | * Output of AudioAnalysis for the frequency domain of the audio
10 | * @typedef {Object} AudioAnalysisOutputType
11 | * @property {Float32Array} values Amplitude of this frequency between {0, 1} inclusive
12 | * @property {number[]} frequencies Raw frequency bucket values
13 | * @property {string[]} labels Labels for the frequency bucket values
14 | */
15 |
16 | /**
17 | * Analyzes audio for visual output
18 | * @class
19 | */
20 | export class AudioAnalysis {
21 | /**
22 | * Retrieves frequency domain data from an AnalyserNode adjusted to a decibel range
23 | * returns human-readable formatting and labels
24 | * @param {AnalyserNode} analyser
25 | * @param {number} sampleRate
26 | * @param {Float32Array} [fftResult]
27 | * @param {"frequency"|"music"|"voice"} [analysisType]
28 | * @param {number} [minDecibels] default -100
29 | * @param {number} [maxDecibels] default -30
30 | * @returns {AudioAnalysisOutputType}
31 | */
32 | static getFrequencies(
33 | analyser,
34 | sampleRate,
35 | fftResult,
36 | analysisType = 'frequency',
37 | minDecibels = -100,
38 | maxDecibels = -30,
39 | ) {
40 | if (!fftResult) {
41 | fftResult = new Float32Array(analyser.frequencyBinCount);
42 | analyser.getFloatFrequencyData(fftResult);
43 | }
44 | const nyquistFrequency = sampleRate / 2;
45 | const frequencyStep = (1 / fftResult.length) * nyquistFrequency;
46 | let outputValues;
47 | let frequencies;
48 | let labels;
49 | if (analysisType === 'music' || analysisType === 'voice') {
50 | const useFrequencies =
51 | analysisType === 'voice' ? voiceFrequencies : noteFrequencies;
52 | const aggregateOutput = Array(useFrequencies.length).fill(minDecibels);
53 | for (let i = 0; i < fftResult.length; i++) {
54 | const frequency = i * frequencyStep;
55 | const amplitude = fftResult[i];
56 | for (let n = useFrequencies.length - 1; n >= 0; n--) {
57 | if (frequency > useFrequencies[n]) {
58 | aggregateOutput[n] = Math.max(aggregateOutput[n], amplitude);
59 | break;
60 | }
61 | }
62 | }
63 | outputValues = aggregateOutput;
64 | frequencies =
65 | analysisType === 'voice' ? voiceFrequencies : noteFrequencies;
66 | labels =
67 | analysisType === 'voice' ? voiceFrequencyLabels : noteFrequencyLabels;
68 | } else {
69 | outputValues = Array.from(fftResult);
70 | frequencies = outputValues.map((_, i) => frequencyStep * i);
71 | labels = frequencies.map((f) => `${f.toFixed(2)} Hz`);
72 | }
73 | // We normalize to {0, 1}
74 | const normalizedOutput = outputValues.map((v) => {
75 | return Math.max(
76 | 0,
77 | Math.min((v - minDecibels) / (maxDecibels - minDecibels), 1),
78 | );
79 | });
80 | const values = new Float32Array(normalizedOutput);
81 | return {
82 | values,
83 | frequencies,
84 | labels,
85 | };
86 | }
87 |
88 | /**
89 | * Creates a new AudioAnalysis instance for an HTMLAudioElement
90 | * @param {HTMLAudioElement} audioElement
91 | * @param {AudioBuffer|null} [audioBuffer] If provided, will cache all frequency domain data from the buffer
92 | * @returns {AudioAnalysis}
93 | */
94 | constructor(audioElement, audioBuffer = null) {
95 | this.fftResults = [];
96 | if (audioBuffer) {
97 | /**
98 | * Modified from
99 | * https://stackoverflow.com/questions/75063715/using-the-web-audio-api-to-analyze-a-song-without-playing
100 | *
101 | * We do this to populate FFT values for the audio if provided an `audioBuffer`
102 | * The reason to do this is that Safari fails when using `createMediaElementSource`
103 | * This has a non-zero RAM cost, so we only opt in to it on Safari; Chrome is better off without it
104 | */
105 | const { length, sampleRate } = audioBuffer;
106 | const offlineAudioContext = new OfflineAudioContext({
107 | length,
108 | sampleRate,
109 | });
110 | const source = offlineAudioContext.createBufferSource();
111 | source.buffer = audioBuffer;
112 | const analyser = offlineAudioContext.createAnalyser();
113 | analyser.fftSize = 8192;
114 | analyser.smoothingTimeConstant = 0.1;
115 | source.connect(analyser);
116 | // The finest granularity allowed is one render quantum, 128 / sampleRate seconds,
117 | // but we just want 60fps - cuts ~1s of audio from ~6MB to ~1MB of RAM
118 | const renderQuantumInSeconds = 1 / 60;
119 | const durationInSeconds = length / sampleRate;
120 | const analyze = (index) => {
121 | const suspendTime = renderQuantumInSeconds * index;
122 | if (suspendTime < durationInSeconds) {
123 | offlineAudioContext.suspend(suspendTime).then(() => {
124 | const fftResult = new Float32Array(analyser.frequencyBinCount);
125 | analyser.getFloatFrequencyData(fftResult);
126 | this.fftResults.push(fftResult);
127 | analyze(index + 1);
128 | });
129 | }
130 | if (index === 1) {
131 | offlineAudioContext.startRendering();
132 | } else {
133 | offlineAudioContext.resume();
134 | }
135 | };
136 | source.start(0);
137 | analyze(1);
138 | this.audio = audioElement;
139 | this.context = offlineAudioContext;
140 | this.analyser = analyser;
141 | this.sampleRate = sampleRate;
142 | this.audioBuffer = audioBuffer;
143 | } else {
144 | const audioContext = new AudioContext();
145 | const track = audioContext.createMediaElementSource(audioElement);
146 | const analyser = audioContext.createAnalyser();
147 | analyser.fftSize = 8192;
148 | analyser.smoothingTimeConstant = 0.1;
149 | track.connect(analyser);
150 | analyser.connect(audioContext.destination);
151 | this.audio = audioElement;
152 | this.context = audioContext;
153 | this.analyser = analyser;
154 | this.sampleRate = this.context.sampleRate;
155 | this.audioBuffer = null;
156 | }
157 | }
158 |
159 | /**
160 | * Gets the current frequency domain data from the playing audio track
161 | * @param {"frequency"|"music"|"voice"} [analysisType]
162 | * @param {number} [minDecibels] default -100
163 | * @param {number} [maxDecibels] default -30
164 | * @returns {AudioAnalysisOutputType}
165 | */
166 | getFrequencies(
167 | analysisType = 'frequency',
168 | minDecibels = -100,
169 | maxDecibels = -30,
170 | ) {
171 | let fftResult = null;
172 | if (this.audioBuffer && this.fftResults.length) {
173 | const pct = this.audio.currentTime / this.audio.duration;
174 | const index = Math.min(
175 | (pct * this.fftResults.length) | 0,
176 | this.fftResults.length - 1,
177 | );
178 | fftResult = this.fftResults[index];
179 | }
180 | return AudioAnalysis.getFrequencies(
181 | this.analyser,
182 | this.sampleRate,
183 | fftResult,
184 | analysisType,
185 | minDecibels,
186 | maxDecibels,
187 | );
188 | }
189 |
190 | /**
191 | * Resume the internal AudioContext if it was suspended due to the lack of
192 | * user interaction when the AudioAnalysis was instantiated.
193 | * @returns {Promise}
194 | */
195 | async resumeIfSuspended() {
196 | if (this.context.state === 'suspended') {
197 | await this.context.resume();
198 | }
199 | return true;
200 | }
201 | }
202 |
203 | globalThis.AudioAnalysis = AudioAnalysis;
204 |
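205 | // Usage sketch (illustrative only): attach an AudioAnalysis to an <audio> element
206 | // and poll normalized frequency values for a visualizer. The element lookup and
207 | // the requestAnimationFrame loop are example scaffolding, not part of this library.
208 | //
209 | // const el = document.querySelector('audio');
210 | // const analysis = new AudioAnalysis(el);
211 | // await analysis.resumeIfSuspended(); // needed if constructed before a user gesture
212 | // const render = () => {
213 | //   const { values, labels } = analysis.getFrequencies('voice'); // values normalized to {0, 1}
214 | //   // ...draw `values` (and optionally `labels`) to a canvas...
215 | //   requestAnimationFrame(render);
216 | // };
217 | // render();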
--------------------------------------------------------------------------------
/src/lib/wavtools/lib/analysis/constants.js:
--------------------------------------------------------------------------------
1 | /**
2 | * Constants for help with visualization
3 | * Helps map frequency ranges from Fast Fourier Transform
4 | * to human-interpretable ranges, notably music ranges and
5 | * human vocal ranges.
6 | */
7 |
8 | // Eighth octave frequencies
9 | const octave8Frequencies = [
10 | 4186.01, 4434.92, 4698.63, 4978.03, 5274.04, 5587.65, 5919.91, 6271.93,
11 | 6644.88, 7040.0, 7458.62, 7902.13,
12 | ];
13 |
14 | // Labels for each of the above frequencies
15 | const octave8FrequencyLabels = [
16 | 'C',
17 | 'C#',
18 | 'D',
19 | 'D#',
20 | 'E',
21 | 'F',
22 | 'F#',
23 | 'G',
24 | 'G#',
25 | 'A',
26 | 'A#',
27 | 'B',
28 | ];
29 |
30 | /**
31 | * All note frequencies from 1st to 8th octave
32 | * in format "A#8" (A#, 8th octave)
33 | */
34 | export const noteFrequencies = [];
35 | export const noteFrequencyLabels = [];
36 | for (let i = 1; i <= 8; i++) {
37 | for (let f = 0; f < octave8Frequencies.length; f++) {
38 | const freq = octave8Frequencies[f];
39 | noteFrequencies.push(freq / Math.pow(2, 8 - i));
40 | noteFrequencyLabels.push(octave8FrequencyLabels[f] + i);
41 | }
42 | }
43 |
44 | /**
45 | * Subset of the note frequencies between 32 and 2000 Hz
46 | * 6 octave range: C1 to B6
47 | */
48 | const voiceFrequencyRange = [32.0, 2000.0];
49 | export const voiceFrequencies = noteFrequencies.filter((_, i) => {
50 | return (
51 | noteFrequencies[i] > voiceFrequencyRange[0] &&
52 | noteFrequencies[i] < voiceFrequencyRange[1]
53 | );
54 | });
55 | export const voiceFrequencyLabels = noteFrequencyLabels.filter((_, i) => {
56 | return (
57 | noteFrequencies[i] > voiceFrequencyRange[0] &&
58 | noteFrequencies[i] < voiceFrequencyRange[1]
59 | );
60 | });
61 |
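62 | // Sanity check (illustrative): the exported tables are index-aligned, and the octave
63 | // math above places concert A where expected, since A8 is 7040 Hz and
64 | // A4 = 7040 / 2^(8 - 4) = 440 Hz.
65 | //
66 | // const a4 = noteFrequencies[noteFrequencyLabels.indexOf('A4')];
67 | // console.log(a4); // 440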
--------------------------------------------------------------------------------
/src/lib/wavtools/lib/wav_packer.js:
--------------------------------------------------------------------------------
1 | /**
2 | * Raw wav audio file contents
3 | * @typedef {Object} WavPackerAudioType
4 | * @property {Blob} blob
5 | * @property {string} url
6 | * @property {number} channelCount
7 | * @property {number} sampleRate
8 | * @property {number} duration
9 | */
10 |
11 | /**
12 | * Utility class for assembling PCM16 "audio/wav" data
13 | * @class
14 | */
15 | export class WavPacker {
16 | /**
17 | * Converts Float32Array of amplitude data to ArrayBuffer in Int16Array format
18 | * @param {Float32Array} float32Array
19 | * @returns {ArrayBuffer}
20 | */
21 | static floatTo16BitPCM(float32Array) {
22 | const buffer = new ArrayBuffer(float32Array.length * 2);
23 | const view = new DataView(buffer);
24 | let offset = 0;
25 | for (let i = 0; i < float32Array.length; i++, offset += 2) {
26 | let s = Math.max(-1, Math.min(1, float32Array[i]));
27 | view.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7fff, true);
28 | }
29 | return buffer;
30 | }
31 |
32 | /**
33 | * Concatenates two ArrayBuffers
34 | * @param {ArrayBuffer} leftBuffer
35 | * @param {ArrayBuffer} rightBuffer
36 | * @returns {ArrayBuffer}
37 | */
38 | static mergeBuffers(leftBuffer, rightBuffer) {
39 | const tmpArray = new Uint8Array(
40 | leftBuffer.byteLength + rightBuffer.byteLength
41 | );
42 | tmpArray.set(new Uint8Array(leftBuffer), 0);
43 | tmpArray.set(new Uint8Array(rightBuffer), leftBuffer.byteLength);
44 | return tmpArray.buffer;
45 | }
46 |
47 | /**
48 | * Packs data into an Int16 format
49 | * @private
50 | * @param {number} size 0 = 1x Int16, 1 = 2x Int16
51 | * @param {number} arg value to pack
52 | * @returns
53 | */
54 | _packData(size, arg) {
55 | return [
56 | new Uint8Array([arg, arg >> 8]),
57 | new Uint8Array([arg, arg >> 8, arg >> 16, arg >> 24]),
58 | ][size];
59 | }
60 |
61 | /**
62 | * Packs audio into "audio/wav" Blob
63 | * @param {number} sampleRate
64 | * @param {{bitsPerSample: number, channels: Array, data: Int16Array}} audio
65 | * @returns {WavPackerAudioType}
66 | */
67 | pack(sampleRate, audio) {
68 | if (!audio?.bitsPerSample) {
69 | throw new Error(`Missing "bitsPerSample"`);
70 | } else if (!audio?.channels) {
71 | throw new Error(`Missing "channels"`);
72 | } else if (!audio?.data) {
73 | throw new Error(`Missing "data"`);
74 | }
75 | const { bitsPerSample, channels, data } = audio;
76 | const output = [
77 | // Header
78 | 'RIFF',
79 | this._packData(
80 | 1,
81 | 4 + (8 + 24) /* chunk 1 length */ + (8 + 8) /* chunk 2 length */
82 | ), // Length
83 | 'WAVE',
84 | // chunk 1
85 | 'fmt ', // Sub-chunk identifier
86 | this._packData(1, 16), // Chunk length
87 | this._packData(0, 1), // Audio format (1 is linear quantization)
88 | this._packData(0, channels.length),
89 | this._packData(1, sampleRate),
90 | this._packData(1, (sampleRate * channels.length * bitsPerSample) / 8), // Byte rate
91 | this._packData(0, (channels.length * bitsPerSample) / 8),
92 | this._packData(0, bitsPerSample),
93 | // chunk 2
94 | 'data', // Sub-chunk identifier
95 | this._packData(
96 | 1,
97 | (channels[0].length * channels.length * bitsPerSample) / 8
98 | ), // Chunk length
99 | data,
100 | ];
101 | const blob = new Blob(output, { type: 'audio/wav' });
102 | const url = URL.createObjectURL(blob);
103 | return {
104 | blob,
105 | url,
106 | channelCount: channels.length,
107 | sampleRate,
108 | duration: data.byteLength / (channels.length * sampleRate * 2),
109 | };
110 | }
111 | }
112 |
113 | globalThis.WavPacker = WavPacker;
114 |
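115 | // Usage sketch (illustrative only): pack one second of a 440 Hz sine wave into a
116 | // downloadable wav Blob. The sample rate and tone are arbitrary example values.
117 | //
118 | // const sampleRate = 44100;
119 | // const float32 = new Float32Array(sampleRate);
120 | // for (let i = 0; i < float32.length; i++) {
121 | //   float32[i] = Math.sin((2 * Math.PI * 440 * i) / sampleRate) * 0.5;
122 | // }
123 | // const data = new Int16Array(WavPacker.floatTo16BitPCM(float32));
124 | // const wav = new WavPacker().pack(sampleRate, {
125 | //   bitsPerSample: 16,
126 | //   channels: [float32],
127 | //   data,
128 | // });
129 | // console.log(wav.url, wav.duration); // object URL pointing at the blob, duration ~1 second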
--------------------------------------------------------------------------------
/src/lib/wavtools/lib/wav_recorder.js:
--------------------------------------------------------------------------------
1 | import { AudioProcessorSrc } from './worklets/audio_processor.js';
2 | import { AudioAnalysis } from './analysis/audio_analysis.js';
3 | import { WavPacker } from './wav_packer.js';
4 |
5 | /**
6 | * Decodes audio into a wav file
7 | * @typedef {Object} DecodedAudioType
8 | * @property {Blob} blob
9 | * @property {string} url
10 | * @property {Float32Array} values
11 | * @property {AudioBuffer} audioBuffer
12 | */
13 |
14 | /**
15 | * Records live stream of user audio as PCM16 "audio/wav" data
16 | * @class
17 | */
18 | export class WavRecorder {
19 | /**
20 | * Create a new WavRecorder instance
21 | * @param {{sampleRate?: number, outputToSpeakers?: boolean, debug?: boolean}} [options]
22 | * @returns {WavRecorder}
23 | */
24 | constructor({
25 | sampleRate = 44100,
26 | outputToSpeakers = false,
27 | debug = false,
28 | } = {}) {
29 | // Script source
30 | this.scriptSrc = AudioProcessorSrc;
31 | // Config
32 | this.sampleRate = sampleRate;
33 | this.outputToSpeakers = outputToSpeakers;
34 | this.debug = !!debug;
35 | this._deviceChangeCallback = null;
36 | this._devices = [];
37 | // State variables
38 | this.stream = null;
39 | this.processor = null;
40 | this.source = null;
41 | this.node = null;
42 | this.recording = false;
43 | // Event handling with AudioWorklet
44 | this._lastEventId = 0;
45 | this.eventReceipts = {};
46 | this.eventTimeout = 5000;
47 | // Process chunks of audio
48 | this._chunkProcessor = () => {};
49 | this._chunkProcessorSize = void 0;
50 | this._chunkProcessorBuffer = {
51 | raw: new ArrayBuffer(0),
52 | mono: new ArrayBuffer(0),
53 | };
54 | }
55 |
56 | /**
57 | * Decodes audio data from multiple formats to a Blob, url, Float32Array and AudioBuffer
58 | * @param {Blob|Float32Array|Int16Array|ArrayBuffer|number[]} audioData
59 | * @param {number} sampleRate
60 | * @param {number} fromSampleRate
61 | * @returns {Promise}
62 | */
63 | static async decode(audioData, sampleRate = 44100, fromSampleRate = -1) {
64 | const context = new AudioContext({ sampleRate });
65 | let arrayBuffer;
66 | let blob;
67 | if (audioData instanceof Blob) {
68 | if (fromSampleRate !== -1) {
69 | throw new Error(
70 | `Can not specify "fromSampleRate" when reading from Blob`,
71 | );
72 | }
73 | blob = audioData;
74 | arrayBuffer = await blob.arrayBuffer();
75 | } else if (audioData instanceof ArrayBuffer) {
76 | if (fromSampleRate !== -1) {
77 | throw new Error(
78 | `Can not specify "fromSampleRate" when reading from ArrayBuffer`,
79 | );
80 | }
81 | arrayBuffer = audioData;
82 | blob = new Blob([arrayBuffer], { type: 'audio/wav' });
83 | } else {
84 | let float32Array;
85 | let data;
86 | if (audioData instanceof Int16Array) {
87 | data = audioData;
88 | float32Array = new Float32Array(audioData.length);
89 | for (let i = 0; i < audioData.length; i++) {
90 | float32Array[i] = audioData[i] / 0x8000;
91 | }
92 | } else if (audioData instanceof Float32Array) {
93 | float32Array = audioData;
94 | } else if (audioData instanceof Array) {
95 | float32Array = new Float32Array(audioData);
96 | } else {
97 | throw new Error(
98 | `"audioData" must be one of: Blob, Float32Arrray, Int16Array, ArrayBuffer, Array`,
99 | );
100 | }
101 | if (fromSampleRate === -1) {
102 | throw new Error(
103 | `Must specify "fromSampleRate" when reading from Float32Array, Int16Array or Array`,
104 | );
105 | } else if (fromSampleRate < 3000) {
106 | throw new Error(`Minimum "fromSampleRate" is 3000 (3kHz)`);
107 | }
108 | if (!data) {
109 | data = WavPacker.floatTo16BitPCM(float32Array);
110 | }
111 | const audio = {
112 | bitsPerSample: 16,
113 | channels: [float32Array],
114 | data,
115 | };
116 | const packer = new WavPacker();
117 | const result = packer.pack(fromSampleRate, audio);
118 | blob = result.blob;
119 | arrayBuffer = await blob.arrayBuffer();
120 | }
121 | const audioBuffer = await context.decodeAudioData(arrayBuffer);
122 | const values = audioBuffer.getChannelData(0);
123 | const url = URL.createObjectURL(blob);
124 | return {
125 | blob,
126 | url,
127 | values,
128 | audioBuffer,
129 | };
130 | }
131 |
132 | /**
133 | * Logs data in debug mode
134 | * @param {...any} arguments
135 | * @returns {true}
136 | */
137 | log() {
138 | if (this.debug) {
139 | console.log(...arguments);
140 | }
141 | return true;
142 | }
143 |
144 | /**
145 | * Retrieves the current sampleRate for the recorder
146 | * @returns {number}
147 | */
148 | getSampleRate() {
149 | return this.sampleRate;
150 | }
151 |
152 | /**
153 | * Retrieves the current status of the recording
154 | * @returns {"ended"|"paused"|"recording"}
155 | */
156 | getStatus() {
157 | if (!this.processor) {
158 | return 'ended';
159 | } else if (!this.recording) {
160 | return 'paused';
161 | } else {
162 | return 'recording';
163 | }
164 | }
165 |
166 | /**
167 | * Sends an event to the AudioWorklet
168 | * @private
169 | * @param {string} name
170 | * @param {{[key: string]: any}} data
171 | * @param {AudioWorkletNode} [_processor]
172 | * @returns {Promise<{[key: string]: any}>}
173 | */
174 | async _event(name, data = {}, _processor = null) {
175 | _processor = _processor || this.processor;
176 | if (!_processor) {
177 | throw new Error('Can not send events without recording first');
178 | }
179 | const message = {
180 | event: name,
181 | id: this._lastEventId++,
182 | data,
183 | };
184 | _processor.port.postMessage(message);
185 | const t0 = new Date().valueOf();
186 | while (!this.eventReceipts[message.id]) {
187 | if (new Date().valueOf() - t0 > this.eventTimeout) {
188 | throw new Error(`Timeout waiting for "${name}" event`);
189 | }
190 | await new Promise((res) => setTimeout(() => res(true), 1));
191 | }
192 | const payload = this.eventReceipts[message.id];
193 | delete this.eventReceipts[message.id];
194 | return payload;
195 | }
196 |
197 | /**
198 | * Sets device change callback, remove if callback provided is `null`
199 | * @param {(Array): void|null} callback
200 | * @returns {true}
201 | */
202 | listenForDeviceChange(callback) {
203 | if (callback === null && this._deviceChangeCallback) {
204 | navigator.mediaDevices.removeEventListener(
205 | 'devicechange',
206 | this._deviceChangeCallback,
207 | );
208 | this._deviceChangeCallback = null;
209 | } else if (callback !== null) {
210 | // Basically a debounce; we only want this called once when devices change
211 | // And we only want the most recent callback() to be executed
212 | // if a few are operating at the same time
213 | let lastId = 0;
214 | let lastDevices = [];
215 | const serializeDevices = (devices) =>
216 | devices
217 | .map((d) => d.deviceId)
218 | .sort()
219 | .join(',');
220 | const cb = async () => {
221 | let id = ++lastId;
222 | const devices = await this.listDevices();
223 | if (id === lastId) {
224 | if (serializeDevices(lastDevices) !== serializeDevices(devices)) {
225 | lastDevices = devices;
226 | callback(devices.slice());
227 | }
228 | }
229 | };
230 | navigator.mediaDevices.addEventListener('devicechange', cb);
231 | cb();
232 | this._deviceChangeCallback = cb;
233 | }
234 | return true;
235 | }
236 |
237 | /**
238 | * Manually request permission to use the microphone
239 | * @returns {Promise}
240 | */
241 | async requestPermission() {
242 | const permissionStatus = await navigator.permissions.query({
243 | name: 'microphone',
244 | });
245 | if (permissionStatus.state === 'denied') {
246 | window.alert('You must grant microphone access to use this feature.');
247 | } else if (permissionStatus.state === 'prompt') {
248 | try {
249 | const stream = await navigator.mediaDevices.getUserMedia({
250 | audio: true,
251 | });
252 | const tracks = stream.getTracks();
253 | tracks.forEach((track) => track.stop());
254 | } catch (e) {
255 | window.alert('You must grant microphone access to use this feature.');
256 | }
257 | }
258 | return true;
259 | }
260 |
261 | /**
262 | * List all eligible devices for recording, will request permission to use microphone
263 | * @returns {Promise<Array<MediaDeviceInfo & {default: boolean}>>}
264 | */
265 | async listDevices() {
266 | if (
267 | !navigator.mediaDevices ||
268 | !('enumerateDevices' in navigator.mediaDevices)
269 | ) {
270 | throw new Error('Could not request user devices');
271 | }
272 | await this.requestPermission();
273 | const devices = await navigator.mediaDevices.enumerateDevices();
274 | const audioDevices = devices.filter(
275 | (device) => device.kind === 'audioinput',
276 | );
277 | const defaultDeviceIndex = audioDevices.findIndex(
278 | (device) => device.deviceId === 'default',
279 | );
280 | const deviceList = [];
281 | if (defaultDeviceIndex !== -1) {
282 | let defaultDevice = audioDevices.splice(defaultDeviceIndex, 1)[0];
283 | let existingIndex = audioDevices.findIndex(
284 | (device) => device.groupId === defaultDevice.groupId,
285 | );
286 | if (existingIndex !== -1) {
287 | defaultDevice = audioDevices.splice(existingIndex, 1)[0];
288 | }
289 | defaultDevice.default = true;
290 | deviceList.push(defaultDevice);
291 | }
292 | return deviceList.concat(audioDevices);
293 | }
294 |
295 | /**
296 | * Begins a recording session and requests microphone permissions if not already granted
297 | * Microphone recording indicator will appear on browser tab but status will be "paused"
298 | * @param {string} [deviceId] if no device provided, default device will be used
299 | * @returns {Promise}
300 | */
301 | async begin(deviceId) {
302 | if (this.processor) {
303 | throw new Error(
304 | `Already connected: please call .end() to start a new session`,
305 | );
306 | }
307 |
308 | if (
309 | !navigator.mediaDevices ||
310 | !('getUserMedia' in navigator.mediaDevices)
311 | ) {
312 | throw new Error('Could not request user media');
313 | }
314 | try {
315 | const config = { audio: true };
316 | if (deviceId) {
317 | config.audio = { deviceId: { exact: deviceId } };
318 | }
319 | this.stream = await navigator.mediaDevices.getUserMedia(config);
320 | } catch (err) {
321 | throw new Error('Could not start media stream');
322 | }
323 |
324 | const context = new AudioContext({ sampleRate: this.sampleRate });
325 | const source = context.createMediaStreamSource(this.stream);
326 | // Load and execute the module script.
327 | try {
328 | await context.audioWorklet.addModule(this.scriptSrc);
329 | } catch (e) {
330 | console.error(e);
331 | throw new Error(`Could not add audioWorklet module: ${this.scriptSrc}`);
332 | }
333 | const processor = new AudioWorkletNode(context, 'audio_processor');
334 | processor.port.onmessage = (e) => {
335 | const { event, id, data } = e.data;
336 | if (event === 'receipt') {
337 | this.eventReceipts[id] = data;
338 | } else if (event === 'chunk') {
339 | if (this._chunkProcessorSize) {
340 | const buffer = this._chunkProcessorBuffer;
341 | this._chunkProcessorBuffer = {
342 | raw: WavPacker.mergeBuffers(buffer.raw, data.raw),
343 | mono: WavPacker.mergeBuffers(buffer.mono, data.mono),
344 | };
345 | if (
346 | this._chunkProcessorBuffer.mono.byteLength >=
347 | this._chunkProcessorSize
348 | ) {
349 | this._chunkProcessor(this._chunkProcessorBuffer);
350 | this._chunkProcessorBuffer = {
351 | raw: new ArrayBuffer(0),
352 | mono: new ArrayBuffer(0),
353 | };
354 | }
355 | } else {
356 | this._chunkProcessor(data);
357 | }
358 | }
359 | };
360 |
361 | const node = source.connect(processor);
362 | const analyser = context.createAnalyser();
363 | analyser.fftSize = 8192;
364 | analyser.smoothingTimeConstant = 0.1;
365 | node.connect(analyser);
366 | if (this.outputToSpeakers) {
367 | // eslint-disable-next-line no-console
368 | console.warn(
369 | 'Warning: Output to speakers may affect sound quality,\n' +
370 | 'especially due to system audio feedback preventative measures.\n' +
371 | 'Use only for debugging.',
372 | );
373 | analyser.connect(context.destination);
374 | }
375 |
376 | this.source = source;
377 | this.node = node;
378 | this.analyser = analyser;
379 | this.processor = processor;
380 | return true;
381 | }
382 |
383 | /**
384 | * Gets the current frequency domain data from the recording track
385 | * @param {"frequency"|"music"|"voice"} [analysisType]
386 | * @param {number} [minDecibels] default -100
387 | * @param {number} [maxDecibels] default -30
388 | * @returns {import('./analysis/audio_analysis.js').AudioAnalysisOutputType}
389 | */
390 | getFrequencies(
391 | analysisType = 'frequency',
392 | minDecibels = -100,
393 | maxDecibels = -30,
394 | ) {
395 | if (!this.processor) {
396 | throw new Error('Session ended: please call .begin() first');
397 | }
398 | return AudioAnalysis.getFrequencies(
399 | this.analyser,
400 | this.sampleRate,
401 | null,
402 | analysisType,
403 | minDecibels,
404 | maxDecibels,
405 | );
406 | }
407 |
408 | /**
409 | * Pauses the recording
410 | * Keeps microphone stream open but halts storage of audio
411 | * @returns {Promise}
412 | */
413 | async pause() {
414 | if (!this.processor) {
415 | throw new Error('Session ended: please call .begin() first');
416 | } else if (!this.recording) {
417 | throw new Error('Already paused: please call .record() first');
418 | }
419 | if (this._chunkProcessorBuffer.raw.byteLength) {
420 | this._chunkProcessor(this._chunkProcessorBuffer);
421 | }
422 | this.log('Pausing ...');
423 | await this._event('stop');
424 | this.recording = false;
425 | return true;
426 | }
427 |
428 | /**
429 | * Start recording stream and storing to memory from the connected audio source
430 | * @param {(data: { mono: Int16Array; raw: Int16Array }) => any} [chunkProcessor]
431 | * @param {number} [chunkSize] chunkProcessor will not be triggered until this size threshold is met in mono audio
432 | * @returns {Promise}
433 | */
434 | async record(chunkProcessor = () => {}, chunkSize = 8192) {
435 | if (!this.processor) {
436 | throw new Error('Session ended: please call .begin() first');
437 | } else if (this.recording) {
438 | throw new Error('Already recording: please call .pause() first');
439 | } else if (typeof chunkProcessor !== 'function') {
440 | throw new Error(`chunkProcessor must be a function`);
441 | }
442 | this._chunkProcessor = chunkProcessor;
443 | this._chunkProcessorSize = chunkSize;
444 | this._chunkProcessorBuffer = {
445 | raw: new ArrayBuffer(0),
446 | mono: new ArrayBuffer(0),
447 | };
448 | this.log('Recording ...');
449 | await this._event('start');
450 | this.recording = true;
451 | return true;
452 | }
453 |
454 | /**
455 | * Clears the audio buffer, empties stored recording
456 | * @returns {Promise}
457 | */
458 | async clear() {
459 | if (!this.processor) {
460 | throw new Error('Session ended: please call .begin() first');
461 | }
462 | await this._event('clear');
463 | return true;
464 | }
465 |
466 | /**
467 | * Reads the current audio stream data
468 | * @returns {Promise<{meanValues: Float32Array, channels: Array}>}
469 | */
470 | async read() {
471 | if (!this.processor) {
472 | throw new Error('Session ended: please call .begin() first');
473 | }
474 | this.log('Reading ...');
475 | const result = await this._event('read');
476 | return result;
477 | }
478 |
479 | /**
480 | * Saves the current audio stream to a file
481 | * @param {boolean} [force] Force saving while still recording
482 | * @returns {Promise}
483 | */
484 | async save(force = false) {
485 | if (!this.processor) {
486 | throw new Error('Session ended: please call .begin() first');
487 | }
488 | if (!force && this.recording) {
489 | throw new Error(
490 | 'Currently recording: please call .pause() first, or call .save(true) to force',
491 | );
492 | }
493 | this.log('Exporting ...');
494 | const exportData = await this._event('export');
495 | const packer = new WavPacker();
496 | const result = packer.pack(this.sampleRate, exportData.audio);
497 | return result;
498 | }
499 |
500 | /**
501 | * Ends the current recording session and saves the result
502 | * @returns {Promise}
503 | */
504 | async end() {
505 | if (!this.processor) {
506 | throw new Error('Session ended: please call .begin() first');
507 | }
508 |
509 | const _processor = this.processor;
510 |
511 | this.log('Stopping ...');
512 | await this._event('stop');
513 | this.recording = false;
514 | const tracks = this.stream.getTracks();
515 | tracks.forEach((track) => track.stop());
516 |
517 | this.log('Exporting ...');
518 | const exportData = await this._event('export', {}, _processor);
519 |
520 | this.processor.disconnect();
521 | this.source.disconnect();
522 | this.node.disconnect();
523 | this.analyser.disconnect();
524 | this.stream = null;
525 | this.processor = null;
526 | this.source = null;
527 | this.node = null;
528 |
529 | const packer = new WavPacker();
530 | const result = packer.pack(this.sampleRate, exportData.audio);
531 | return result;
532 | }
533 |
534 | /**
535 | * Performs a full cleanup of WavRecorder instance
536 | * Stops actively listening via microphone and removes existing listeners
537 | * @returns {Promise}
538 | */
539 | async quit() {
540 | this.listenForDeviceChange(null);
541 | if (this.processor) {
542 | await this.end();
543 | }
544 | return true;
545 | }
546 | }
547 |
548 | globalThis.WavRecorder = WavRecorder;
549 |
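550 | // Usage sketch (illustrative only): record microphone audio in 24 kHz PCM16 chunks,
551 | // then end the session and grab the packed wav file. The sample rate and the chunk
552 | // handler body are example values, not requirements of the class.
553 | //
554 | // const recorder = new WavRecorder({ sampleRate: 24000 });
555 | // await recorder.begin(); // prompts for mic access; status becomes "paused"
556 | // await recorder.record((chunk) => {
557 | //   // chunk.mono / chunk.raw hold the latest PCM16 audio (see the record() JSDoc above)
558 | // });
559 | // // ...some time later...
560 | // const wav = await recorder.end(); // { blob, url, channelCount, sampleRate, duration }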
--------------------------------------------------------------------------------
/src/lib/wavtools/lib/wav_stream_player.js:
--------------------------------------------------------------------------------
1 | import { StreamProcessorSrc } from './worklets/stream_processor.js';
2 | import { AudioAnalysis } from './analysis/audio_analysis.js';
3 |
4 | /**
5 | * Plays audio streams received in raw PCM16 chunks from the browser
6 | * @class
7 | */
8 | export class WavStreamPlayer {
9 | /**
10 | * Creates a new WavStreamPlayer instance
11 | * @param {{sampleRate?: number}} options
12 | * @returns {WavStreamPlayer}
13 | */
14 | constructor({ sampleRate = 44100 } = {}) {
15 | this.scriptSrc = StreamProcessorSrc;
16 | this.sampleRate = sampleRate;
17 | this.context = null;
18 | this.stream = null;
19 | this.analyser = null;
20 | this.trackSampleOffsets = {};
21 | this.interruptedTrackIds = {};
22 | }
23 |
24 | /**
25 | * Connects the audio context and enables output to speakers
26 | * @returns {Promise}
27 | */
28 | async connect() {
29 | this.context = new AudioContext({ sampleRate: this.sampleRate });
30 | if (this.context.state === 'suspended') {
31 | await this.context.resume();
32 | }
33 | try {
34 | await this.context.audioWorklet.addModule(this.scriptSrc);
35 | } catch (e) {
36 | console.error(e);
37 | throw new Error(`Could not add audioWorklet module: ${this.scriptSrc}`);
38 | }
39 | const analyser = this.context.createAnalyser();
40 | analyser.fftSize = 8192;
41 | analyser.smoothingTimeConstant = 0.1;
42 | this.analyser = analyser;
43 | return true;
44 | }
45 |
46 | /**
47 | * Gets the current frequency domain data from the playing track
48 | * @param {"frequency"|"music"|"voice"} [analysisType]
49 | * @param {number} [minDecibels] default -100
50 | * @param {number} [maxDecibels] default -30
51 | * @returns {import('./analysis/audio_analysis.js').AudioAnalysisOutputType}
52 | */
53 | getFrequencies(
54 | analysisType = 'frequency',
55 | minDecibels = -100,
56 | maxDecibels = -30
57 | ) {
58 | if (!this.analyser) {
59 | throw new Error('Not connected, please call .connect() first');
60 | }
61 | return AudioAnalysis.getFrequencies(
62 | this.analyser,
63 | this.sampleRate,
64 | null,
65 | analysisType,
66 | minDecibels,
67 | maxDecibels
68 | );
69 | }
70 |
71 | /**
72 | * Starts audio streaming
73 | * @private
74 | * @returns {Promise}
75 | */
76 | _start() {
77 | const streamNode = new AudioWorkletNode(this.context, 'stream_processor');
78 | streamNode.connect(this.context.destination);
79 | streamNode.port.onmessage = (e) => {
80 | const { event } = e.data;
81 | if (event === 'stop') {
82 | streamNode.disconnect();
83 | this.stream = null;
84 | } else if (event === 'offset') {
85 | const { requestId, trackId, offset } = e.data;
86 | const currentTime = offset / this.sampleRate;
87 | this.trackSampleOffsets[requestId] = { trackId, offset, currentTime };
88 | }
89 | };
90 | this.analyser.disconnect();
91 | streamNode.connect(this.analyser);
92 | this.stream = streamNode;
93 | return true;
94 | }
95 |
96 | /**
97 | * Adds 16BitPCM data to the currently playing audio stream
98 | * You can add chunks beyond the current play point and they will be queued for play
99 | * @param {ArrayBuffer|Int16Array} arrayBuffer
100 | * @param {string} [trackId]
101 | * @returns {Int16Array}
102 | */
103 | add16BitPCM(arrayBuffer, trackId = 'default') {
104 | if (typeof trackId !== 'string') {
105 | throw new Error(`trackId must be a string`);
106 | } else if (this.interruptedTrackIds[trackId]) {
107 | return;
108 | }
109 | if (!this.stream) {
110 | this._start();
111 | }
112 | let buffer;
113 | if (arrayBuffer instanceof Int16Array) {
114 | buffer = arrayBuffer;
115 | } else if (arrayBuffer instanceof ArrayBuffer) {
116 | buffer = new Int16Array(arrayBuffer);
117 | } else {
118 | throw new Error(`argument must be Int16Array or ArrayBuffer`);
119 | }
120 | this.stream.port.postMessage({ event: 'write', buffer, trackId });
121 | return buffer;
122 | }
123 |
124 | /**
125 | * Gets the offset (sample count) of the currently playing stream
126 | * @param {boolean} [interrupt]
127 | * @returns {{trackId: string|null, offset: number, currentTime: number}}
128 | */
129 | async getTrackSampleOffset(interrupt = false) {
130 | if (!this.stream) {
131 | return null;
132 | }
133 | const requestId = crypto.randomUUID();
134 | this.stream.port.postMessage({
135 | event: interrupt ? 'interrupt' : 'offset',
136 | requestId,
137 | });
138 | let trackSampleOffset;
139 | while (!trackSampleOffset) {
140 | trackSampleOffset = this.trackSampleOffsets[requestId];
141 | await new Promise((r) => setTimeout(() => r(), 1));
142 | }
143 | const { trackId } = trackSampleOffset;
144 | if (interrupt && trackId) {
145 | this.interruptedTrackIds[trackId] = true;
146 | }
147 | return trackSampleOffset;
148 | }
149 |
150 | /**
151 |    * Interrupts the current stream and returns the sample offset of the audio
152 | * @param {boolean} [interrupt]
153 | * @returns {{trackId: string|null, offset: number, currentTime: number}}
154 | */
155 | async interrupt() {
156 | return this.getTrackSampleOffset(true);
157 | }
158 | }
159 |
160 | globalThis.WavStreamPlayer = WavStreamPlayer;
161 |
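162 | // Minimal usage sketch (illustrative only; `int16Chunk` is a placeholder for PCM16 data
163 | // arriving from elsewhere, e.g. a realtime audio stream):
164 | //
165 | //   const player = new WavStreamPlayer({ sampleRate: 24000 });
166 | //   await player.connect();                     // loads the stream_processor worklet
167 | //   player.add16BitPCM(int16Chunk, 'my-track'); // queue chunks; playback starts automatically
168 | //   const offsets = await player.interrupt();   // stop and read { trackId, offset, currentTime }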
--------------------------------------------------------------------------------
/src/lib/wavtools/lib/worklets/audio_processor.js:
--------------------------------------------------------------------------------
1 | const AudioProcessorWorklet = `
2 | class AudioProcessor extends AudioWorkletProcessor {
3 |
4 | constructor() {
5 | super();
6 | this.port.onmessage = this.receive.bind(this);
7 | this.initialize();
8 | }
9 |
10 | initialize() {
11 | this.foundAudio = false;
12 | this.recording = false;
13 | this.chunks = [];
14 | }
15 |
16 | /**
17 | * Concatenates sampled chunks into channels
18 | * Format is chunk[Left[], Right[]]
19 | */
20 | readChannelData(chunks, channel = -1, maxChannels = 9) {
21 | let channelLimit;
22 | if (channel !== -1) {
23 | if (chunks[0] && chunks[0].length - 1 < channel) {
24 | throw new Error(
25 |           \`Channel \${channel} out of range: max \${chunks[0].length - 1}\`
26 | );
27 | }
28 | channelLimit = channel + 1;
29 | } else {
30 | channel = 0;
31 | channelLimit = Math.min(chunks[0] ? chunks[0].length : 1, maxChannels);
32 | }
33 | const channels = [];
34 | for (let n = channel; n < channelLimit; n++) {
35 | const length = chunks.reduce((sum, chunk) => {
36 | return sum + chunk[n].length;
37 | }, 0);
38 | const buffers = chunks.map((chunk) => chunk[n]);
39 | const result = new Float32Array(length);
40 | let offset = 0;
41 | for (let i = 0; i < buffers.length; i++) {
42 | result.set(buffers[i], offset);
43 | offset += buffers[i].length;
44 | }
45 | channels[n] = result;
46 | }
47 | return channels;
48 | }
49 |
50 | /**
51 | * Combines parallel audio data into correct format,
52 | * channels[Left[], Right[]] to float32Array[LRLRLRLR...]
53 | */
54 | formatAudioData(channels) {
55 | if (channels.length === 1) {
56 | // Simple case is only one channel
57 | const float32Array = channels[0].slice();
58 | const meanValues = channels[0].slice();
59 | return { float32Array, meanValues };
60 | } else {
61 | const float32Array = new Float32Array(
62 | channels[0].length * channels.length
63 | );
64 | const meanValues = new Float32Array(channels[0].length);
65 | for (let i = 0; i < channels[0].length; i++) {
66 | const offset = i * channels.length;
67 | let meanValue = 0;
68 | for (let n = 0; n < channels.length; n++) {
69 | float32Array[offset + n] = channels[n][i];
70 | meanValue += channels[n][i];
71 | }
72 | meanValues[i] = meanValue / channels.length;
73 | }
74 | return { float32Array, meanValues };
75 | }
76 | }
77 |
78 | /**
79 | * Converts 32-bit float data to 16-bit integers
80 | */
81 | floatTo16BitPCM(float32Array) {
82 | const buffer = new ArrayBuffer(float32Array.length * 2);
83 | const view = new DataView(buffer);
84 | let offset = 0;
85 | for (let i = 0; i < float32Array.length; i++, offset += 2) {
86 | let s = Math.max(-1, Math.min(1, float32Array[i]));
87 | view.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7fff, true);
88 | }
89 | return buffer;
90 | }
91 |
92 | /**
93 | * Retrieves the most recent amplitude values from the audio stream
94 | * @param {number} channel
95 | */
96 | getValues(channel = -1) {
97 | const channels = this.readChannelData(this.chunks, channel);
98 | const { meanValues } = this.formatAudioData(channels);
99 | return { meanValues, channels };
100 | }
101 |
102 | /**
103 | * Exports chunks as an audio/wav file
104 | */
105 | export() {
106 | const channels = this.readChannelData(this.chunks);
107 | const { float32Array, meanValues } = this.formatAudioData(channels);
108 | const audioData = this.floatTo16BitPCM(float32Array);
109 | return {
110 | meanValues: meanValues,
111 | audio: {
112 | bitsPerSample: 16,
113 | channels: channels,
114 | data: audioData,
115 | },
116 | };
117 | }
118 |
119 | receive(e) {
120 | const { event, id } = e.data;
121 | let receiptData = {};
122 | switch (event) {
123 | case 'start':
124 | this.recording = true;
125 | break;
126 | case 'stop':
127 | this.recording = false;
128 | break;
129 | case 'clear':
130 | this.initialize();
131 | break;
132 | case 'export':
133 | receiptData = this.export();
134 | break;
135 | case 'read':
136 | receiptData = this.getValues();
137 | break;
138 | default:
139 | break;
140 | }
141 | // Always send back receipt
142 | this.port.postMessage({ event: 'receipt', id, data: receiptData });
143 | }
144 |
145 | sendChunk(chunk) {
146 | const channels = this.readChannelData([chunk]);
147 | const { float32Array, meanValues } = this.formatAudioData(channels);
148 | const rawAudioData = this.floatTo16BitPCM(float32Array);
149 | const monoAudioData = this.floatTo16BitPCM(meanValues);
150 | this.port.postMessage({
151 | event: 'chunk',
152 | data: {
153 | mono: monoAudioData,
154 | raw: rawAudioData,
155 | },
156 | });
157 | }
158 |
159 | process(inputList, outputList, parameters) {
160 | // Copy input to output (e.g. speakers)
161 | // Note that this creates choppy sounds with Mac products
162 | const sourceLimit = Math.min(inputList.length, outputList.length);
163 | for (let inputNum = 0; inputNum < sourceLimit; inputNum++) {
164 | const input = inputList[inputNum];
165 | const output = outputList[inputNum];
166 | const channelCount = Math.min(input.length, output.length);
167 | for (let channelNum = 0; channelNum < channelCount; channelNum++) {
168 | input[channelNum].forEach((sample, i) => {
169 | output[channelNum][i] = sample;
170 | });
171 | }
172 | }
173 | const inputs = inputList[0];
174 | // There's latency at the beginning of a stream before recording starts
175 | // Make sure we actually receive audio data before we start storing chunks
176 | let sliceIndex = 0;
177 | if (!this.foundAudio) {
178 | for (const channel of inputs) {
179 | sliceIndex = 0; // reset for each channel
180 | if (this.foundAudio) {
181 | break;
182 | }
183 | if (channel) {
184 | for (const value of channel) {
185 | if (value !== 0) {
186 | // find only one non-zero entry in any channel
187 | this.foundAudio = true;
188 | break;
189 | } else {
190 | sliceIndex++;
191 | }
192 | }
193 | }
194 | }
195 | }
196 | if (inputs && inputs[0] && this.foundAudio && this.recording) {
197 | // We need to copy the TypedArray, because the \`process\`
198 | // internals will reuse the same buffer to hold each input
199 | const chunk = inputs.map((input) => input.slice(sliceIndex));
200 | this.chunks.push(chunk);
201 | this.sendChunk(chunk);
202 | }
203 | return true;
204 | }
205 | }
206 |
207 | registerProcessor('audio_processor', AudioProcessor);
208 | `;
209 |
210 | const script = new Blob([AudioProcessorWorklet], {
211 | type: 'application/javascript',
212 | });
213 | const src = URL.createObjectURL(script);
214 | export const AudioProcessorSrc = src;
215 |
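216 | // Illustrative wiring sketch (not part of this module; `audioContext` is an assumed
217 | // AudioContext instance): AudioProcessorSrc is loaded as a worklet module, mirroring how
218 | // WavStreamPlayer loads its own processor source:
219 | //
220 | //   await audioContext.audioWorklet.addModule(AudioProcessorSrc);
221 | //   const node = new AudioWorkletNode(audioContext, 'audio_processor');
222 | //   node.port.postMessage({ event: 'start' });  // begin storing chunks (see receive())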
--------------------------------------------------------------------------------
/src/lib/wavtools/lib/worklets/stream_processor.js:
--------------------------------------------------------------------------------
1 | export const StreamProcessorWorklet = `
2 | class StreamProcessor extends AudioWorkletProcessor {
3 | constructor() {
4 | super();
5 | this.hasStarted = false;
6 | this.hasInterrupted = false;
7 | this.outputBuffers = [];
8 | this.bufferLength = 128;
9 | this.write = { buffer: new Float32Array(this.bufferLength), trackId: null };
10 | this.writeOffset = 0;
11 | this.trackSampleOffsets = {};
12 | this.port.onmessage = (event) => {
13 | if (event.data) {
14 | const payload = event.data;
15 | if (payload.event === 'write') {
16 | const int16Array = payload.buffer;
17 | const float32Array = new Float32Array(int16Array.length);
18 | for (let i = 0; i < int16Array.length; i++) {
19 | float32Array[i] = int16Array[i] / 0x8000; // Convert Int16 to Float32
20 | }
21 | this.writeData(float32Array, payload.trackId);
22 | } else if (
23 | payload.event === 'offset' ||
24 | payload.event === 'interrupt'
25 | ) {
26 | const requestId = payload.requestId;
27 | const trackId = this.write.trackId;
28 | const offset = this.trackSampleOffsets[trackId] || 0;
29 | this.port.postMessage({
30 | event: 'offset',
31 | requestId,
32 | trackId,
33 | offset,
34 | });
35 | if (payload.event === 'interrupt') {
36 | this.hasInterrupted = true;
37 | }
38 | } else {
39 | throw new Error(\`Unhandled event "\${payload.event}"\`);
40 | }
41 | }
42 | };
43 | }
44 |
45 | writeData(float32Array, trackId = null) {
46 | let { buffer } = this.write;
47 | let offset = this.writeOffset;
48 | for (let i = 0; i < float32Array.length; i++) {
49 | buffer[offset++] = float32Array[i];
50 | if (offset >= buffer.length) {
51 | this.outputBuffers.push(this.write);
52 | this.write = { buffer: new Float32Array(this.bufferLength), trackId };
53 | buffer = this.write.buffer;
54 | offset = 0;
55 | }
56 | }
57 | this.writeOffset = offset;
58 | return true;
59 | }
60 |
61 | process(inputs, outputs, parameters) {
62 | const output = outputs[0];
63 | const outputChannelData = output[0];
64 | const outputBuffers = this.outputBuffers;
65 | if (this.hasInterrupted) {
66 | this.port.postMessage({ event: 'stop' });
67 | return false;
68 | } else if (outputBuffers.length) {
69 | this.hasStarted = true;
70 | const { buffer, trackId } = outputBuffers.shift();
71 | for (let i = 0; i < outputChannelData.length; i++) {
72 | outputChannelData[i] = buffer[i] || 0;
73 | }
74 | if (trackId) {
75 | this.trackSampleOffsets[trackId] =
76 | this.trackSampleOffsets[trackId] || 0;
77 | this.trackSampleOffsets[trackId] += buffer.length;
78 | }
79 | return true;
80 | } else if (this.hasStarted) {
81 | this.port.postMessage({ event: 'stop' });
82 | return false;
83 | } else {
84 | return true;
85 | }
86 | }
87 | }
88 |
89 | registerProcessor('stream_processor', StreamProcessor);
90 | `;
91 |
92 | const script = new Blob([StreamProcessorWorklet], {
93 | type: 'application/javascript',
94 | });
95 | const src = URL.createObjectURL(script);
96 | export const StreamProcessorSrc = src;
97 |
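98 | // Illustrative message flow (see WavStreamPlayer above; `node` and `int16Array` are assumed
99 | // to exist): audio chunks and control requests all pass through the worklet's message port:
100 | //
101 | //   node.port.postMessage({ event: 'write', buffer: int16Array, trackId: 'default' });
102 | //   node.port.postMessage({ event: 'offset', requestId: crypto.randomUUID() });    // report current offset
103 | //   node.port.postMessage({ event: 'interrupt', requestId: crypto.randomUUID() }); // report offset, then stop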
--------------------------------------------------------------------------------
/src/logo.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/pages/ConsolePage.scss:
--------------------------------------------------------------------------------
1 | [data-component='ConsolePage'] {
2 | font-family: 'Roboto Mono', monospace;
3 | font-weight: 400;
4 | font-style: normal;
5 | font-size: 12px;
6 | height: 100%;
7 | display: flex;
8 | flex-direction: column;
9 | overflow: hidden;
10 | margin: 0px 8px;
11 | & > div {
12 | flex-shrink: 0;
13 | }
14 |
15 | .modal {
16 | position: fixed;
17 | top: 0;
18 | left: 0;
19 | right: 0;
20 | bottom: 0;
21 | background: rgba(0, 0, 0, 0.5); // Semi-transparent background
22 | display: flex;
23 | align-items: center;
24 | justify-content: center;
25 | z-index: 1000; // Ensure the modal is on top of other elements
26 |
27 | .modal-content {
28 | background: #fff;
29 | padding: 24px;
30 | border-radius: 8px;
31 | width: 400px;
32 | max-width: 90%;
33 | box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1);
34 |
35 | h2 {
36 | margin-top: 0;
37 | margin-bottom: 16px;
38 | font-size: 18px;
39 | text-align: center;
40 | }
41 |
42 | div {
43 | margin-bottom: 12px;
44 |
45 | label {
46 | display: block;
47 | margin-bottom: 4px;
48 | font-weight: bold;
49 | }
50 |
51 | input {
52 | width: 100%;
53 | padding: 8px;
54 | font-size: 14px;
55 | border: 1px solid #ccc;
56 | border-radius: 4px;
57 | }
58 |
59 | }
60 |
61 | .row
62 | {
63 | display: flex;
64 | flex-direction: row;
65 | align-items: center;
66 | justify-content: space-between;
67 | }
68 |
69 | button {
70 | width: 30%;
71 | padding: 10px;
72 | font-size: 16px;
73 | background-color: #0078d4;
74 | color: #fff;
75 | border: none;
76 | border-radius: 4px;
77 | cursor: pointer;
78 |
79 | &:last-child
80 | {
81 | // margin-left: 20px;
82 | background-color: #aaa;
83 | }
84 |
85 | &:hover {
86 | background-color: #005fa3;
87 | }
88 | }
89 | }
90 | }
91 |
92 | .spacer {
93 | flex-grow: 1;
94 | }
95 |
96 | .content-top {
97 | display: flex;
98 | align-items: center;
99 | padding: 8px 16px;
100 | min-height: 40px;
101 | .content-title {
102 | flex-grow: 1;
103 | display: flex;
104 | align-items: center;
105 | gap: 12px;
106 | img {
107 | width: 24px;
108 | height: 24px;
109 | }
110 | }
111 | }
112 |
113 | .content-main {
114 | flex-grow: 1;
115 | flex-shrink: 1 !important;
116 | margin: 0px 16px;
117 | display: flex;
118 | overflow: hidden;
119 | margin-bottom: 24px;
120 | .content-block {
121 | position: relative;
122 | display: flex;
123 | flex-direction: column;
124 | max-height: 100%;
125 | width: 100%;
126 | .content-block-title {
127 | flex-shrink: 0;
128 | padding-top: 16px;
129 | padding-bottom: 4px;
130 | position: relative;
131 | }
132 | .content-block-body {
133 | color: #6e6e7f;
134 | position: relative;
135 | flex-grow: 1;
136 | padding: 8px 0px;
137 | padding-top: 4px;
138 | line-height: 1.2em;
139 | overflow: auto;
140 | &.full {
141 | padding: 0px;
142 | }
143 | }
144 | }
145 | .content-right {
146 | width: 300px;
147 | flex-shrink: 0;
148 | display: flex;
149 | flex-direction: column;
150 | margin-left: 24px;
151 | gap: 24px;
152 | & > div {
153 | border-radius: 16px;
154 | flex-grow: 1;
155 | flex-shrink: 0;
156 | overflow: hidden;
157 | position: relative;
158 | .content-block-title {
159 | position: absolute;
160 | display: flex;
161 | align-items: center;
162 | justify-content: center;
163 | line-height: 2em;
164 | top: 16px;
165 | left: 16px;
166 | padding: 4px 16px;
167 | background-color: #fff;
168 | border-radius: 1000px;
169 | min-height: 32px;
170 | z-index: 9999;
171 | text-align: center;
172 | white-space: pre;
173 | &.bottom {
174 | top: auto;
175 | bottom: 16px;
176 | right: 16px;
177 | }
178 | }
179 | }
180 | & > div.kv {
181 | height: 250px;
182 | max-height: 250px;
183 | white-space: pre;
184 | background-color: #ececf1;
185 | .content-block-body {
186 | padding: 16px;
187 | margin-top: 56px;
188 | }
189 | }
190 | }
191 | .content-logs {
192 | flex-grow: 1;
193 | display: flex;
194 | flex-direction: column;
195 | overflow: hidden;
196 | & > div {
197 | flex-grow: 1;
198 | }
199 | & > .content-actions {
200 | flex-grow: 0;
201 | flex-shrink: 0;
202 | display: flex;
203 | align-items: center;
204 | justify-content: center;
205 | gap: 16px;
206 | }
207 | & > div.events {
208 | overflow: hidden;
209 | }
210 | .events {
211 | border-top: 1px solid #e7e7e7;
212 | }
213 | .conversation {
214 | display: flex;
215 | flex-shrink: 0;
216 | width: 100%;
217 | overflow: hidden;
218 | height: 200px;
219 | min-height: 0;
220 | max-height: 200px;
221 | border-top: 1px solid #e7e7e7;
222 | }
223 | }
224 | }
225 |
226 | .conversation-item {
227 | position: relative;
228 | display: flex;
229 | gap: 16px;
230 | margin-bottom: 16px;
231 | &:not(:hover) .close {
232 | display: none;
233 | }
234 | .close {
235 | position: absolute;
236 | top: 0px;
237 | right: -20px;
238 | background: #aaa;
239 | color: #fff;
240 | display: flex;
241 | border-radius: 16px;
242 | padding: 2px;
243 | cursor: pointer;
244 | &:hover {
245 | background: #696969;
246 | }
247 | svg {
248 | stroke-width: 3;
249 | width: 12px;
250 | height: 12px;
251 | }
252 | }
253 | .speaker {
254 | position: relative;
255 | text-align: left;
256 | gap: 16px;
257 | width: 80px;
258 | flex-shrink: 0;
259 | margin-right: 16px;
260 | &.user {
261 | color: #0099ff;
262 | }
263 | &.assistant {
264 | color: #009900;
265 | }
266 | }
267 | .speaker-content {
268 | color: #18181b;
269 | overflow: hidden;
270 | word-wrap: break-word;
271 | }
272 | }
273 |
274 | .event {
275 | border-radius: 3px;
276 | white-space: pre;
277 | display: flex;
278 | padding: 0px;
279 | gap: 16px;
280 | .event-timestamp {
281 | text-align: left;
282 | gap: 8px;
283 | padding: 4px 0px;
284 | width: 80px;
285 | flex-shrink: 0;
286 | margin-right: 16px;
287 | }
288 | .event-details {
289 | display: flex;
290 | flex-direction: column;
291 | color: #18181b;
292 | gap: 8px;
293 | .event-summary {
294 | padding: 4px 8px;
295 | margin: 0px -8px;
296 | &:hover {
297 | border-radius: 8px;
298 | background-color: #f0f0f0;
299 | }
300 | cursor: pointer;
301 | display: flex;
302 | gap: 8px;
303 | align-items: center;
304 | .event-source {
305 | flex-shrink: 0;
306 | display: flex;
307 | align-items: center;
308 | gap: 8px;
309 | &.client {
310 | color: #0099ff;
311 | }
312 | &.server {
313 | color: #009900;
314 | }
315 | &.error {
316 | color: #990000;
317 | }
318 | svg {
319 | stroke-width: 3;
320 | width: 12px;
321 | height: 12px;
322 | }
323 | }
324 | }
325 | }
326 | }
327 |
328 | .visualization {
329 | position: absolute;
330 | display: flex;
331 | bottom: 4px;
332 | right: 8px;
333 | padding: 4px;
334 | border-radius: 16px;
335 | z-index: 10;
336 | gap: 2px;
337 | .visualization-entry {
338 | position: relative;
339 | display: flex;
340 | align-items: center;
341 | height: 40px;
342 | width: 100px;
343 | gap: 4px;
344 | &.client {
345 | color: #0099ff;
346 | }
347 | &.server {
348 | color: #009900;
349 | }
350 | canvas {
351 | width: 100%;
352 | height: 100%;
353 | color: currentColor;
354 | }
355 | }
356 | }
357 | }
358 |
--------------------------------------------------------------------------------
/src/react-app-env.d.ts:
--------------------------------------------------------------------------------
1 | /// <reference types="react-scripts" />
2 |
--------------------------------------------------------------------------------
/src/reportWebVitals.ts:
--------------------------------------------------------------------------------
1 | import { ReportHandler } from 'web-vitals';
2 |
3 | const reportWebVitals = (onPerfEntry?: ReportHandler) => {
4 | if (onPerfEntry && onPerfEntry instanceof Function) {
5 | import('web-vitals').then(({ getCLS, getFID, getFCP, getLCP, getTTFB }) => {
6 | getCLS(onPerfEntry);
7 | getFID(onPerfEntry);
8 | getFCP(onPerfEntry);
9 | getLCP(onPerfEntry);
10 | getTTFB(onPerfEntry);
11 | });
12 | }
13 | };
14 |
15 | export default reportWebVitals;
16 |
--------------------------------------------------------------------------------
/src/setupTests.ts:
--------------------------------------------------------------------------------
1 | // jest-dom adds custom jest matchers for asserting on DOM nodes.
2 | // allows you to do things like:
3 | // expect(element).toHaveTextContent(/react/i)
4 | // learn more: https://github.com/testing-library/jest-dom
5 | import '@testing-library/jest-dom';
6 |
--------------------------------------------------------------------------------
/src/utils/conversation_config.js:
--------------------------------------------------------------------------------
1 | export const instructions = `System settings:
2 | Tool use: enabled.
3 |
4 | Instructions:
5 | - You are an artificial intelligence agent responsible for helping test realtime voice capabilities
6 | - Please make sure to respond with a helpful voice via audio
7 | - Be kind, helpful, and courteous
8 | - It is okay to ask the user questions
9 | - Use tools and functions you have available liberally; it is part of the training apparatus
10 | - Be open to exploration and conversation
11 | - Remember: this is just for fun and testing!
12 |
13 | Personality:
14 | - Be upbeat and genuine
15 | - Try speaking quickly as if excited
16 | `;
17 |
--------------------------------------------------------------------------------
/src/utils/wav_renderer.ts:
--------------------------------------------------------------------------------
1 | const dataMap = new WeakMap();
2 |
3 | /**
4 | * Normalizes a Float32Array to Array(m): We use this to draw amplitudes on a graph
5 | * If we're rendering the same audio data, then we'll often be using
6 |  * the same (data, m, downsamplePeaks) triplets, so we give the option to memoize
7 | */
8 | const normalizeArray = (
9 | data: Float32Array,
10 | m: number,
11 | downsamplePeaks: boolean = false,
12 | memoize: boolean = false
13 | ) => {
14 | let cache, mKey, dKey;
15 | if (memoize) {
16 | mKey = m.toString();
17 | dKey = downsamplePeaks.toString();
18 | cache = dataMap.has(data) ? dataMap.get(data) : {};
19 | dataMap.set(data, cache);
20 | cache[mKey] = cache[mKey] || {};
21 | if (cache[mKey][dKey]) {
22 | return cache[mKey][dKey];
23 | }
24 | }
25 | const n = data.length;
26 | const result = new Array(m);
27 | if (m <= n) {
28 | // Downsampling
29 | result.fill(0);
30 | const count = new Array(m).fill(0);
31 | for (let i = 0; i < n; i++) {
32 | const index = Math.floor(i * (m / n));
33 | if (downsamplePeaks) {
34 | // take highest result in the set
35 | result[index] = Math.max(result[index], Math.abs(data[i]));
36 | } else {
37 | result[index] += Math.abs(data[i]);
38 | }
39 | count[index]++;
40 | }
41 | if (!downsamplePeaks) {
42 | for (let i = 0; i < result.length; i++) {
43 | result[i] = result[i] / count[i];
44 | }
45 | }
46 | } else {
47 | for (let i = 0; i < m; i++) {
48 | const index = (i * (n - 1)) / (m - 1);
49 | const low = Math.floor(index);
50 | const high = Math.ceil(index);
51 | const t = index - low;
52 | if (high >= n) {
53 | result[i] = data[n - 1];
54 | } else {
55 | result[i] = data[low] * (1 - t) + data[high] * t;
56 | }
57 | }
58 | }
59 | if (memoize) {
60 | cache[mKey as string][dKey as string] = result;
61 | }
62 | return result;
63 | };
64 |
65 | export const WavRenderer = {
66 | /**
67 | * Renders a point-in-time snapshot of an audio sample, usually frequency values
68 | * @param canvas
69 | * @param ctx
70 | * @param data
71 | * @param color
72 | * @param pointCount number of bars to render
73 | * @param barWidth width of bars in px
74 | * @param barSpacing spacing between bars in px
75 | * @param center vertically center the bars
76 | */
77 | drawBars: (
78 | canvas: HTMLCanvasElement,
79 | ctx: CanvasRenderingContext2D,
80 | data: Float32Array,
81 | color: string,
82 | pointCount: number = 0,
83 | barWidth: number = 0,
84 | barSpacing: number = 0,
85 | center: boolean = false
86 | ) => {
87 | pointCount = Math.floor(
88 | Math.min(
89 | pointCount,
90 | (canvas.width - barSpacing) / (Math.max(barWidth, 1) + barSpacing)
91 | )
92 | );
93 | if (!pointCount) {
94 | pointCount = Math.floor(
95 | (canvas.width - barSpacing) / (Math.max(barWidth, 1) + barSpacing)
96 | );
97 | }
98 | if (!barWidth) {
99 | barWidth = (canvas.width - barSpacing) / pointCount - barSpacing;
100 | }
101 | const points = normalizeArray(data, pointCount, true);
102 | for (let i = 0; i < pointCount; i++) {
103 | const amplitude = Math.abs(points[i]);
104 | const height = Math.max(1, amplitude * canvas.height);
105 | const x = barSpacing + i * (barWidth + barSpacing);
106 | const y = center ? (canvas.height - height) / 2 : canvas.height - height;
107 | ctx.fillStyle = color;
108 | ctx.fillRect(x, y, barWidth, height);
109 | }
110 | },
111 | };
112 |
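113 | // Illustrative usage sketch (`canvas` and `wavStreamPlayer` are assumed to exist;
114 | // getFrequencies() comes from the wavtools classes above):
115 | //
116 | //   const ctx = canvas.getContext('2d');
117 | //   if (ctx) {
118 | //     const { values } = wavStreamPlayer.getFrequencies('voice');
119 | //     WavRenderer.drawBars(canvas, ctx, values, '#009900', 10, 0, 8);
120 | //   }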
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "target": "ES2020",
4 | "lib": ["dom", "dom.iterable", "esnext", "ES2020"],
5 | "allowJs": true,
6 | "skipLibCheck": true,
7 | "esModuleInterop": true,
8 | "allowSyntheticDefaultImports": true,
9 | "strict": true,
10 | "forceConsistentCasingInFileNames": true,
11 | "noFallthroughCasesInSwitch": true,
12 | "module": "esnext",
13 | "moduleResolution": "node",
14 | "resolveJsonModule": true,
15 | "isolatedModules": true,
16 | "noEmit": true,
17 | "jsx": "react-jsx"
18 | },
19 | "include": ["src", "src/lib"]
20 | }
21 |
--------------------------------------------------------------------------------