├── .editorconfig ├── .env.example ├── .eslintrc.json ├── .github ├── funding.yml └── workflows │ └── main.yml ├── .gitignore ├── .npmrc ├── .prettierignore ├── .prettierrc ├── examples ├── node │ ├── audio.ts │ ├── basic.ts │ ├── convo.ts │ ├── mic.d.ts │ ├── package.json │ └── relay-server.ts └── openai-realtime-console │ ├── .eslintrc.json │ ├── .gitignore │ ├── .prettierrc │ ├── LICENSE │ ├── README.md │ ├── package.json │ ├── public │ ├── index.html │ ├── openai-logomark.svg │ └── robots.txt │ ├── readme │ └── realtime-console-demo.png │ ├── src │ ├── App.scss │ ├── App.tsx │ ├── components │ │ ├── Map.scss │ │ ├── Map.tsx │ │ ├── button │ │ │ ├── Button.scss │ │ │ └── Button.tsx │ │ └── toggle │ │ │ ├── Toggle.scss │ │ │ └── Toggle.tsx │ ├── index.css │ ├── index.tsx │ ├── lib │ │ └── wavtools │ │ │ ├── index.js │ │ │ └── lib │ │ │ ├── analysis │ │ │ ├── audio_analysis.js │ │ │ └── constants.js │ │ │ ├── wav_packer.js │ │ │ ├── wav_recorder.js │ │ │ ├── wav_stream_player.js │ │ │ └── worklets │ │ │ ├── audio_processor.js │ │ │ └── stream_processor.js │ ├── logo.svg │ ├── pages │ │ ├── ConsolePage.scss │ │ └── ConsolePage.tsx │ ├── react-app-env.d.ts │ ├── reportWebVitals.ts │ ├── setupTests.ts │ └── utils │ │ ├── conversation_config.js │ │ └── wav_renderer.ts │ └── tsconfig.json ├── fixtures └── toronto.mp3 ├── license ├── package.json ├── pnpm-lock.yaml ├── pnpm-workspace.yaml ├── readme.md ├── src ├── api.ts ├── client.test.ts ├── client.ts ├── conversation.ts ├── event-handler.ts ├── events.ts ├── index.ts ├── node │ ├── index.ts │ └── relay-server.ts ├── reset.d.ts ├── types.ts └── utils.ts ├── tsconfig.json └── tsup.config.ts /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | indent_style = space 5 | indent_size = 2 6 | tab_width = 2 7 | end_of_line = lf 8 | charset = utf-8 9 | trim_trailing_whitespace = true 10 | insert_final_newline = true 11 | -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY= 2 | -------------------------------------------------------------------------------- /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "root": true, 3 | "extends": ["@fisch0920/eslint-config/node"], 4 | "rules": { 5 | "unicorn/consistent-function-scoping": "off" 6 | }, 7 | "ignorePatterns": ["examples/openai-realtime-console"] 8 | } 9 | -------------------------------------------------------------------------------- /.github/funding.yml: -------------------------------------------------------------------------------- 1 | github: [transitive-bullshit] 2 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | test: 7 | name: Test Node.js ${{ matrix.node-version }} 8 | runs-on: ubuntu-latest 9 | strategy: 10 | fail-fast: true 11 | matrix: 12 | node-version: 13 | - 18 14 | - 20 15 | - 21 16 | - 22 17 | 18 | steps: 19 | - name: Checkout 20 | uses: actions/checkout@v4 21 | 22 | - name: Install pnpm 23 | uses: pnpm/action-setup@v3 24 | id: pnpm-install 25 | with: 26 | version: 9.12.2 27 | run_install: false 28 | 29 | - name: Install Node.js 30 | uses: actions/setup-node@v4 31 | with: 32 | node-version: ${{ 
matrix.node-version }} 33 | cache: 'pnpm' 34 | 35 | - name: Install libasound2-dev (for optional "speaker" dev dep) 36 | run: sudo apt-get install -y libasound2-dev 37 | 38 | - name: Install dependencies 39 | run: pnpm install --frozen-lockfile --strict-peer-dependencies 40 | 41 | - name: Run test 42 | run: pnpm test 43 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 2 | 3 | # dependencies 4 | node_modules 5 | /.pnp 6 | .pnp.js 7 | 8 | # testing 9 | /coverage 10 | 11 | # next.js 12 | .next/ 13 | 14 | # production 15 | build/ 16 | dist/ 17 | 18 | # misc 19 | .DS_Store 20 | *.pem 21 | 22 | # debug 23 | npm-debug.log* 24 | yarn-debug.log* 25 | yarn-error.log* 26 | .pnpm-debug.log* 27 | 28 | # local env files 29 | .env*.local 30 | 31 | # turbo 32 | .turbo 33 | 34 | # vercel 35 | .vercel 36 | 37 | # typescript 38 | *.tsbuildinfo 39 | next-env.d.ts 40 | 41 | .env 42 | 43 | old/ 44 | out/ 45 | -------------------------------------------------------------------------------- /.npmrc: -------------------------------------------------------------------------------- 1 | enable-pre-post-scripts=true 2 | package-manager-strict=false 3 | -------------------------------------------------------------------------------- /.prettierignore: -------------------------------------------------------------------------------- 1 | examples/openai-realtime-console 2 | -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "singleQuote": true, 3 | "jsxSingleQuote": true, 4 | "semi": false, 5 | "useTabs": false, 6 | "tabWidth": 2, 7 | "bracketSpacing": true, 8 | "bracketSameLine": false, 9 | "arrowParens": "always", 10 | "trailingComma": "none" 11 | } 12 | -------------------------------------------------------------------------------- /examples/node/audio.ts: -------------------------------------------------------------------------------- 1 | import 'dotenv/config' 2 | 3 | import fs from 'node:fs/promises' 4 | 5 | import decodeAudio from 'audio-decode' 6 | import { arrayBufferToBase64, RealtimeClient } from 'openai-realtime-api' 7 | 8 | /** 9 | * Simple Node.js demo using the `RealtimeClient` which sends a short audio 10 | * message and waits for a complete response. 11 | */ 12 | async function main() { 13 | const audioFile = await fs.readFile('./fixtures/toronto.mp3') 14 | const audioBuffer = await decodeAudio(audioFile) 15 | const channelData = audioBuffer.getChannelData(0) // only accepts mono 16 | const audio = arrayBufferToBase64(channelData) 17 | 18 | const client = new RealtimeClient({ 19 | debug: false, 20 | sessionConfig: { 21 | instructions: 22 | 'Please follow the instructions of any query you receive.\n' + 23 | 'Be concise in your responses. 
Speak quickly and answer shortly.', 24 | turn_detection: null 25 | } 26 | }) 27 | 28 | await client.connect() 29 | await client.waitForSessionCreated() 30 | 31 | console.log('Sending toronto.mp3 audio message...') 32 | client.sendUserMessageContent([{ type: 'input_audio', audio }]) 33 | 34 | const event = await client.realtime.waitForNext('response.done') 35 | console.log(JSON.stringify(event, null, 2)) 36 | 37 | client.disconnect() 38 | } 39 | 40 | await main() 41 | -------------------------------------------------------------------------------- /examples/node/basic.ts: -------------------------------------------------------------------------------- 1 | import 'dotenv/config' 2 | 3 | import { RealtimeClient } from 'openai-realtime-api' 4 | 5 | /** 6 | * Simple Node.js demo using the `RealtimeClient` which sends a text message and 7 | * waits for a complete response. 8 | */ 9 | async function main() { 10 | const client = new RealtimeClient({ 11 | debug: false, 12 | sessionConfig: { 13 | instructions: 14 | 'Please follow the instructions of any query you receive.\n' + 15 | 'Be concise in your responses. Speak quickly and answer shortly.', 16 | turn_detection: null 17 | } 18 | }) 19 | 20 | await client.connect() 21 | await client.waitForSessionCreated() 22 | 23 | const text = 'How are you?' 24 | console.log(text) 25 | client.sendUserMessageContent([{ type: 'input_text', text }]) 26 | 27 | const event = await client.realtime.waitForNext('response.done') 28 | console.log(JSON.stringify(event, null, 2)) 29 | 30 | client.disconnect() 31 | } 32 | 33 | await main() 34 | -------------------------------------------------------------------------------- /examples/node/convo.ts: -------------------------------------------------------------------------------- 1 | import 'dotenv/config' 2 | 3 | import { Readable } from 'node:stream' 4 | 5 | import microphone from 'mic' 6 | import { RealtimeClient } from 'openai-realtime-api' 7 | import Speaker from 'speaker' 8 | 9 | /** 10 | * Simple Node.js demo using the `RealtimeClient` with a microphone and speaker 11 | * to simulate a full, back & forth conversation from the terminal. 12 | */ 13 | async function main() { 14 | const client = new RealtimeClient({ 15 | debug: false, 16 | sessionConfig: { 17 | instructions: 18 | 'Please follow the instructions of any query you receive.\n' + 19 | 'Be concise in your responses. Speak quickly and answer shortly.', 20 | turn_detection: null 21 | } 22 | }) 23 | 24 | await client.connect() 25 | await client.waitForSessionCreated() 26 | 27 | let mic: microphone.Mic | undefined 28 | let speaker: Speaker | undefined 29 | startAudioStream() 30 | 31 | client.on('conversation.item.completed', ({ item }) => { 32 | const { formatted: _, ...rest } = item 33 | console.log('Conversation item completed:', rest) 34 | 35 | if ( 36 | item.type === 'message' && 37 | item.role === 'assistant' && 38 | item.formatted && 39 | item.formatted.audio 40 | ) { 41 | console.log(`Playing audio response... 
"${item.formatted.transcript}"`) 42 | playAudio(item.formatted.audio) 43 | } 44 | }) 45 | 46 | function startAudioStream() { 47 | try { 48 | mic = microphone({ 49 | rate: '24000', 50 | channels: '1', 51 | debug: false, 52 | exitOnSilence: 6, 53 | fileType: 'raw', 54 | encoding: 'signed-integer' 55 | }) 56 | 57 | const micInputStream = mic!.getAudioStream() 58 | 59 | micInputStream.on('error', (error: any) => { 60 | console.error('Microphone error:', error) 61 | }) 62 | 63 | mic!.start() 64 | console.log('Microphone started streaming.') 65 | 66 | let audioBuffer = Buffer.alloc(0) 67 | const chunkSize = 4800 // 0.2 seconds of audio at 24kHz 68 | 69 | micInputStream.on('data', (data: Buffer) => { 70 | audioBuffer = Buffer.concat([audioBuffer, data]) 71 | 72 | while (audioBuffer.length >= chunkSize) { 73 | const chunk = audioBuffer.subarray(0, chunkSize) 74 | audioBuffer = audioBuffer.subarray(chunkSize) 75 | 76 | const int16Array = new Int16Array( 77 | chunk.buffer, 78 | chunk.byteOffset, 79 | chunk.length / 2 80 | ) 81 | 82 | try { 83 | client.appendInputAudio(int16Array) 84 | } catch (err) { 85 | console.error('Error sending audio data:', err) 86 | } 87 | } 88 | }) 89 | 90 | micInputStream.on('silence', () => { 91 | console.log('Silence detected, creating response...') 92 | try { 93 | client.createResponse() 94 | } catch (err) { 95 | console.error('Error creating response:', err) 96 | } 97 | }) 98 | } catch (err) { 99 | console.error('Error starting audio stream:', err) 100 | } 101 | } 102 | 103 | function playAudio(audioData: Int16Array) { 104 | try { 105 | if (!speaker) { 106 | speaker = new Speaker({ 107 | channels: 1, 108 | bitDepth: 16, 109 | sampleRate: client.conversation.frequency 110 | }) 111 | } 112 | 113 | const origSpeaker = speaker 114 | 115 | const buffer = Buffer.from(audioData.buffer) 116 | const readableStream = new Readable({ 117 | read() { 118 | if (speaker !== origSpeaker) return 119 | this.push(buffer) 120 | this.push(null) 121 | } 122 | }) 123 | 124 | // Pipe the audio stream to the speaker 125 | readableStream.pipe(speaker) 126 | console.log( 127 | 'Audio sent to speaker for playback. 
Buffer length:', 128 | buffer.length 129 | ) 130 | 131 | speaker.on('close', () => { 132 | speaker = undefined 133 | }) 134 | } catch (err) { 135 | console.error('Error playing audio:', err) 136 | } 137 | } 138 | } 139 | 140 | await main() 141 | -------------------------------------------------------------------------------- /examples/node/mic.d.ts: -------------------------------------------------------------------------------- 1 | declare module 'mic' { 2 | import type { Transform } from 'node:stream' 3 | 4 | export function mic(options: Options): Mic 5 | 6 | export interface Mic { 7 | start(): void 8 | stop(): void 9 | pause(): void 10 | resume(): void 11 | getAudioStream(): Transform 12 | } 13 | 14 | export interface Options { 15 | endian?: 'big' | 'little' 16 | bitwidth?: number | string 17 | encoding?: 'signed-integer' | 'unsigned-integer' 18 | rate?: number | string 19 | channels?: number | string 20 | device?: string 21 | exitOnSilence?: number | string 22 | debug?: boolean | string 23 | fileType?: string 24 | } 25 | 26 | export = mic 27 | } 28 | -------------------------------------------------------------------------------- /examples/node/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "openai-realtime-api-examples-node", 3 | "version": "0.1.0", 4 | "private": true, 5 | "author": "Travis Fischer ", 6 | "license": "MIT", 7 | "repository": { 8 | "type": "git", 9 | "url": "git+https://github.com/transitive-bullshit/openai-realtime-api.git" 10 | }, 11 | "type": "module", 12 | "dependencies": { 13 | "openai-realtime-api": "workspace:*", 14 | "audio-decode": "^2.2.2", 15 | "dotenv": "^16.4.5" 16 | }, 17 | "optionalDependencies": { 18 | "mic": "^2.1.2", 19 | "speaker": "^0.5.5" 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /examples/node/relay-server.ts: -------------------------------------------------------------------------------- 1 | import 'dotenv/config' 2 | 3 | import { RealtimeClient } from 'openai-realtime-api' 4 | import { RealtimeRelay } from 'openai-realtime-api/node' 5 | 6 | /** 7 | * Simple Node.js demo showing how to run the relay server. 8 | */ 9 | async function main() { 10 | const client = new RealtimeClient({ 11 | debug: false, 12 | relay: true, 13 | sessionConfig: { 14 | instructions: 15 | 'Please follow the instructions of any query you receive.\n' + 16 | 'Be concise in your responses. Speak quickly and answer shortly.', 17 | turn_detection: null 18 | } 19 | }) 20 | 21 | const relay = new RealtimeRelay({ client }) 22 | relay.listen() 23 | } 24 | 25 | await main() 26 | -------------------------------------------------------------------------------- /examples/openai-realtime-console/.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "root": true, 3 | "parserOptions": { 4 | "sourceType": "module" 5 | }, 6 | "env": { 7 | "es2022": true 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /examples/openai-realtime-console/.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 
2 | 3 | # dependencies 4 | /node_modules 5 | /.pnp 6 | .pnp.js 7 | 8 | # testing 9 | /coverage 10 | 11 | # production 12 | /build 13 | 14 | # packaging 15 | *.zip 16 | *.tar.gz 17 | *.tar 18 | *.tgz 19 | *.bla 20 | 21 | # misc 22 | .DS_Store 23 | .env 24 | .env.local 25 | .env.development.local 26 | .env.test.local 27 | .env.production.local 28 | 29 | npm-debug.log* 30 | yarn-debug.log* 31 | yarn-error.log* 32 | -------------------------------------------------------------------------------- /examples/openai-realtime-console/.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "tabWidth": 2, 3 | "useTabs": false, 4 | "singleQuote": true 5 | } 6 | -------------------------------------------------------------------------------- /examples/openai-realtime-console/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 OpenAI 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /examples/openai-realtime-console/README.md: -------------------------------------------------------------------------------- 1 | > [!IMPORTANT] 2 | > This example has been imported from https://github.com/openai/openai-realtime-console ([at commit 6ea4dba](https://github.com/openai/openai-realtime-console/tree/6ea4dba795fee868c60ea9e8e7eba7469974b3e9)). The only change has been to replace `@openai/realtime-api-beta` with `openai-realtime-api` and to fix a few types. 3 | 4 | # OpenAI Realtime Console 5 | 6 | The OpenAI Realtime Console is intended as an inspector and interactive API reference 7 | for the OpenAI Realtime API. It comes packaged with two utility libraries, 8 | [openai/openai-realtime-api-beta](https://github.com/openai/openai-realtime-api-beta) 9 | that acts as a **Reference Client** (for browser and Node.js) and 10 | [`/src/lib/wavtools`](./src/lib/wavtools) which allows for simple audio 11 | management in the browser. 12 | 13 | 14 | 15 | # Starting the console 16 | 17 | This is a React project created using `create-react-app` that is bundled via Webpack. 18 | Install it by extracting the contents of this package and using; 19 | 20 | ```shell 21 | $ npm i 22 | ``` 23 | 24 | Start your server with: 25 | 26 | ```shell 27 | $ npm start 28 | ``` 29 | 30 | It should be available via `localhost:3000`. 
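Since this copy of the console is vendored inside the `openai-realtime-api` pnpm workspace (see `pnpm-workspace.yaml` at the repo root), you can also install and run it from the monorepo rather than as a standalone package; a minimal sketch, assuming you are working from a checkout of this repository:

```shell
# from the repository root: installs every workspace package, including this example
$ pnpm install

# then start the console from its package directory
$ cd examples/openai-realtime-console
$ pnpm start
```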
31 | 32 | # Table of contents 33 | 34 | - [OpenAI Realtime Console](#openai-realtime-console) 35 | - [Starting the console](#starting-the-console) 36 | - [Table of contents](#table-of-contents) 37 | - [Using the console](#using-the-console) 38 | - [Using a relay server](#using-a-relay-server) 39 | - [Realtime API reference client](#realtime-api-reference-client) 40 | - [Sending streaming audio](#sending-streaming-audio) 41 | - [Adding and using tools](#adding-and-using-tools) 42 | - [Interrupting the model](#interrupting-the-model) 43 | - [Reference client events](#reference-client-events) 44 | - [Wavtools](#wavtools) 45 | - [WavRecorder Quickstart](#wavrecorder-quickstart) 46 | - [WavStreamPlayer Quickstart](#wavstreamplayer-quickstart) 47 | - [Acknowledgements and contact](#acknowledgements-and-contact) 48 | 49 | # Using the console 50 | 51 | The console requires an OpenAI API key (**user key** or **project key**) that has access to the 52 | Realtime API. You'll be prompted on startup to enter it. It will be saved via `localStorage` and can be 53 | changed at any time from the UI. 54 | 55 | To start a session you'll need to **connect**. This will require microphone access. 56 | You can then choose between **manual** (Push-to-talk) and **vad** (Voice Activity Detection) 57 | conversation modes, and switch between them at any time. 58 | 59 | There are two functions enabled; 60 | 61 | - `get_weather`: Ask for the weather anywhere and the model will do its best to pinpoint the 62 | location, show it on a map, and get the weather for that location. Note that it doesn't 63 | have location access, and coordinates are "guessed" from the model's training data so 64 | accuracy might not be perfect. 65 | - `set_memory`: You can ask the model to remember information for you, and it will store it in 66 | a JSON blob on the left. 67 | 68 | You can freely interrupt the model at any time in push-to-talk or VAD mode. 69 | 70 | ## Using a relay server 71 | 72 | If you would like to build a more robust implementation and play around with the reference 73 | client using your own server, we have included a Node.js [Relay Server](/relay-server/index.js). 74 | 75 | ```shell 76 | $ npm run relay 77 | ``` 78 | 79 | It will start automatically on `localhost:8081`. 80 | 81 | **You will need to create a `.env` file** with the following configuration: 82 | 83 | ```conf 84 | OPENAI_API_KEY=YOUR_API_KEY 85 | REACT_APP_LOCAL_RELAY_SERVER_URL=http://localhost:8081 86 | ``` 87 | 88 | You will need to restart both your React app and relay server for the `.env.` changes 89 | to take effect. The local server URL is loaded via [`ConsolePage.tsx`](/src/pages/ConsolePage.tsx). 90 | To stop using the relay server at any time, simply delete the environment 91 | variable or set it to empty string. 
92 | 93 | ```javascript 94 | /** 95 | * Running a local relay server will allow you to hide your API key 96 | * and run custom logic on the server 97 | * 98 | * Set the local relay server address to: 99 | * REACT_APP_LOCAL_RELAY_SERVER_URL=http://localhost:8081 100 | * 101 | * This will also require you to set OPENAI_API_KEY= in a `.env` file 102 | * You can run it with `npm run relay`, in parallel with `npm start` 103 | */ 104 | const LOCAL_RELAY_SERVER_URL: string = 105 | process.env.REACT_APP_LOCAL_RELAY_SERVER_URL || ''; 106 | ``` 107 | 108 | This server is **only a simple message relay**, but it can be extended to: 109 | 110 | - Hide API credentials if you would like to ship an app to play with online 111 | - Handle certain calls you would like to keep secret (e.g. `instructions`) on 112 | the server directly 113 | - Restrict what types of events the client can receive and send 114 | 115 | You will have to implement these features yourself. 116 | 117 | # Realtime API reference client 118 | 119 | The latest reference client and documentation are available on GitHub at 120 | [openai/openai-realtime-api-beta](https://github.com/openai/openai-realtime-api-beta). 121 | 122 | You can use this client yourself in any React (front-end) or Node.js project. 123 | For full documentation, refer to the GitHub repository, but you can use the 124 | guide here as a primer to get started. 125 | 126 | ```javascript 127 | import { RealtimeClient } from '/src/lib/realtime-api-beta/index.js'; 128 | 129 | const client = new RealtimeClient({ apiKey: process.env.OPENAI_API_KEY }); 130 | 131 | // Can set parameters ahead of connecting 132 | client.updateSession({ instructions: 'You are a great, upbeat friend.' }); 133 | client.updateSession({ voice: 'alloy' }); 134 | client.updateSession({ turn_detection: 'server_vad' }); 135 | client.updateSession({ input_audio_transcription: { model: 'whisper-1' } }); 136 | 137 | // Set up event handling 138 | client.on('conversation.updated', ({ item, delta }) => { 139 | const items = client.conversation.getItems(); // can use this to render all items 140 | /* includes all changes to conversations, delta may be populated */ 141 | }); 142 | 143 | // Connect to Realtime API 144 | await client.connect(); 145 | 146 | // Send an item and triggers a generation 147 | client.sendUserMessageContent([{ type: 'text', text: `How are you?` }]); 148 | ``` 149 | 150 | ## Sending streaming audio 151 | 152 | To send streaming audio, use the `.appendInputAudio()` method. If you're in `turn_detection: 'disabled'` mode, 153 | then you need to use `.generate()` to tell the model to respond. 154 | 155 | ```javascript 156 | // Send user audio, must be Int16Array or ArrayBuffer 157 | // Default audio format is pcm16 with sample rate of 24,000 Hz 158 | // This populates 1s of noise in 0.1s chunks 159 | for (let i = 0; i < 10; i++) { 160 | const data = new Int16Array(2400); 161 | for (let n = 0; n < 2400; n++) { 162 | const value = Math.floor((Math.random() * 2 - 1) * 0x8000); 163 | data[n] = value; 164 | } 165 | client.appendInputAudio(data); 166 | } 167 | // Pending audio is committed and model is asked to generate 168 | client.createResponse(); 169 | ``` 170 | 171 | ## Adding and using tools 172 | 173 | Working with tools is easy. Just call `.addTool()` and set a callback as the second parameter. 174 | The callback will be executed with the parameters for the tool, and the result will be automatically 175 | sent back to the model. 
176 | 177 | ```javascript 178 | // We can add tools as well, with callbacks specified 179 | client.addTool( 180 | { 181 | name: 'get_weather', 182 | description: 183 | 'Retrieves the weather for a given lat, lng coordinate pair. Specify a label for the location.', 184 | parameters: { 185 | type: 'object', 186 | properties: { 187 | lat: { 188 | type: 'number', 189 | description: 'Latitude', 190 | }, 191 | lng: { 192 | type: 'number', 193 | description: 'Longitude', 194 | }, 195 | location: { 196 | type: 'string', 197 | description: 'Name of the location', 198 | }, 199 | }, 200 | required: ['lat', 'lng', 'location'], 201 | }, 202 | }, 203 | async ({ lat, lng, location }) => { 204 | const result = await fetch( 205 | `https://api.open-meteo.com/v1/forecast?latitude=${lat}&longitude=${lng}&current=temperature_2m,wind_speed_10m`, 206 | ); 207 | const json = await result.json(); 208 | return json; 209 | }, 210 | ); 211 | ``` 212 | 213 | ## Interrupting the model 214 | 215 | You may want to manually interrupt the model, especially in `turn_detection: 'disabled'` mode. 216 | To do this, we can use: 217 | 218 | ```javascript 219 | // id is the id of the item currently being generated 220 | // sampleCount is the number of audio samples that have been heard by the listener 221 | client.cancelResponse(id, sampleCount); 222 | ``` 223 | 224 | This method will cause the model to immediately cease generation, but also truncate the 225 | item being played by removing all audio after `sampleCount` and clearing the text 226 | response. By using this method you can interrupt the model and prevent it from "remembering" 227 | anything it has generated that is ahead of where the user's state is. 228 | 229 | ## Reference client events 230 | 231 | There are five main client events for application control flow in `RealtimeClient`. 232 | Note that this is only an overview of using the client; the full Realtime API 233 | event specification is considerably larger. If you need more control, check out the GitHub repository: 234 | [openai/openai-realtime-api-beta](https://github.com/openai/openai-realtime-api-beta). 235 | 236 | ```javascript 237 | // errors like connection failures 238 | client.on('error', (event) => { 239 | // do thing 240 | }); 241 | 242 | // in VAD mode, the user starts speaking 243 | // we can use this to stop audio playback of a previous response if necessary 244 | client.on('conversation.interrupted', () => { 245 | /* do something */ 246 | }); 247 | 248 | // includes all changes to conversations 249 | // delta may be populated 250 | client.on('conversation.updated', ({ item, delta }) => { 251 | // get all items, e.g. 
if you need to update a chat window 252 | const items = client.conversation.getItems(); 253 | switch (item.type) { 254 | case 'message': 255 | // system, user, or assistant message (item.role) 256 | break; 257 | case 'function_call': 258 | // always a function call from the model 259 | break; 260 | case 'function_call_output': 261 | // always a response from the user / application 262 | break; 263 | } 264 | if (delta) { 265 | // Only one of the following will be populated for any given event 266 | // delta.audio = Int16Array, audio added 267 | // delta.transcript = string, transcript added 268 | // delta.arguments = string, function arguments added 269 | } 270 | }); 271 | 272 | // only triggered after item added to conversation 273 | client.on('conversation.item.appended', ({ item }) => { 274 | /* item status can be 'in_progress' or 'completed' */ 275 | }); 276 | 277 | // only triggered after item completed in conversation 278 | // will always be triggered after conversation.item.appended 279 | client.on('conversation.item.completed', ({ item }) => { 280 | /* item status will always be 'completed' */ 281 | }); 282 | ``` 283 | 284 | # Wavtools 285 | 286 | Wavtools contains easy management of PCM16 audio streams in the browser, both 287 | recording and playing. 288 | 289 | ## WavRecorder Quickstart 290 | 291 | ```javascript 292 | import { WavRecorder } from '/src/lib/wavtools/index.js'; 293 | 294 | const wavRecorder = new WavRecorder({ sampleRate: 24000 }); 295 | wavRecorder.getStatus(); // "ended" 296 | 297 | // request permissions, connect microphone 298 | await wavRecorder.begin(); 299 | wavRecorder.getStatus(); // "paused" 300 | 301 | // Start recording 302 | // This callback will be triggered in chunks of 8192 samples by default 303 | // { mono, raw } are Int16Array (PCM16) mono & full channel data 304 | await wavRecorder.record((data) => { 305 | const { mono, raw } = data; 306 | }); 307 | wavRecorder.getStatus(); // "recording" 308 | 309 | // Stop recording 310 | await wavRecorder.pause(); 311 | wavRecorder.getStatus(); // "paused" 312 | 313 | // outputs "audio/wav" audio file 314 | const audio = await wavRecorder.save(); 315 | 316 | // clears current audio buffer and starts recording 317 | await wavRecorder.clear(); 318 | await wavRecorder.record(); 319 | 320 | // get data for visualization 321 | const frequencyData = wavRecorder.getFrequencies(); 322 | 323 | // Stop recording, disconnects microphone, output file 324 | await wavRecorder.pause(); 325 | const finalAudio = await wavRecorder.end(); 326 | 327 | // Listen for device change; e.g. 
if somebody disconnects a microphone 328 | // deviceList is array of MediaDeviceInfo[] + `default` property 329 | wavRecorder.listenForDeviceChange((deviceList) => {}); 330 | ``` 331 | 332 | ## WavStreamPlayer Quickstart 333 | 334 | ```javascript 335 | import { WavStreamPlayer } from '/src/lib/wavtools/index.js'; 336 | 337 | const wavStreamPlayer = new WavStreamPlayer({ sampleRate: 24000 }); 338 | 339 | // Connect to audio output 340 | await wavStreamPlayer.connect(); 341 | 342 | // Create 1s of empty PCM16 audio 343 | const audio = new Int16Array(24000); 344 | // Queue 3s of audio, will start playing immediately 345 | wavStreamPlayer.add16BitPCM(audio, 'my-track'); 346 | wavStreamPlayer.add16BitPCM(audio, 'my-track'); 347 | wavStreamPlayer.add16BitPCM(audio, 'my-track'); 348 | 349 | // get data for visualization 350 | const frequencyData = wavStreamPlayer.getFrequencies(); 351 | 352 | // Interrupt the audio (halt playback) at any time 353 | // To restart, need to call .add16BitPCM() again 354 | const trackOffset = await wavStreamPlayer.interrupt(); 355 | trackOffset.trackId; // "my-track" 356 | trackOffset.offset; // sample number 357 | trackOffset.currentTime; // time in track 358 | ``` 359 | 360 | # Acknowledgements and contact 361 | 362 | Thanks for checking out the Realtime Console. We hope you have fun with the Realtime API. 363 | Special thanks to the whole Realtime API team for making this possible. Please feel free 364 | to reach out, ask questions, or give feedback by creating an issue on the repository. 365 | You can also reach out and let us know what you think directly! 366 | 367 | - OpenAI Developers / [@OpenAIDevs](https://x.com/OpenAIDevs) 368 | - Jordan Sitkin / API / [@dustmason](https://x.com/dustmason) 369 | - Mark Hudnall / API / [@landakram](https://x.com/landakram) 370 | - Peter Bakkum / API / [@pbbakkum](https://x.com/pbbakkum) 371 | - Atty Eleti / API / [@athyuttamre](https://x.com/athyuttamre) 372 | - Jason Clark / API / [@onebitToo](https://x.com/onebitToo) 373 | - Karolis Kosas / Design / [@karoliskosas](https://x.com/karoliskosas) 374 | - Keith Horwood / API + DX / [@keithwhor](https://x.com/keithwhor) 375 | - Romain Huet / DX / [@romainhuet](https://x.com/romainhuet) 376 | - Katia Gil Guzman / DX / [@kagigz](https://x.com/kagigz) 377 | - Ilan Bigio / DX / [@ilanbigio](https://x.com/ilanbigio) 378 | - Kevin Whinnery / DX / [@kevinwhinnery](https://x.com/kevinwhinnery) 379 | -------------------------------------------------------------------------------- /examples/openai-realtime-console/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "openai-realtime-console", 3 | "version": "0.0.0", 4 | "type": "module", 5 | "private": true, 6 | "dependencies": { 7 | "@openai/realtime-api-beta": "github:openai/openai-realtime-api-beta", 8 | "@testing-library/jest-dom": "^5.17.0", 9 | "@testing-library/react": "^13.4.0", 10 | "@testing-library/user-event": "^13.5.0", 11 | "@types/jest": "^27.5.2", 12 | "@types/leaflet": "^1.9.12", 13 | "@types/node": "^16.18.108", 14 | "@types/react": "^18.3.5", 15 | "@types/react-dom": "^18.3.0", 16 | "dotenv": "^16.4.5", 17 | "leaflet": "^1.9.4", 18 | "openai-realtime-api": "workspace:*", 19 | "react": "^18.3.1", 20 | "react-dom": "^18.3.1", 21 | "react-feather": "^2.0.10", 22 | "react-leaflet": "^4.2.1", 23 | "react-scripts": "^5.0.1", 24 | "sass": "^1.78.0", 25 | "save": "^2.9.0", 26 | "typescript": "^4.9.5", 27 | "web-vitals": "^2.1.4", 28 | "ws": "^8.18.0" 29 | }, 30 | 
"scripts": { 31 | "start": "react-scripts start", 32 | "build": "react-scripts build", 33 | "test": "react-scripts test", 34 | "eject": "react-scripts eject", 35 | "zip": "zip -r realtime-api-console.zip . -x 'node_modules' 'node_modules/*' 'node_modules/**' '.git' '.git/*' '.git/**' '.DS_Store' '*/.DS_Store' 'package-lock.json' '*.zip' '*.tar.gz' '*.tar' '.env'", 36 | "relay": "nodemon ./relay-server/index.js" 37 | }, 38 | "eslintConfig": { 39 | "root": true, 40 | "extends": [ 41 | "react-app", 42 | "react-app/jest" 43 | ] 44 | }, 45 | "browserslist": { 46 | "production": [ 47 | ">0.2%", 48 | "not dead", 49 | "not op_mini all" 50 | ], 51 | "development": [ 52 | "last 1 chrome version", 53 | "last 1 firefox version", 54 | "last 1 safari version" 55 | ] 56 | }, 57 | "devDependencies": { 58 | "@babel/plugin-proposal-private-property-in-object": "^7.21.11", 59 | "nodemon": "^3.1.7" 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /examples/openai-realtime-console/public/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | realtime console 8 | 9 | 13 | 14 | 20 | 25 | 26 | 27 | 28 |
29 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /examples/openai-realtime-console/public/openai-logomark.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/openai-realtime-console/public/robots.txt: -------------------------------------------------------------------------------- 1 | # https://www.robotstxt.org/robotstxt.html 2 | User-agent: * 3 | Disallow: 4 | -------------------------------------------------------------------------------- /examples/openai-realtime-console/readme/realtime-console-demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/transitive-bullshit/openai-realtime-api/89d37b5f461fbcb0300241360749abe85ca45d01/examples/openai-realtime-console/readme/realtime-console-demo.png -------------------------------------------------------------------------------- /examples/openai-realtime-console/src/App.scss: -------------------------------------------------------------------------------- 1 | [data-component='App'] { 2 | height: 100%; 3 | width: 100%; 4 | position: relative; 5 | } 6 | -------------------------------------------------------------------------------- /examples/openai-realtime-console/src/App.tsx: -------------------------------------------------------------------------------- 1 | import { ConsolePage } from './pages/ConsolePage'; 2 | import './App.scss'; 3 | 4 | function App() { 5 | return ( 6 |
<div data-component="App"> 7 | <ConsolePage /> 8 | </div>
9 | ); 10 | } 11 | 12 | export default App; 13 | -------------------------------------------------------------------------------- /examples/openai-realtime-console/src/components/Map.scss: -------------------------------------------------------------------------------- 1 | [data-component='Map'] { 2 | position: absolute; 3 | width: 100%; 4 | height: 100%; 5 | .leaflet-container { 6 | height: 100%; 7 | width: 100%; 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /examples/openai-realtime-console/src/components/Map.tsx: -------------------------------------------------------------------------------- 1 | import { MapContainer, TileLayer, Marker, Popup, useMap } from 'react-leaflet'; 2 | import { LatLngTuple } from 'leaflet'; 3 | import './Map.scss'; 4 | 5 | function ChangeView({ center, zoom }: { center: LatLngTuple; zoom: number }) { 6 | const map = useMap(); 7 | map.setView(center, zoom); 8 | return null; 9 | } 10 | 11 | export function Map({ 12 | center, 13 | location = 'My Location', 14 | }: { 15 | center: LatLngTuple; 16 | location?: string; 17 | }) { 18 | return ( 19 |
20 | 27 | 28 | 29 | 30 | {location} 31 | 32 | 33 |
34 | ); 35 | } 36 | -------------------------------------------------------------------------------- /examples/openai-realtime-console/src/components/button/Button.scss: -------------------------------------------------------------------------------- 1 | [data-component='Button'] { 2 | display: flex; 3 | align-items: center; 4 | gap: 8px; 5 | font-family: 'Roboto Mono', monospace; 6 | font-size: 12px; 7 | font-optical-sizing: auto; 8 | font-weight: 400; 9 | font-style: normal; 10 | border: none; 11 | background-color: #ececf1; 12 | color: #101010; 13 | border-radius: 1000px; 14 | padding: 8px 24px; 15 | min-height: 42px; 16 | transition: transform 0.1s ease-in-out, background-color 0.1s ease-in-out; 17 | outline: none; 18 | 19 | &.button-style-action { 20 | background-color: #101010; 21 | color: #ececf1; 22 | &:hover:not([disabled]) { 23 | background-color: #404040; 24 | } 25 | } 26 | 27 | &.button-style-alert { 28 | background-color: #f00; 29 | color: #ececf1; 30 | &:hover:not([disabled]) { 31 | background-color: #f00; 32 | } 33 | } 34 | 35 | &.button-style-flush { 36 | background-color: rgba(255, 255, 255, 0); 37 | } 38 | 39 | &[disabled] { 40 | color: #999; 41 | } 42 | 43 | &:not([disabled]) { 44 | cursor: pointer; 45 | } 46 | 47 | &:hover:not([disabled]) { 48 | background-color: #d8d8d8; 49 | } 50 | 51 | &:active:not([disabled]) { 52 | transform: translateY(1px); 53 | } 54 | 55 | .icon { 56 | display: flex; 57 | &.icon-start { 58 | margin-left: -8px; 59 | } 60 | &.icon-end { 61 | margin-right: -8px; 62 | } 63 | svg { 64 | width: 16px; 65 | height: 16px; 66 | } 67 | } 68 | 69 | &.icon-red .icon { 70 | color: #cc0000; 71 | } 72 | &.icon-green .icon { 73 | color: #009900; 74 | } 75 | &.icon-grey .icon { 76 | color: #909090; 77 | } 78 | &.icon-fill { 79 | svg { 80 | fill: currentColor; 81 | } 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /examples/openai-realtime-console/src/components/button/Button.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import './Button.scss'; 3 | 4 | import { Icon } from 'react-feather'; 5 | 6 | interface ButtonProps extends React.ButtonHTMLAttributes { 7 | label?: string; 8 | icon?: Icon; 9 | iconPosition?: 'start' | 'end'; 10 | iconColor?: 'red' | 'green' | 'grey'; 11 | iconFill?: boolean; 12 | buttonStyle?: 'regular' | 'action' | 'alert' | 'flush'; 13 | } 14 | 15 | export function Button({ 16 | label = 'Okay', 17 | icon = void 0, 18 | iconPosition = 'start', 19 | iconColor = void 0, 20 | iconFill = false, 21 | buttonStyle = 'regular', 22 | ...rest 23 | }: ButtonProps) { 24 | const StartIcon = iconPosition === 'start' ? icon : null; 25 | const EndIcon = iconPosition === 'end' ? 
icon : null; 26 | const classList = []; 27 | if (iconColor) { 28 | classList.push(`icon-${iconColor}`); 29 | } 30 | if (iconFill) { 31 | classList.push(`icon-fill`); 32 | } 33 | classList.push(`button-style-${buttonStyle}`); 34 | 35 | return ( 36 | 49 | ); 50 | } 51 | -------------------------------------------------------------------------------- /examples/openai-realtime-console/src/components/toggle/Toggle.scss: -------------------------------------------------------------------------------- 1 | [data-component='Toggle'] { 2 | position: relative; 3 | display: flex; 4 | align-items: center; 5 | gap: 8px; 6 | cursor: pointer; 7 | overflow: hidden; 8 | 9 | background-color: #ececf1; 10 | color: #101010; 11 | height: 40px; 12 | border-radius: 1000px; 13 | 14 | &:hover { 15 | background-color: #d8d8d8; 16 | } 17 | 18 | div.label { 19 | position: relative; 20 | color: #666; 21 | transition: color 0.1s ease-in-out; 22 | padding: 0px 16px; 23 | z-index: 2; 24 | user-select: none; 25 | } 26 | 27 | div.label.right { 28 | margin-left: -8px; 29 | } 30 | 31 | .toggle-background { 32 | background-color: #101010; 33 | position: absolute; 34 | top: 0px; 35 | left: 0px; 36 | width: auto; 37 | bottom: 0px; 38 | z-index: 1; 39 | border-radius: 1000px; 40 | transition: left 0.1s ease-in-out, width 0.1s ease-in-out; 41 | } 42 | 43 | &[data-enabled='true'] { 44 | div.label.right { 45 | color: #fff; 46 | } 47 | } 48 | 49 | &[data-enabled='false'] { 50 | div.label.left { 51 | color: #fff; 52 | } 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /examples/openai-realtime-console/src/components/toggle/Toggle.tsx: -------------------------------------------------------------------------------- 1 | import { useState, useEffect, useRef } from 'react'; 2 | 3 | import './Toggle.scss'; 4 | 5 | export function Toggle({ 6 | defaultValue = false, 7 | values, 8 | labels, 9 | onChange = () => {}, 10 | }: { 11 | defaultValue?: string | boolean; 12 | values?: string[]; 13 | labels?: string[]; 14 | onChange?: (isEnabled: boolean, value: string) => void; 15 | }) { 16 | if (typeof defaultValue === 'string') { 17 | defaultValue = !!Math.max(0, (values || []).indexOf(defaultValue)); 18 | } 19 | 20 | const leftRef = useRef(null); 21 | const rightRef = useRef(null); 22 | const bgRef = useRef(null); 23 | const [value, setValue] = useState(defaultValue); 24 | 25 | const toggleValue = () => { 26 | const v = !value; 27 | const index = +v; 28 | setValue(v); 29 | onChange(v, (values || [])[index]); 30 | }; 31 | 32 | useEffect(() => { 33 | const leftEl = leftRef.current; 34 | const rightEl = rightRef.current; 35 | const bgEl = bgRef.current; 36 | if (leftEl && rightEl && bgEl) { 37 | if (value) { 38 | bgEl.style.left = rightEl.offsetLeft + 'px'; 39 | bgEl.style.width = rightEl.offsetWidth + 'px'; 40 | } else { 41 | bgEl.style.left = ''; 42 | bgEl.style.width = leftEl.offsetWidth + 'px'; 43 | } 44 | } 45 | }, [value]); 46 | 47 | return ( 48 |
<div 49 | data-component="Toggle" 50 | onClick={toggleValue} 51 | data-enabled={value.toString()} 52 | > 53 | {labels && ( 54 | <div className="label left" ref={leftRef}>
55 | {labels[0]} 56 | </div>
57 | )} 58 | {labels && ( 59 | <div className="label right" ref={rightRef}>
60 | {labels[1]} 61 | </div>
62 | )} 63 | <div className="toggle-background" ref={bgRef}></div>
64 | </div>
65 | ); 66 | } 67 | -------------------------------------------------------------------------------- /examples/openai-realtime-console/src/index.css: -------------------------------------------------------------------------------- 1 | html, 2 | body { 3 | padding: 0px; 4 | margin: 0px; 5 | position: relative; 6 | width: 100%; 7 | height: 100%; 8 | font-family: 'Assistant', sans-serif; 9 | font-optical-sizing: auto; 10 | font-weight: 400; 11 | font-style: normal; 12 | color: #18181b; 13 | -webkit-font-smoothing: antialiased; 14 | -moz-osx-font-smoothing: grayscale; 15 | } 16 | 17 | #root { 18 | position: relative; 19 | width: 100%; 20 | height: 100%; 21 | } 22 | -------------------------------------------------------------------------------- /examples/openai-realtime-console/src/index.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import ReactDOM from 'react-dom/client'; 3 | import './index.css'; 4 | import App from './App'; 5 | import reportWebVitals from './reportWebVitals'; 6 | 7 | const root = ReactDOM.createRoot( 8 | document.getElementById('root') as HTMLElement 9 | ); 10 | root.render( 11 | 12 | 13 | 14 | ); 15 | 16 | // If you want to start measuring performance in your app, pass a function 17 | // to log results (for example: reportWebVitals(console.log)) 18 | // or send to an analytics endpoint. Learn more: https://bit.ly/CRA-vitals 19 | reportWebVitals(); 20 | -------------------------------------------------------------------------------- /examples/openai-realtime-console/src/lib/wavtools/index.js: -------------------------------------------------------------------------------- 1 | import { WavPacker } from './lib/wav_packer.js'; 2 | import { AudioAnalysis } from './lib/analysis/audio_analysis.js'; 3 | import { WavStreamPlayer } from './lib/wav_stream_player.js'; 4 | import { WavRecorder } from './lib/wav_recorder.js'; 5 | 6 | export { AudioAnalysis, WavPacker, WavStreamPlayer, WavRecorder }; 7 | -------------------------------------------------------------------------------- /examples/openai-realtime-console/src/lib/wavtools/lib/analysis/audio_analysis.js: -------------------------------------------------------------------------------- 1 | import { 2 | noteFrequencies, 3 | noteFrequencyLabels, 4 | voiceFrequencies, 5 | voiceFrequencyLabels, 6 | } from './constants.js'; 7 | 8 | /** 9 | * Output of AudioAnalysis for the frequency domain of the audio 10 | * @typedef {Object} AudioAnalysisOutputType 11 | * @property {Float32Array} values Amplitude of this frequency between {0, 1} inclusive 12 | * @property {number[]} frequencies Raw frequency bucket values 13 | * @property {string[]} labels Labels for the frequency bucket values 14 | */ 15 | 16 | /** 17 | * Analyzes audio for visual output 18 | * @class 19 | */ 20 | export class AudioAnalysis { 21 | /** 22 | * Retrieves frequency domain data from an AnalyserNode adjusted to a decibel range 23 | * returns human-readable formatting and labels 24 | * @param {AnalyserNode} analyser 25 | * @param {number} sampleRate 26 | * @param {Float32Array} [fftResult] 27 | * @param {"frequency"|"music"|"voice"} [analysisType] 28 | * @param {number} [minDecibels] default -100 29 | * @param {number} [maxDecibels] default -30 30 | * @returns {AudioAnalysisOutputType} 31 | */ 32 | static getFrequencies( 33 | analyser, 34 | sampleRate, 35 | fftResult, 36 | analysisType = 'frequency', 37 | minDecibels = -100, 38 | maxDecibels = -30, 39 | ) { 40 | if (!fftResult) { 41 | fftResult = 
new Float32Array(analyser.frequencyBinCount); 42 | analyser.getFloatFrequencyData(fftResult); 43 | } 44 | const nyquistFrequency = sampleRate / 2; 45 | const frequencyStep = (1 / fftResult.length) * nyquistFrequency; 46 | let outputValues; 47 | let frequencies; 48 | let labels; 49 | if (analysisType === 'music' || analysisType === 'voice') { 50 | const useFrequencies = 51 | analysisType === 'voice' ? voiceFrequencies : noteFrequencies; 52 | const aggregateOutput = Array(useFrequencies.length).fill(minDecibels); 53 | for (let i = 0; i < fftResult.length; i++) { 54 | const frequency = i * frequencyStep; 55 | const amplitude = fftResult[i]; 56 | for (let n = useFrequencies.length - 1; n >= 0; n--) { 57 | if (frequency > useFrequencies[n]) { 58 | aggregateOutput[n] = Math.max(aggregateOutput[n], amplitude); 59 | break; 60 | } 61 | } 62 | } 63 | outputValues = aggregateOutput; 64 | frequencies = 65 | analysisType === 'voice' ? voiceFrequencies : noteFrequencies; 66 | labels = 67 | analysisType === 'voice' ? voiceFrequencyLabels : noteFrequencyLabels; 68 | } else { 69 | outputValues = Array.from(fftResult); 70 | frequencies = outputValues.map((_, i) => frequencyStep * i); 71 | labels = frequencies.map((f) => `${f.toFixed(2)} Hz`); 72 | } 73 | // We normalize to {0, 1} 74 | const normalizedOutput = outputValues.map((v) => { 75 | return Math.max( 76 | 0, 77 | Math.min((v - minDecibels) / (maxDecibels - minDecibels), 1), 78 | ); 79 | }); 80 | const values = new Float32Array(normalizedOutput); 81 | return { 82 | values, 83 | frequencies, 84 | labels, 85 | }; 86 | } 87 | 88 | /** 89 | * Creates a new AudioAnalysis instance for an HTMLAudioElement 90 | * @param {HTMLAudioElement} audioElement 91 | * @param {AudioBuffer|null} [audioBuffer] If provided, will cache all frequency domain data from the buffer 92 | * @returns {AudioAnalysis} 93 | */ 94 | constructor(audioElement, audioBuffer = null) { 95 | this.fftResults = []; 96 | if (audioBuffer) { 97 | /** 98 | * Modified from 99 | * https://stackoverflow.com/questions/75063715/using-the-web-audio-api-to-analyze-a-song-without-playing 100 | * 101 | * We do this to populate FFT values for the audio if provided an `audioBuffer` 102 | * The reason to do this is that Safari fails when using `createMediaElementSource` 103 | * This has a non-zero RAM cost so we only opt-in to run it on Safari, Chrome is better 104 | */ 105 | const { length, sampleRate } = audioBuffer; 106 | const offlineAudioContext = new OfflineAudioContext({ 107 | length, 108 | sampleRate, 109 | }); 110 | const source = offlineAudioContext.createBufferSource(); 111 | source.buffer = audioBuffer; 112 | const analyser = offlineAudioContext.createAnalyser(); 113 | analyser.fftSize = 8192; 114 | analyser.smoothingTimeConstant = 0.1; 115 | source.connect(analyser); 116 | // limit is :: 128 / sampleRate; 117 | // but we just want 60fps - cuts ~1s from 6MB to 1MB of RAM 118 | const renderQuantumInSeconds = 1 / 60; 119 | const durationInSeconds = length / sampleRate; 120 | const analyze = (index) => { 121 | const suspendTime = renderQuantumInSeconds * index; 122 | if (suspendTime < durationInSeconds) { 123 | offlineAudioContext.suspend(suspendTime).then(() => { 124 | const fftResult = new Float32Array(analyser.frequencyBinCount); 125 | analyser.getFloatFrequencyData(fftResult); 126 | this.fftResults.push(fftResult); 127 | analyze(index + 1); 128 | }); 129 | } 130 | if (index === 1) { 131 | offlineAudioContext.startRendering(); 132 | } else { 133 | offlineAudioContext.resume(); 134 | } 135 | }; 136 | 
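// `source.start(0)` begins offline playback at t=0 and `analyze(1)` registers the first
// suspend point (index === 1 triggers `startRendering()`); each resolved suspension above
// captures one FFT frame into `this.fftResults` and schedules the next, ~60 frames per
// second of audio, until the buffer's full duration has been rendered.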
source.start(0); 137 | analyze(1); 138 | this.audio = audioElement; 139 | this.context = offlineAudioContext; 140 | this.analyser = analyser; 141 | this.sampleRate = sampleRate; 142 | this.audioBuffer = audioBuffer; 143 | } else { 144 | const audioContext = new AudioContext(); 145 | const track = audioContext.createMediaElementSource(audioElement); 146 | const analyser = audioContext.createAnalyser(); 147 | analyser.fftSize = 8192; 148 | analyser.smoothingTimeConstant = 0.1; 149 | track.connect(analyser); 150 | analyser.connect(audioContext.destination); 151 | this.audio = audioElement; 152 | this.context = audioContext; 153 | this.analyser = analyser; 154 | this.sampleRate = this.context.sampleRate; 155 | this.audioBuffer = null; 156 | } 157 | } 158 | 159 | /** 160 | * Gets the current frequency domain data from the playing audio track 161 | * @param {"frequency"|"music"|"voice"} [analysisType] 162 | * @param {number} [minDecibels] default -100 163 | * @param {number} [maxDecibels] default -30 164 | * @returns {AudioAnalysisOutputType} 165 | */ 166 | getFrequencies( 167 | analysisType = 'frequency', 168 | minDecibels = -100, 169 | maxDecibels = -30, 170 | ) { 171 | let fftResult = null; 172 | if (this.audioBuffer && this.fftResults.length) { 173 | const pct = this.audio.currentTime / this.audio.duration; 174 | const index = Math.min( 175 | (pct * this.fftResults.length) | 0, 176 | this.fftResults.length - 1, 177 | ); 178 | fftResult = this.fftResults[index]; 179 | } 180 | return AudioAnalysis.getFrequencies( 181 | this.analyser, 182 | this.sampleRate, 183 | fftResult, 184 | analysisType, 185 | minDecibels, 186 | maxDecibels, 187 | ); 188 | } 189 | 190 | /** 191 | * Resume the internal AudioContext if it was suspended due to the lack of 192 | * user interaction when the AudioAnalysis was instantiated. 193 | * @returns {Promise} 194 | */ 195 | async resumeIfSuspended() { 196 | if (this.context.state === 'suspended') { 197 | await this.context.resume(); 198 | } 199 | return true; 200 | } 201 | } 202 | 203 | globalThis.AudioAnalysis = AudioAnalysis; 204 | -------------------------------------------------------------------------------- /examples/openai-realtime-console/src/lib/wavtools/lib/analysis/constants.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Constants for help with visualization 3 | * Helps map frequency ranges from Fast Fourier Transform 4 | * to human-interpretable ranges, notably music ranges and 5 | * human vocal ranges. 
6 | */ 7 | 8 | // Eighth octave frequencies 9 | const octave8Frequencies = [ 10 | 4186.01, 4434.92, 4698.63, 4978.03, 5274.04, 5587.65, 5919.91, 6271.93, 11 | 6644.88, 7040.0, 7458.62, 7902.13, 12 | ]; 13 | 14 | // Labels for each of the above frequencies 15 | const octave8FrequencyLabels = [ 16 | 'C', 17 | 'C#', 18 | 'D', 19 | 'D#', 20 | 'E', 21 | 'F', 22 | 'F#', 23 | 'G', 24 | 'G#', 25 | 'A', 26 | 'A#', 27 | 'B', 28 | ]; 29 | 30 | /** 31 | * All note frequencies from 1st to 8th octave 32 | * in format "A#8" (A#, 8th octave) 33 | */ 34 | export const noteFrequencies = []; 35 | export const noteFrequencyLabels = []; 36 | for (let i = 1; i <= 8; i++) { 37 | for (let f = 0; f < octave8Frequencies.length; f++) { 38 | const freq = octave8Frequencies[f]; 39 | noteFrequencies.push(freq / Math.pow(2, 8 - i)); 40 | noteFrequencyLabels.push(octave8FrequencyLabels[f] + i); 41 | } 42 | } 43 | 44 | /** 45 | * Subset of the note frequencies between 32 and 2000 Hz 46 | * 6 octave range: C1 to B6 47 | */ 48 | const voiceFrequencyRange = [32.0, 2000.0]; 49 | export const voiceFrequencies = noteFrequencies.filter((_, i) => { 50 | return ( 51 | noteFrequencies[i] > voiceFrequencyRange[0] && 52 | noteFrequencies[i] < voiceFrequencyRange[1] 53 | ); 54 | }); 55 | export const voiceFrequencyLabels = noteFrequencyLabels.filter((_, i) => { 56 | return ( 57 | noteFrequencies[i] > voiceFrequencyRange[0] && 58 | noteFrequencies[i] < voiceFrequencyRange[1] 59 | ); 60 | }); 61 | -------------------------------------------------------------------------------- /examples/openai-realtime-console/src/lib/wavtools/lib/wav_packer.js: -------------------------------------------------------------------------------- 1 | /** 2 | * Raw wav audio file contents 3 | * @typedef {Object} WavPackerAudioType 4 | * @property {Blob} blob 5 | * @property {string} url 6 | * @property {number} channelCount 7 | * @property {number} sampleRate 8 | * @property {number} duration 9 | */ 10 | 11 | /** 12 | * Utility class for assembling PCM16 "audio/wav" data 13 | * @class 14 | */ 15 | export class WavPacker { 16 | /** 17 | * Converts Float32Array of amplitude data to ArrayBuffer in Int16Array format 18 | * @param {Float32Array} float32Array 19 | * @returns {ArrayBuffer} 20 | */ 21 | static floatTo16BitPCM(float32Array) { 22 | const buffer = new ArrayBuffer(float32Array.length * 2); 23 | const view = new DataView(buffer); 24 | let offset = 0; 25 | for (let i = 0; i < float32Array.length; i++, offset += 2) { 26 | let s = Math.max(-1, Math.min(1, float32Array[i])); 27 | view.setInt16(offset, s < 0 ? 
s * 0x8000 : s * 0x7fff, true); 28 | } 29 | return buffer; 30 | } 31 | 32 | /** 33 | * Concatenates two ArrayBuffers 34 | * @param {ArrayBuffer} leftBuffer 35 | * @param {ArrayBuffer} rightBuffer 36 | * @returns {ArrayBuffer} 37 | */ 38 | static mergeBuffers(leftBuffer, rightBuffer) { 39 | const tmpArray = new Uint8Array( 40 | leftBuffer.byteLength + rightBuffer.byteLength 41 | ); 42 | tmpArray.set(new Uint8Array(leftBuffer), 0); 43 | tmpArray.set(new Uint8Array(rightBuffer), leftBuffer.byteLength); 44 | return tmpArray.buffer; 45 | } 46 | 47 | /** 48 | * Packs data into an Int16 format 49 | * @private 50 | * @param {number} size 0 = 1x Int16, 1 = 2x Int16 51 | * @param {number} arg value to pack 52 | * @returns 53 | */ 54 | _packData(size, arg) { 55 | return [ 56 | new Uint8Array([arg, arg >> 8]), 57 | new Uint8Array([arg, arg >> 8, arg >> 16, arg >> 24]), 58 | ][size]; 59 | } 60 | 61 | /** 62 | * Packs audio into "audio/wav" Blob 63 | * @param {number} sampleRate 64 | * @param {{bitsPerSample: number, channels: Array, data: Int16Array}} audio 65 | * @returns {WavPackerAudioType} 66 | */ 67 | pack(sampleRate, audio) { 68 | if (!audio?.bitsPerSample) { 69 | throw new Error(`Missing "bitsPerSample"`); 70 | } else if (!audio?.channels) { 71 | throw new Error(`Missing "channels"`); 72 | } else if (!audio?.data) { 73 | throw new Error(`Missing "data"`); 74 | } 75 | const { bitsPerSample, channels, data } = audio; 76 | const output = [ 77 | // Header 78 | 'RIFF', 79 | this._packData( 80 | 1, 81 | 4 + (8 + 24) /* chunk 1 length */ + (8 + 8) /* chunk 2 length */ 82 | ), // Length 83 | 'WAVE', 84 | // chunk 1 85 | 'fmt ', // Sub-chunk identifier 86 | this._packData(1, 16), // Chunk length 87 | this._packData(0, 1), // Audio format (1 is linear quantization) 88 | this._packData(0, channels.length), 89 | this._packData(1, sampleRate), 90 | this._packData(1, (sampleRate * channels.length * bitsPerSample) / 8), // Byte rate 91 | this._packData(0, (channels.length * bitsPerSample) / 8), 92 | this._packData(0, bitsPerSample), 93 | // chunk 2 94 | 'data', // Sub-chunk identifier 95 | this._packData( 96 | 1, 97 | (channels[0].length * channels.length * bitsPerSample) / 8 98 | ), // Chunk length 99 | data, 100 | ]; 101 | const blob = new Blob(output, { type: 'audio/mpeg' }); 102 | const url = URL.createObjectURL(blob); 103 | return { 104 | blob, 105 | url, 106 | channelCount: channels.length, 107 | sampleRate, 108 | duration: data.byteLength / (channels.length * sampleRate * 2), 109 | }; 110 | } 111 | } 112 | 113 | globalThis.WavPacker = WavPacker; 114 | -------------------------------------------------------------------------------- /examples/openai-realtime-console/src/lib/wavtools/lib/wav_recorder.js: -------------------------------------------------------------------------------- 1 | import { AudioProcessorSrc } from './worklets/audio_processor.js'; 2 | import { AudioAnalysis } from './analysis/audio_analysis.js'; 3 | import { WavPacker } from './wav_packer.js'; 4 | 5 | /** 6 | * Decodes audio into a wav file 7 | * @typedef {Object} DecodedAudioType 8 | * @property {Blob} blob 9 | * @property {string} url 10 | * @property {Float32Array} values 11 | * @property {AudioBuffer} audioBuffer 12 | */ 13 | 14 | /** 15 | * Records live stream of user audio as PCM16 "audio/wav" data 16 | * @class 17 | */ 18 | export class WavRecorder { 19 | /** 20 | * Create a new WavRecorder instance 21 | * @param {{sampleRate?: number, outputToSpeakers?: boolean, debug?: boolean}} [options] 22 | * @returns {WavRecorder} 23 | 
*/ 24 | constructor({ 25 | sampleRate = 44100, 26 | outputToSpeakers = false, 27 | debug = false, 28 | } = {}) { 29 | // Script source 30 | this.scriptSrc = AudioProcessorSrc; 31 | // Config 32 | this.sampleRate = sampleRate; 33 | this.outputToSpeakers = outputToSpeakers; 34 | this.debug = !!debug; 35 | this._deviceChangeCallback = null; 36 | this._devices = []; 37 | // State variables 38 | this.stream = null; 39 | this.processor = null; 40 | this.source = null; 41 | this.node = null; 42 | this.recording = false; 43 | // Event handling with AudioWorklet 44 | this._lastEventId = 0; 45 | this.eventReceipts = {}; 46 | this.eventTimeout = 5000; 47 | // Process chunks of audio 48 | this._chunkProcessor = () => {}; 49 | this._chunkProcessorSize = void 0; 50 | this._chunkProcessorBuffer = { 51 | raw: new ArrayBuffer(0), 52 | mono: new ArrayBuffer(0), 53 | }; 54 | } 55 | 56 | /** 57 | * Decodes audio data from multiple formats to a Blob, url, Float32Array and AudioBuffer 58 | * @param {Blob|Float32Array|Int16Array|ArrayBuffer|number[]} audioData 59 | * @param {number} sampleRate 60 | * @param {number} fromSampleRate 61 | * @returns {Promise} 62 | */ 63 | static async decode(audioData, sampleRate = 44100, fromSampleRate = -1) { 64 | const context = new AudioContext({ sampleRate }); 65 | let arrayBuffer; 66 | let blob; 67 | if (audioData instanceof Blob) { 68 | if (fromSampleRate !== -1) { 69 | throw new Error( 70 | `Can not specify "fromSampleRate" when reading from Blob`, 71 | ); 72 | } 73 | blob = audioData; 74 | arrayBuffer = await blob.arrayBuffer(); 75 | } else if (audioData instanceof ArrayBuffer) { 76 | if (fromSampleRate !== -1) { 77 | throw new Error( 78 | `Can not specify "fromSampleRate" when reading from ArrayBuffer`, 79 | ); 80 | } 81 | arrayBuffer = audioData; 82 | blob = new Blob([arrayBuffer], { type: 'audio/wav' }); 83 | } else { 84 | let float32Array; 85 | let data; 86 | if (audioData instanceof Int16Array) { 87 | data = audioData; 88 | float32Array = new Float32Array(audioData.length); 89 | for (let i = 0; i < audioData.length; i++) { 90 | float32Array[i] = audioData[i] / 0x8000; 91 | } 92 | } else if (audioData instanceof Float32Array) { 93 | float32Array = audioData; 94 | } else if (audioData instanceof Array) { 95 | float32Array = new Float32Array(audioData); 96 | } else { 97 | throw new Error( 98 | `"audioData" must be one of: Blob, Float32Arrray, Int16Array, ArrayBuffer, Array`, 99 | ); 100 | } 101 | if (fromSampleRate === -1) { 102 | throw new Error( 103 | `Must specify "fromSampleRate" when reading from Float32Array, In16Array or Array`, 104 | ); 105 | } else if (fromSampleRate < 3000) { 106 | throw new Error(`Minimum "fromSampleRate" is 3000 (3kHz)`); 107 | } 108 | if (!data) { 109 | data = WavPacker.floatTo16BitPCM(float32Array); 110 | } 111 | const audio = { 112 | bitsPerSample: 16, 113 | channels: [float32Array], 114 | data, 115 | }; 116 | const packer = new WavPacker(); 117 | const result = packer.pack(fromSampleRate, audio); 118 | blob = result.blob; 119 | arrayBuffer = await blob.arrayBuffer(); 120 | } 121 | const audioBuffer = await context.decodeAudioData(arrayBuffer); 122 | const values = audioBuffer.getChannelData(0); 123 | const url = URL.createObjectURL(blob); 124 | return { 125 | blob, 126 | url, 127 | values, 128 | audioBuffer, 129 | }; 130 | } 131 | 132 | /** 133 | * Logs data in debug mode 134 | * @param {...any} arguments 135 | * @returns {true} 136 | */ 137 | log() { 138 | if (this.debug) { 139 | this.log(...arguments); 140 | } 141 | return true; 142 | } 
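  // Illustrative usage sketch: a typical capture flow with this class, where
  // `send()` stands in for a hypothetical consumer of mono PCM16 chunks.
  //
  //   const recorder = new WavRecorder({ sampleRate: 24000 });
  //   await recorder.begin();                               // request mic + load worklet
  //   await recorder.record((chunk) => send(chunk.mono));   // stream PCM16 chunks
  //   await recorder.pause();                               // stop storing, keep stream open
  //   const wav = await recorder.save();                    // { blob, url, sampleRate, duration, ... }
  //   await recorder.end();                                 // stop tracks and tear down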
143 | 144 | /** 145 | * Retrieves the current sampleRate for the recorder 146 | * @returns {number} 147 | */ 148 | getSampleRate() { 149 | return this.sampleRate; 150 | } 151 | 152 | /** 153 | * Retrieves the current status of the recording 154 | * @returns {"ended"|"paused"|"recording"} 155 | */ 156 | getStatus() { 157 | if (!this.processor) { 158 | return 'ended'; 159 | } else if (!this.recording) { 160 | return 'paused'; 161 | } else { 162 | return 'recording'; 163 | } 164 | } 165 | 166 | /** 167 | * Sends an event to the AudioWorklet 168 | * @private 169 | * @param {string} name 170 | * @param {{[key: string]: any}} data 171 | * @param {AudioWorkletNode} [_processor] 172 | * @returns {Promise<{[key: string]: any}>} 173 | */ 174 | async _event(name, data = {}, _processor = null) { 175 | _processor = _processor || this.processor; 176 | if (!_processor) { 177 | throw new Error('Can not send events without recording first'); 178 | } 179 | const message = { 180 | event: name, 181 | id: this._lastEventId++, 182 | data, 183 | }; 184 | _processor.port.postMessage(message); 185 | const t0 = new Date().valueOf(); 186 | while (!this.eventReceipts[message.id]) { 187 | if (new Date().valueOf() - t0 > this.eventTimeout) { 188 | throw new Error(`Timeout waiting for "${name}" event`); 189 | } 190 | await new Promise((res) => setTimeout(() => res(true), 1)); 191 | } 192 | const payload = this.eventReceipts[message.id]; 193 | delete this.eventReceipts[message.id]; 194 | return payload; 195 | } 196 | 197 | /** 198 | * Sets device change callback, remove if callback provided is `null` 199 | * @param {(Array): void|null} callback 200 | * @returns {true} 201 | */ 202 | listenForDeviceChange(callback) { 203 | if (callback === null && this._deviceChangeCallback) { 204 | navigator.mediaDevices.removeEventListener( 205 | 'devicechange', 206 | this._deviceChangeCallback, 207 | ); 208 | this._deviceChangeCallback = null; 209 | } else if (callback !== null) { 210 | // Basically a debounce; we only want this called once when devices change 211 | // And we only want the most recent callback() to be executed 212 | // if a few are operating at the same time 213 | let lastId = 0; 214 | let lastDevices = []; 215 | const serializeDevices = (devices) => 216 | devices 217 | .map((d) => d.deviceId) 218 | .sort() 219 | .join(','); 220 | const cb = async () => { 221 | let id = ++lastId; 222 | const devices = await this.listDevices(); 223 | if (id === lastId) { 224 | if (serializeDevices(lastDevices) !== serializeDevices(devices)) { 225 | lastDevices = devices; 226 | callback(devices.slice()); 227 | } 228 | } 229 | }; 230 | navigator.mediaDevices.addEventListener('devicechange', cb); 231 | cb(); 232 | this._deviceChangeCallback = cb; 233 | } 234 | return true; 235 | } 236 | 237 | /** 238 | * Manually request permission to use the microphone 239 | * @returns {Promise} 240 | */ 241 | async requestPermission() { 242 | const permissionStatus = await navigator.permissions.query({ 243 | name: 'microphone', 244 | }); 245 | if (permissionStatus.state === 'denied') { 246 | window.alert('You must grant microphone access to use this feature.'); 247 | } else if (permissionStatus.state === 'prompt') { 248 | try { 249 | const stream = await navigator.mediaDevices.getUserMedia({ 250 | audio: true, 251 | }); 252 | const tracks = stream.getTracks(); 253 | tracks.forEach((track) => track.stop()); 254 | } catch (e) { 255 | window.alert('You must grant microphone access to use this feature.'); 256 | } 257 | } 258 | return true; 259 | } 260 | 261 
| /** 262 | * List all eligible devices for recording, will request permission to use microphone 263 | * @returns {Promise>} 264 | */ 265 | async listDevices() { 266 | if ( 267 | !navigator.mediaDevices || 268 | !('enumerateDevices' in navigator.mediaDevices) 269 | ) { 270 | throw new Error('Could not request user devices'); 271 | } 272 | await this.requestPermission(); 273 | const devices = await navigator.mediaDevices.enumerateDevices(); 274 | const audioDevices = devices.filter( 275 | (device) => device.kind === 'audioinput', 276 | ); 277 | const defaultDeviceIndex = audioDevices.findIndex( 278 | (device) => device.deviceId === 'default', 279 | ); 280 | const deviceList = []; 281 | if (defaultDeviceIndex !== -1) { 282 | let defaultDevice = audioDevices.splice(defaultDeviceIndex, 1)[0]; 283 | let existingIndex = audioDevices.findIndex( 284 | (device) => device.groupId === defaultDevice.groupId, 285 | ); 286 | if (existingIndex !== -1) { 287 | defaultDevice = audioDevices.splice(existingIndex, 1)[0]; 288 | } 289 | defaultDevice.default = true; 290 | deviceList.push(defaultDevice); 291 | } 292 | return deviceList.concat(audioDevices); 293 | } 294 | 295 | /** 296 | * Begins a recording session and requests microphone permissions if not already granted 297 | * Microphone recording indicator will appear on browser tab but status will be "paused" 298 | * @param {string} [deviceId] if no device provided, default device will be used 299 | * @returns {Promise} 300 | */ 301 | async begin(deviceId) { 302 | if (this.processor) { 303 | throw new Error( 304 | `Already connected: please call .end() to start a new session`, 305 | ); 306 | } 307 | 308 | if ( 309 | !navigator.mediaDevices || 310 | !('getUserMedia' in navigator.mediaDevices) 311 | ) { 312 | throw new Error('Could not request user media'); 313 | } 314 | try { 315 | const config = { audio: true }; 316 | if (deviceId) { 317 | config.audio = { deviceId: { exact: deviceId } }; 318 | } 319 | this.stream = await navigator.mediaDevices.getUserMedia(config); 320 | } catch (err) { 321 | throw new Error('Could not start media stream'); 322 | } 323 | 324 | const context = new AudioContext({ sampleRate: this.sampleRate }); 325 | const source = context.createMediaStreamSource(this.stream); 326 | // Load and execute the module script. 
327 | try { 328 | await context.audioWorklet.addModule(this.scriptSrc); 329 | } catch (e) { 330 | console.error(e); 331 | throw new Error(`Could not add audioWorklet module: ${this.scriptSrc}`); 332 | } 333 | const processor = new AudioWorkletNode(context, 'audio_processor'); 334 | processor.port.onmessage = (e) => { 335 | const { event, id, data } = e.data; 336 | if (event === 'receipt') { 337 | this.eventReceipts[id] = data; 338 | } else if (event === 'chunk') { 339 | if (this._chunkProcessorSize) { 340 | const buffer = this._chunkProcessorBuffer; 341 | this._chunkProcessorBuffer = { 342 | raw: WavPacker.mergeBuffers(buffer.raw, data.raw), 343 | mono: WavPacker.mergeBuffers(buffer.mono, data.mono), 344 | }; 345 | if ( 346 | this._chunkProcessorBuffer.mono.byteLength >= 347 | this._chunkProcessorSize 348 | ) { 349 | this._chunkProcessor(this._chunkProcessorBuffer); 350 | this._chunkProcessorBuffer = { 351 | raw: new ArrayBuffer(0), 352 | mono: new ArrayBuffer(0), 353 | }; 354 | } 355 | } else { 356 | this._chunkProcessor(data); 357 | } 358 | } 359 | }; 360 | 361 | const node = source.connect(processor); 362 | const analyser = context.createAnalyser(); 363 | analyser.fftSize = 8192; 364 | analyser.smoothingTimeConstant = 0.1; 365 | node.connect(analyser); 366 | if (this.outputToSpeakers) { 367 | // eslint-disable-next-line no-console 368 | console.warn( 369 | 'Warning: Output to speakers may affect sound quality,\n' + 370 | 'especially due to system audio feedback preventative measures.\n' + 371 | 'use only for debugging', 372 | ); 373 | analyser.connect(context.destination); 374 | } 375 | 376 | this.source = source; 377 | this.node = node; 378 | this.analyser = analyser; 379 | this.processor = processor; 380 | return true; 381 | } 382 | 383 | /** 384 | * Gets the current frequency domain data from the recording track 385 | * @param {"frequency"|"music"|"voice"} [analysisType] 386 | * @param {number} [minDecibels] default -100 387 | * @param {number} [maxDecibels] default -30 388 | * @returns {import('./analysis/audio_analysis.js').AudioAnalysisOutputType} 389 | */ 390 | getFrequencies( 391 | analysisType = 'frequency', 392 | minDecibels = -100, 393 | maxDecibels = -30, 394 | ) { 395 | if (!this.processor) { 396 | throw new Error('Session ended: please call .begin() first'); 397 | } 398 | return AudioAnalysis.getFrequencies( 399 | this.analyser, 400 | this.sampleRate, 401 | null, 402 | analysisType, 403 | minDecibels, 404 | maxDecibels, 405 | ); 406 | } 407 | 408 | /** 409 | * Pauses the recording 410 | * Keeps microphone stream open but halts storage of audio 411 | * @returns {Promise} 412 | */ 413 | async pause() { 414 | if (!this.processor) { 415 | throw new Error('Session ended: please call .begin() first'); 416 | } else if (!this.recording) { 417 | throw new Error('Already paused: please call .record() first'); 418 | } 419 | if (this._chunkProcessorBuffer.raw.byteLength) { 420 | this._chunkProcessor(this._chunkProcessorBuffer); 421 | } 422 | this.log('Pausing ...'); 423 | await this._event('stop'); 424 | this.recording = false; 425 | return true; 426 | } 427 | 428 | /** 429 | * Start recording stream and storing to memory from the connected audio source 430 | * @param {(data: { mono: Int16Array; raw: Int16Array }) => any} [chunkProcessor] 431 | * @param {number} [chunkSize] chunkProcessor will not be triggered until this size threshold met in mono audio 432 | * @returns {Promise} 433 | */ 434 | async record(chunkProcessor = () => {}, chunkSize = 8192) { 435 | if (!this.processor) { 
436 | throw new Error('Session ended: please call .begin() first'); 437 | } else if (this.recording) { 438 | throw new Error('Already recording: please call .pause() first'); 439 | } else if (typeof chunkProcessor !== 'function') { 440 | throw new Error(`chunkProcessor must be a function`); 441 | } 442 | this._chunkProcessor = chunkProcessor; 443 | this._chunkProcessorSize = chunkSize; 444 | this._chunkProcessorBuffer = { 445 | raw: new ArrayBuffer(0), 446 | mono: new ArrayBuffer(0), 447 | }; 448 | this.log('Recording ...'); 449 | await this._event('start'); 450 | this.recording = true; 451 | return true; 452 | } 453 | 454 | /** 455 | * Clears the audio buffer, empties stored recording 456 | * @returns {Promise} 457 | */ 458 | async clear() { 459 | if (!this.processor) { 460 | throw new Error('Session ended: please call .begin() first'); 461 | } 462 | await this._event('clear'); 463 | return true; 464 | } 465 | 466 | /** 467 | * Reads the current audio stream data 468 | * @returns {Promise<{meanValues: Float32Array, channels: Array}>} 469 | */ 470 | async read() { 471 | if (!this.processor) { 472 | throw new Error('Session ended: please call .begin() first'); 473 | } 474 | this.log('Reading ...'); 475 | const result = await this._event('read'); 476 | return result; 477 | } 478 | 479 | /** 480 | * Saves the current audio stream to a file 481 | * @param {boolean} [force] Force saving while still recording 482 | * @returns {Promise} 483 | */ 484 | async save(force = false) { 485 | if (!this.processor) { 486 | throw new Error('Session ended: please call .begin() first'); 487 | } 488 | if (!force && this.recording) { 489 | throw new Error( 490 | 'Currently recording: please call .pause() first, or call .save(true) to force', 491 | ); 492 | } 493 | this.log('Exporting ...'); 494 | const exportData = await this._event('export'); 495 | const packer = new WavPacker(); 496 | const result = packer.pack(this.sampleRate, exportData.audio); 497 | return result; 498 | } 499 | 500 | /** 501 | * Ends the current recording session and saves the result 502 | * @returns {Promise} 503 | */ 504 | async end() { 505 | if (!this.processor) { 506 | throw new Error('Session ended: please call .begin() first'); 507 | } 508 | 509 | const _processor = this.processor; 510 | 511 | this.log('Stopping ...'); 512 | await this._event('stop'); 513 | this.recording = false; 514 | const tracks = this.stream.getTracks(); 515 | tracks.forEach((track) => track.stop()); 516 | 517 | this.log('Exporting ...'); 518 | const exportData = await this._event('export', {}, _processor); 519 | 520 | this.processor.disconnect(); 521 | this.source.disconnect(); 522 | this.node.disconnect(); 523 | this.analyser.disconnect(); 524 | this.stream = null; 525 | this.processor = null; 526 | this.source = null; 527 | this.node = null; 528 | 529 | const packer = new WavPacker(); 530 | const result = packer.pack(this.sampleRate, exportData.audio); 531 | return result; 532 | } 533 | 534 | /** 535 | * Performs a full cleanup of WavRecorder instance 536 | * Stops actively listening via microphone and removes existing listeners 537 | * @returns {Promise} 538 | */ 539 | async quit() { 540 | this.listenForDeviceChange(null); 541 | if (this.processor) { 542 | await this.end(); 543 | } 544 | return true; 545 | } 546 | } 547 | 548 | globalThis.WavRecorder = WavRecorder; 549 | -------------------------------------------------------------------------------- /examples/openai-realtime-console/src/lib/wavtools/lib/wav_stream_player.js: 
-------------------------------------------------------------------------------- 1 | import { StreamProcessorSrc } from './worklets/stream_processor.js'; 2 | import { AudioAnalysis } from './analysis/audio_analysis.js'; 3 | 4 | /** 5 | * Plays audio streams received in raw PCM16 chunks from the browser 6 | * @class 7 | */ 8 | export class WavStreamPlayer { 9 | /** 10 | * Creates a new WavStreamPlayer instance 11 | * @param {{sampleRate?: number}} options 12 | * @returns {WavStreamPlayer} 13 | */ 14 | constructor({ sampleRate = 44100 } = {}) { 15 | this.scriptSrc = StreamProcessorSrc; 16 | this.sampleRate = sampleRate; 17 | this.context = null; 18 | this.stream = null; 19 | this.analyser = null; 20 | this.trackSampleOffsets = {}; 21 | this.interruptedTrackIds = {}; 22 | } 23 | 24 | /** 25 | * Connects the audio context and enables output to speakers 26 | * @returns {Promise} 27 | */ 28 | async connect() { 29 | this.context = new AudioContext({ sampleRate: this.sampleRate }); 30 | if (this.context.state === 'suspended') { 31 | await this.context.resume(); 32 | } 33 | try { 34 | await this.context.audioWorklet.addModule(this.scriptSrc); 35 | } catch (e) { 36 | console.error(e); 37 | throw new Error(`Could not add audioWorklet module: ${this.scriptSrc}`); 38 | } 39 | const analyser = this.context.createAnalyser(); 40 | analyser.fftSize = 8192; 41 | analyser.smoothingTimeConstant = 0.1; 42 | this.analyser = analyser; 43 | return true; 44 | } 45 | 46 | /** 47 | * Gets the current frequency domain data from the playing track 48 | * @param {"frequency"|"music"|"voice"} [analysisType] 49 | * @param {number} [minDecibels] default -100 50 | * @param {number} [maxDecibels] default -30 51 | * @returns {import('./analysis/audio_analysis.js').AudioAnalysisOutputType} 52 | */ 53 | getFrequencies( 54 | analysisType = 'frequency', 55 | minDecibels = -100, 56 | maxDecibels = -30 57 | ) { 58 | if (!this.analyser) { 59 | throw new Error('Not connected, please call .connect() first'); 60 | } 61 | return AudioAnalysis.getFrequencies( 62 | this.analyser, 63 | this.sampleRate, 64 | null, 65 | analysisType, 66 | minDecibels, 67 | maxDecibels 68 | ); 69 | } 70 | 71 | /** 72 | * Starts audio streaming 73 | * @private 74 | * @returns {Promise} 75 | */ 76 | _start() { 77 | const streamNode = new AudioWorkletNode(this.context, 'stream_processor'); 78 | streamNode.connect(this.context.destination); 79 | streamNode.port.onmessage = (e) => { 80 | const { event } = e.data; 81 | if (event === 'stop') { 82 | streamNode.disconnect(); 83 | this.stream = null; 84 | } else if (event === 'offset') { 85 | const { requestId, trackId, offset } = e.data; 86 | const currentTime = offset / this.sampleRate; 87 | this.trackSampleOffsets[requestId] = { trackId, offset, currentTime }; 88 | } 89 | }; 90 | this.analyser.disconnect(); 91 | streamNode.connect(this.analyser); 92 | this.stream = streamNode; 93 | return true; 94 | } 95 | 96 | /** 97 | * Adds 16BitPCM data to the currently playing audio stream 98 | * You can add chunks beyond the current play point and they will be queued for play 99 | * @param {ArrayBuffer|Int16Array} arrayBuffer 100 | * @param {string} [trackId] 101 | * @returns {Int16Array} 102 | */ 103 | add16BitPCM(arrayBuffer, trackId = 'default') { 104 | if (typeof trackId !== 'string') { 105 | throw new Error(`trackId must be a string`); 106 | } else if (this.interruptedTrackIds[trackId]) { 107 | return; 108 | } 109 | if (!this.stream) { 110 | this._start(); 111 | } 112 | let buffer; 113 | if (arrayBuffer instanceof 
Int16Array) { 114 | buffer = arrayBuffer; 115 | } else if (arrayBuffer instanceof ArrayBuffer) { 116 | buffer = new Int16Array(arrayBuffer); 117 | } else { 118 | throw new Error(`argument must be Int16Array or ArrayBuffer`); 119 | } 120 | this.stream.port.postMessage({ event: 'write', buffer, trackId }); 121 | return buffer; 122 | } 123 | 124 | /** 125 | * Gets the offset (sample count) of the currently playing stream 126 | * @param {boolean} [interrupt] 127 | * @returns {{trackId: string|null, offset: number, currentTime: number}} 128 | */ 129 | async getTrackSampleOffset(interrupt = false) { 130 | if (!this.stream) { 131 | return null; 132 | } 133 | const requestId = crypto.randomUUID(); 134 | this.stream.port.postMessage({ 135 | event: interrupt ? 'interrupt' : 'offset', 136 | requestId, 137 | }); 138 | let trackSampleOffset; 139 | while (!trackSampleOffset) { 140 | trackSampleOffset = this.trackSampleOffsets[requestId]; 141 | await new Promise((r) => setTimeout(() => r(), 1)); 142 | } 143 | const { trackId } = trackSampleOffset; 144 | if (interrupt && trackId) { 145 | this.interruptedTrackIds[trackId] = true; 146 | } 147 | return trackSampleOffset; 148 | } 149 | 150 | /** 151 | * Strips the current stream and returns the sample offset of the audio 152 | * @param {boolean} [interrupt] 153 | * @returns {{trackId: string|null, offset: number, currentTime: number}} 154 | */ 155 | async interrupt() { 156 | return this.getTrackSampleOffset(true); 157 | } 158 | } 159 | 160 | globalThis.WavStreamPlayer = WavStreamPlayer; 161 | -------------------------------------------------------------------------------- /examples/openai-realtime-console/src/lib/wavtools/lib/worklets/audio_processor.js: -------------------------------------------------------------------------------- 1 | const AudioProcessorWorklet = ` 2 | class AudioProcessor extends AudioWorkletProcessor { 3 | 4 | constructor() { 5 | super(); 6 | this.port.onmessage = this.receive.bind(this); 7 | this.initialize(); 8 | } 9 | 10 | initialize() { 11 | this.foundAudio = false; 12 | this.recording = false; 13 | this.chunks = []; 14 | } 15 | 16 | /** 17 | * Concatenates sampled chunks into channels 18 | * Format is chunk[Left[], Right[]] 19 | */ 20 | readChannelData(chunks, channel = -1, maxChannels = 9) { 21 | let channelLimit; 22 | if (channel !== -1) { 23 | if (chunks[0] && chunks[0].length - 1 < channel) { 24 | throw new Error( 25 | \`Channel \${channel} out of range: max \${chunks[0].length}\` 26 | ); 27 | } 28 | channelLimit = channel + 1; 29 | } else { 30 | channel = 0; 31 | channelLimit = Math.min(chunks[0] ? chunks[0].length : 1, maxChannels); 32 | } 33 | const channels = []; 34 | for (let n = channel; n < channelLimit; n++) { 35 | const length = chunks.reduce((sum, chunk) => { 36 | return sum + chunk[n].length; 37 | }, 0); 38 | const buffers = chunks.map((chunk) => chunk[n]); 39 | const result = new Float32Array(length); 40 | let offset = 0; 41 | for (let i = 0; i < buffers.length; i++) { 42 | result.set(buffers[i], offset); 43 | offset += buffers[i].length; 44 | } 45 | channels[n] = result; 46 | } 47 | return channels; 48 | } 49 | 50 | /** 51 | * Combines parallel audio data into correct format, 52 | * channels[Left[], Right[]] to float32Array[LRLRLRLR...] 
53 | */ 54 | formatAudioData(channels) { 55 | if (channels.length === 1) { 56 | // Simple case is only one channel 57 | const float32Array = channels[0].slice(); 58 | const meanValues = channels[0].slice(); 59 | return { float32Array, meanValues }; 60 | } else { 61 | const float32Array = new Float32Array( 62 | channels[0].length * channels.length 63 | ); 64 | const meanValues = new Float32Array(channels[0].length); 65 | for (let i = 0; i < channels[0].length; i++) { 66 | const offset = i * channels.length; 67 | let meanValue = 0; 68 | for (let n = 0; n < channels.length; n++) { 69 | float32Array[offset + n] = channels[n][i]; 70 | meanValue += channels[n][i]; 71 | } 72 | meanValues[i] = meanValue / channels.length; 73 | } 74 | return { float32Array, meanValues }; 75 | } 76 | } 77 | 78 | /** 79 | * Converts 32-bit float data to 16-bit integers 80 | */ 81 | floatTo16BitPCM(float32Array) { 82 | const buffer = new ArrayBuffer(float32Array.length * 2); 83 | const view = new DataView(buffer); 84 | let offset = 0; 85 | for (let i = 0; i < float32Array.length; i++, offset += 2) { 86 | let s = Math.max(-1, Math.min(1, float32Array[i])); 87 | view.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7fff, true); 88 | } 89 | return buffer; 90 | } 91 | 92 | /** 93 | * Retrieves the most recent amplitude values from the audio stream 94 | * @param {number} channel 95 | */ 96 | getValues(channel = -1) { 97 | const channels = this.readChannelData(this.chunks, channel); 98 | const { meanValues } = this.formatAudioData(channels); 99 | return { meanValues, channels }; 100 | } 101 | 102 | /** 103 | * Exports chunks as an audio/wav file 104 | */ 105 | export() { 106 | const channels = this.readChannelData(this.chunks); 107 | const { float32Array, meanValues } = this.formatAudioData(channels); 108 | const audioData = this.floatTo16BitPCM(float32Array); 109 | return { 110 | meanValues: meanValues, 111 | audio: { 112 | bitsPerSample: 16, 113 | channels: channels, 114 | data: audioData, 115 | }, 116 | }; 117 | } 118 | 119 | receive(e) { 120 | const { event, id } = e.data; 121 | let receiptData = {}; 122 | switch (event) { 123 | case 'start': 124 | this.recording = true; 125 | break; 126 | case 'stop': 127 | this.recording = false; 128 | break; 129 | case 'clear': 130 | this.initialize(); 131 | break; 132 | case 'export': 133 | receiptData = this.export(); 134 | break; 135 | case 'read': 136 | receiptData = this.getValues(); 137 | break; 138 | default: 139 | break; 140 | } 141 | // Always send back receipt 142 | this.port.postMessage({ event: 'receipt', id, data: receiptData }); 143 | } 144 | 145 | sendChunk(chunk) { 146 | const channels = this.readChannelData([chunk]); 147 | const { float32Array, meanValues } = this.formatAudioData(channels); 148 | const rawAudioData = this.floatTo16BitPCM(float32Array); 149 | const monoAudioData = this.floatTo16BitPCM(meanValues); 150 | this.port.postMessage({ 151 | event: 'chunk', 152 | data: { 153 | mono: monoAudioData, 154 | raw: rawAudioData, 155 | }, 156 | }); 157 | } 158 | 159 | process(inputList, outputList, parameters) { 160 | // Copy input to output (e.g. 
speakers) 161 | // Note that this creates choppy sounds with Mac products 162 | const sourceLimit = Math.min(inputList.length, outputList.length); 163 | for (let inputNum = 0; inputNum < sourceLimit; inputNum++) { 164 | const input = inputList[inputNum]; 165 | const output = outputList[inputNum]; 166 | const channelCount = Math.min(input.length, output.length); 167 | for (let channelNum = 0; channelNum < channelCount; channelNum++) { 168 | input[channelNum].forEach((sample, i) => { 169 | output[channelNum][i] = sample; 170 | }); 171 | } 172 | } 173 | const inputs = inputList[0]; 174 | // There's latency at the beginning of a stream before recording starts 175 | // Make sure we actually receive audio data before we start storing chunks 176 | let sliceIndex = 0; 177 | if (!this.foundAudio) { 178 | for (const channel of inputs) { 179 | sliceIndex = 0; // reset for each channel 180 | if (this.foundAudio) { 181 | break; 182 | } 183 | if (channel) { 184 | for (const value of channel) { 185 | if (value !== 0) { 186 | // find only one non-zero entry in any channel 187 | this.foundAudio = true; 188 | break; 189 | } else { 190 | sliceIndex++; 191 | } 192 | } 193 | } 194 | } 195 | } 196 | if (inputs && inputs[0] && this.foundAudio && this.recording) { 197 | // We need to copy the TypedArray, because the \`process\` 198 | // internals will reuse the same buffer to hold each input 199 | const chunk = inputs.map((input) => input.slice(sliceIndex)); 200 | this.chunks.push(chunk); 201 | this.sendChunk(chunk); 202 | } 203 | return true; 204 | } 205 | } 206 | 207 | registerProcessor('audio_processor', AudioProcessor); 208 | `; 209 | 210 | const script = new Blob([AudioProcessorWorklet], { 211 | type: 'application/javascript', 212 | }); 213 | const src = URL.createObjectURL(script); 214 | export const AudioProcessorSrc = src; 215 | -------------------------------------------------------------------------------- /examples/openai-realtime-console/src/lib/wavtools/lib/worklets/stream_processor.js: -------------------------------------------------------------------------------- 1 | export const StreamProcessorWorklet = ` 2 | class StreamProcessor extends AudioWorkletProcessor { 3 | constructor() { 4 | super(); 5 | this.hasStarted = false; 6 | this.hasInterrupted = false; 7 | this.outputBuffers = []; 8 | this.bufferLength = 128; 9 | this.write = { buffer: new Float32Array(this.bufferLength), trackId: null }; 10 | this.writeOffset = 0; 11 | this.trackSampleOffsets = {}; 12 | this.port.onmessage = (event) => { 13 | if (event.data) { 14 | const payload = event.data; 15 | if (payload.event === 'write') { 16 | const int16Array = payload.buffer; 17 | const float32Array = new Float32Array(int16Array.length); 18 | for (let i = 0; i < int16Array.length; i++) { 19 | float32Array[i] = int16Array[i] / 0x8000; // Convert Int16 to Float32 20 | } 21 | this.writeData(float32Array, payload.trackId); 22 | } else if ( 23 | payload.event === 'offset' || 24 | payload.event === 'interrupt' 25 | ) { 26 | const requestId = payload.requestId; 27 | const trackId = this.write.trackId; 28 | const offset = this.trackSampleOffsets[trackId] || 0; 29 | this.port.postMessage({ 30 | event: 'offset', 31 | requestId, 32 | trackId, 33 | offset, 34 | }); 35 | if (payload.event === 'interrupt') { 36 | this.hasInterrupted = true; 37 | } 38 | } else { 39 | throw new Error(\`Unhandled event "\${payload.event}"\`); 40 | } 41 | } 42 | }; 43 | } 44 | 45 | writeData(float32Array, trackId = null) { 46 | let { buffer } = this.write; 47 | let offset = 
this.writeOffset; 48 | for (let i = 0; i < float32Array.length; i++) { 49 | buffer[offset++] = float32Array[i]; 50 | if (offset >= buffer.length) { 51 | this.outputBuffers.push(this.write); 52 | this.write = { buffer: new Float32Array(this.bufferLength), trackId }; 53 | buffer = this.write.buffer; 54 | offset = 0; 55 | } 56 | } 57 | this.writeOffset = offset; 58 | return true; 59 | } 60 | 61 | process(inputs, outputs, parameters) { 62 | const output = outputs[0]; 63 | const outputChannelData = output[0]; 64 | const outputBuffers = this.outputBuffers; 65 | if (this.hasInterrupted) { 66 | this.port.postMessage({ event: 'stop' }); 67 | return false; 68 | } else if (outputBuffers.length) { 69 | this.hasStarted = true; 70 | const { buffer, trackId } = outputBuffers.shift(); 71 | for (let i = 0; i < outputChannelData.length; i++) { 72 | outputChannelData[i] = buffer[i] || 0; 73 | } 74 | if (trackId) { 75 | this.trackSampleOffsets[trackId] = 76 | this.trackSampleOffsets[trackId] || 0; 77 | this.trackSampleOffsets[trackId] += buffer.length; 78 | } 79 | return true; 80 | } else if (this.hasStarted) { 81 | this.port.postMessage({ event: 'stop' }); 82 | return false; 83 | } else { 84 | return true; 85 | } 86 | } 87 | } 88 | 89 | registerProcessor('stream_processor', StreamProcessor); 90 | `; 91 | 92 | const script = new Blob([StreamProcessorWorklet], { 93 | type: 'application/javascript', 94 | }); 95 | const src = URL.createObjectURL(script); 96 | export const StreamProcessorSrc = src; 97 | -------------------------------------------------------------------------------- /examples/openai-realtime-console/src/logo.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /examples/openai-realtime-console/src/pages/ConsolePage.scss: -------------------------------------------------------------------------------- 1 | [data-component='ConsolePage'] { 2 | font-family: 'Roboto Mono', monospace; 3 | font-weight: 400; 4 | font-style: normal; 5 | font-size: 12px; 6 | height: 100%; 7 | display: flex; 8 | flex-direction: column; 9 | overflow: hidden; 10 | margin: 0px 8px; 11 | & > div { 12 | flex-shrink: 0; 13 | } 14 | 15 | .spacer { 16 | flex-grow: 1; 17 | } 18 | 19 | .content-top { 20 | display: flex; 21 | align-items: center; 22 | padding: 8px 16px; 23 | min-height: 40px; 24 | .content-title { 25 | flex-grow: 1; 26 | display: flex; 27 | align-items: center; 28 | gap: 12px; 29 | img { 30 | width: 24px; 31 | height: 24px; 32 | } 33 | } 34 | } 35 | 36 | .content-main { 37 | flex-grow: 1; 38 | flex-shrink: 1 !important; 39 | margin: 0px 16px; 40 | display: flex; 41 | overflow: hidden; 42 | margin-bottom: 24px; 43 | .content-block { 44 | position: relative; 45 | display: flex; 46 | flex-direction: column; 47 | max-height: 100%; 48 | width: 100%; 49 | .content-block-title { 50 | flex-shrink: 0; 51 | padding-top: 16px; 52 | padding-bottom: 4px; 53 | position: relative; 54 | } 55 | .content-block-body { 56 | color: #6e6e7f; 57 | position: relative; 58 | flex-grow: 1; 59 | padding: 8px 0px; 60 | padding-top: 4px; 61 | line-height: 1.2em; 62 | overflow: auto; 63 | &.full { 64 | padding: 0px; 65 | } 66 | } 67 | } 68 | .content-right { 69 | width: 300px; 70 | flex-shrink: 0; 71 | display: flex; 72 | flex-direction: column; 73 | margin-left: 24px; 74 | gap: 24px; 75 | & > div { 76 | border-radius: 16px; 77 | flex-grow: 1; 78 | flex-shrink: 0; 79 | overflow: hidden; 80 | position: relative; 81 | 
.content-block-title { 82 | position: absolute; 83 | display: flex; 84 | align-items: center; 85 | justify-content: center; 86 | line-height: 2em; 87 | top: 16px; 88 | left: 16px; 89 | padding: 4px 16px; 90 | background-color: #fff; 91 | border-radius: 1000px; 92 | min-height: 32px; 93 | z-index: 9999; 94 | text-align: center; 95 | white-space: pre; 96 | &.bottom { 97 | top: auto; 98 | bottom: 16px; 99 | right: 16px; 100 | } 101 | } 102 | } 103 | & > div.kv { 104 | height: 250px; 105 | max-height: 250px; 106 | white-space: pre; 107 | background-color: #ececf1; 108 | .content-block-body { 109 | padding: 16px; 110 | margin-top: 56px; 111 | } 112 | } 113 | } 114 | .content-logs { 115 | flex-grow: 1; 116 | display: flex; 117 | flex-direction: column; 118 | overflow: hidden; 119 | & > div { 120 | flex-grow: 1; 121 | } 122 | & > .content-actions { 123 | flex-grow: 0; 124 | flex-shrink: 0; 125 | display: flex; 126 | align-items: center; 127 | justify-content: center; 128 | gap: 16px; 129 | } 130 | & > div.events { 131 | overflow: hidden; 132 | } 133 | .events { 134 | border-top: 1px solid #e7e7e7; 135 | } 136 | .conversation { 137 | display: flex; 138 | flex-shrink: 0; 139 | width: 100%; 140 | overflow: hidden; 141 | height: 200px; 142 | min-height: 0; 143 | max-height: 200px; 144 | border-top: 1px solid #e7e7e7; 145 | } 146 | } 147 | } 148 | 149 | .conversation-item { 150 | position: relative; 151 | display: flex; 152 | gap: 16px; 153 | margin-bottom: 16px; 154 | &:not(:hover) .close { 155 | display: none; 156 | } 157 | .close { 158 | position: absolute; 159 | top: 0px; 160 | right: -20px; 161 | background: #aaa; 162 | color: #fff; 163 | display: flex; 164 | border-radius: 16px; 165 | padding: 2px; 166 | cursor: pointer; 167 | &:hover { 168 | background: #696969; 169 | } 170 | svg { 171 | stroke-width: 3; 172 | width: 12px; 173 | height: 12px; 174 | } 175 | } 176 | .speaker { 177 | position: relative; 178 | text-align: left; 179 | gap: 16px; 180 | width: 80px; 181 | flex-shrink: 0; 182 | margin-right: 16px; 183 | &.user { 184 | color: #0099ff; 185 | } 186 | &.assistant { 187 | color: #009900; 188 | } 189 | } 190 | .speaker-content { 191 | color: #18181b; 192 | overflow: hidden; 193 | word-wrap: break-word; 194 | } 195 | } 196 | 197 | .event { 198 | border-radius: 3px; 199 | white-space: pre; 200 | display: flex; 201 | padding: 0px; 202 | gap: 16px; 203 | .event-timestamp { 204 | text-align: left; 205 | gap: 8px; 206 | padding: 4px 0px; 207 | width: 80px; 208 | flex-shrink: 0; 209 | margin-right: 16px; 210 | } 211 | .event-details { 212 | display: flex; 213 | flex-direction: column; 214 | color: #18181b; 215 | gap: 8px; 216 | .event-summary { 217 | padding: 4px 8px; 218 | margin: 0px -8px; 219 | &:hover { 220 | border-radius: 8px; 221 | background-color: #f0f0f0; 222 | } 223 | cursor: pointer; 224 | display: flex; 225 | gap: 8px; 226 | align-items: center; 227 | .event-source { 228 | flex-shrink: 0; 229 | display: flex; 230 | align-items: center; 231 | gap: 8px; 232 | &.client { 233 | color: #0099ff; 234 | } 235 | &.server { 236 | color: #009900; 237 | } 238 | &.error { 239 | color: #990000; 240 | } 241 | svg { 242 | stroke-width: 3; 243 | width: 12px; 244 | height: 12px; 245 | } 246 | } 247 | } 248 | } 249 | } 250 | 251 | .visualization { 252 | position: absolute; 253 | display: flex; 254 | bottom: 4px; 255 | right: 8px; 256 | padding: 4px; 257 | border-radius: 16px; 258 | z-index: 10; 259 | gap: 2px; 260 | .visualization-entry { 261 | position: relative; 262 | display: flex; 263 | align-items: 
center; 264 | height: 40px; 265 | width: 100px; 266 | gap: 4px; 267 | &.client { 268 | color: #0099ff; 269 | } 270 | &.server { 271 | color: #009900; 272 | } 273 | canvas { 274 | width: 100%; 275 | height: 100%; 276 | color: currentColor; 277 | } 278 | } 279 | } 280 | } 281 | -------------------------------------------------------------------------------- /examples/openai-realtime-console/src/react-app-env.d.ts: -------------------------------------------------------------------------------- 1 | /// 2 | -------------------------------------------------------------------------------- /examples/openai-realtime-console/src/reportWebVitals.ts: -------------------------------------------------------------------------------- 1 | import { ReportHandler } from 'web-vitals'; 2 | 3 | const reportWebVitals = (onPerfEntry?: ReportHandler) => { 4 | if (onPerfEntry && onPerfEntry instanceof Function) { 5 | import('web-vitals').then(({ getCLS, getFID, getFCP, getLCP, getTTFB }) => { 6 | getCLS(onPerfEntry); 7 | getFID(onPerfEntry); 8 | getFCP(onPerfEntry); 9 | getLCP(onPerfEntry); 10 | getTTFB(onPerfEntry); 11 | }); 12 | } 13 | }; 14 | 15 | export default reportWebVitals; 16 | -------------------------------------------------------------------------------- /examples/openai-realtime-console/src/setupTests.ts: -------------------------------------------------------------------------------- 1 | // jest-dom adds custom jest matchers for asserting on DOM nodes. 2 | // allows you to do things like: 3 | // expect(element).toHaveTextContent(/react/i) 4 | // learn more: https://github.com/testing-library/jest-dom 5 | import '@testing-library/jest-dom'; 6 | -------------------------------------------------------------------------------- /examples/openai-realtime-console/src/utils/conversation_config.js: -------------------------------------------------------------------------------- 1 | export const instructions = `System settings: 2 | Tool use: enabled. 3 | 4 | Instructions: 5 | - You are an artificial intelligence agent responsible for helping test realtime voice capabilities 6 | - Please make sure to respond with a helpful voice via audio 7 | - Be kind, helpful, and curteous 8 | - It is okay to ask the user questions 9 | - Use tools and functions you have available liberally, it is part of the training apparatus 10 | - Be open to exploration and conversation 11 | - Remember: this is just for fun and testing! 12 | 13 | Personality: 14 | - Be upbeat and genuine 15 | - Try speaking quickly as if excited 16 | `; 17 | -------------------------------------------------------------------------------- /examples/openai-realtime-console/src/utils/wav_renderer.ts: -------------------------------------------------------------------------------- 1 | const dataMap = new WeakMap(); 2 | 3 | /** 4 | * Normalizes a Float32Array to Array(m): We use this to draw amplitudes on a graph 5 | * If we're rendering the same audio data, then we'll often be using 6 | * the same (data, m, downsamplePeaks) triplets so we give option to memoize 7 | */ 8 | const normalizeArray = ( 9 | data: Float32Array, 10 | m: number, 11 | downsamplePeaks: boolean = false, 12 | memoize: boolean = false 13 | ) => { 14 | let cache, mKey, dKey; 15 | if (memoize) { 16 | mKey = m.toString(); 17 | dKey = downsamplePeaks.toString(); 18 | cache = dataMap.has(data) ? 
dataMap.get(data) : {}; 19 | dataMap.set(data, cache); 20 | cache[mKey] = cache[mKey] || {}; 21 | if (cache[mKey][dKey]) { 22 | return cache[mKey][dKey]; 23 | } 24 | } 25 | const n = data.length; 26 | const result = new Array(m); 27 | if (m <= n) { 28 | // Downsampling 29 | result.fill(0); 30 | const count = new Array(m).fill(0); 31 | for (let i = 0; i < n; i++) { 32 | const index = Math.floor(i * (m / n)); 33 | if (downsamplePeaks) { 34 | // take highest result in the set 35 | result[index] = Math.max(result[index], Math.abs(data[i])); 36 | } else { 37 | result[index] += Math.abs(data[i]); 38 | } 39 | count[index]++; 40 | } 41 | if (!downsamplePeaks) { 42 | for (let i = 0; i < result.length; i++) { 43 | result[i] = result[i] / count[i]; 44 | } 45 | } 46 | } else { 47 | for (let i = 0; i < m; i++) { 48 | const index = (i * (n - 1)) / (m - 1); 49 | const low = Math.floor(index); 50 | const high = Math.ceil(index); 51 | const t = index - low; 52 | if (high >= n) { 53 | result[i] = data[n - 1]; 54 | } else { 55 | result[i] = data[low] * (1 - t) + data[high] * t; 56 | } 57 | } 58 | } 59 | if (memoize) { 60 | cache[mKey as string][dKey as string] = result; 61 | } 62 | return result; 63 | }; 64 | 65 | export const WavRenderer = { 66 | /** 67 | * Renders a point-in-time snapshot of an audio sample, usually frequency values 68 | * @param canvas 69 | * @param ctx 70 | * @param data 71 | * @param color 72 | * @param pointCount number of bars to render 73 | * @param barWidth width of bars in px 74 | * @param barSpacing spacing between bars in px 75 | * @param center vertically center the bars 76 | */ 77 | drawBars: ( 78 | canvas: HTMLCanvasElement, 79 | ctx: CanvasRenderingContext2D, 80 | data: Float32Array, 81 | color: string, 82 | pointCount: number = 0, 83 | barWidth: number = 0, 84 | barSpacing: number = 0, 85 | center: boolean = false 86 | ) => { 87 | pointCount = Math.floor( 88 | Math.min( 89 | pointCount, 90 | (canvas.width - barSpacing) / (Math.max(barWidth, 1) + barSpacing) 91 | ) 92 | ); 93 | if (!pointCount) { 94 | pointCount = Math.floor( 95 | (canvas.width - barSpacing) / (Math.max(barWidth, 1) + barSpacing) 96 | ); 97 | } 98 | if (!barWidth) { 99 | barWidth = (canvas.width - barSpacing) / pointCount - barSpacing; 100 | } 101 | const points = normalizeArray(data, pointCount, true); 102 | for (let i = 0; i < pointCount; i++) { 103 | const amplitude = Math.abs(points[i]); 104 | const height = Math.max(1, amplitude * canvas.height); 105 | const x = barSpacing + i * (barWidth + barSpacing); 106 | const y = center ? 
(canvas.height - height) / 2 : canvas.height - height; 107 | ctx.fillStyle = color; 108 | ctx.fillRect(x, y, barWidth, height); 109 | } 110 | }, 111 | }; 112 | -------------------------------------------------------------------------------- /examples/openai-realtime-console/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2020", 4 | "lib": ["dom", "dom.iterable", "esnext", "ES2020"], 5 | "allowJs": true, 6 | "skipLibCheck": true, 7 | "esModuleInterop": true, 8 | "allowSyntheticDefaultImports": true, 9 | "strict": true, 10 | "forceConsistentCasingInFileNames": true, 11 | "noFallthroughCasesInSwitch": true, 12 | "module": "esnext", 13 | "moduleResolution": "node", 14 | "resolveJsonModule": true, 15 | "isolatedModules": true, 16 | "noEmit": true, 17 | "jsx": "react-jsx" 18 | }, 19 | "include": ["src", "src/lib"] 20 | } 21 | -------------------------------------------------------------------------------- /fixtures/toronto.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/transitive-bullshit/openai-realtime-api/89d37b5f461fbcb0300241360749abe85ca45d01/fixtures/toronto.mp3 -------------------------------------------------------------------------------- /license: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Travis Fischer 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "openai-realtime-api", 3 | "version": "1.0.7", 4 | "description": "TypeScript client for OpenAI's realtime voice API.", 5 | "author": "Travis Fischer ", 6 | "license": "MIT", 7 | "repository": { 8 | "type": "git", 9 | "url": "git+https://github.com/transitive-bullshit/openai-realtime-api.git" 10 | }, 11 | "packageManager": "pnpm@9.12.2", 12 | "engines": { 13 | "node": ">=18" 14 | }, 15 | "type": "module", 16 | "main": "./dist/index.js", 17 | "source": "./src/index.ts", 18 | "types": "./dist/index.d.ts", 19 | "sideEffects": false, 20 | "exports": { 21 | ".": { 22 | "types": "./dist/index.d.ts", 23 | "default": "./dist/index.js" 24 | }, 25 | "./node": { 26 | "types": "./dist/node/index.d.ts", 27 | "default": "./dist/node/index.js" 28 | } 29 | }, 30 | "files": [ 31 | "dist" 32 | ], 33 | "scripts": { 34 | "build": "tsup", 35 | "dev": "tsup --watch", 36 | "pretest": "run-s build", 37 | "test": "run-s test:*", 38 | "test:format": "prettier --check \"**/*.{js,ts,tsx}\"", 39 | "test:lint": "eslint .", 40 | "test:typecheck": "tsc --noEmit", 41 | "test-unit": "vitest run", 42 | "preinstall": "npx only-allow pnpm" 43 | }, 44 | "dependencies": { 45 | "nanoid": "^5.0.8", 46 | "ws": "^8.18.0" 47 | }, 48 | "devDependencies": { 49 | "@fisch0920/eslint-config": "^1.4.0", 50 | "@total-typescript/ts-reset": "^0.6.1", 51 | "@types/node": "^22.8.6", 52 | "@types/ws": "^8.5.12", 53 | "audio-decode": "^2.2.2", 54 | "dotenv": "^16.4.5", 55 | "eslint": "^8.57.1", 56 | "npm-run-all2": "^7.0.1", 57 | "only-allow": "^1.2.1", 58 | "prettier": "^3.3.3", 59 | "tsup": "^8.3.5", 60 | "typescript": "^5.6.3", 61 | "vitest": "2.1.4" 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /pnpm-workspace.yaml: -------------------------------------------------------------------------------- 1 | packages: 2 | - 'examples/*' 3 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # openai-realtime-api 2 | 3 | > TypeScript client for OpenAI's realtime voice API. 4 | 5 |

6 | Build Status 7 | NPM 8 | MIT License 9 | Prettier Code Formatting 10 |

11 | 12 | - [Features](#features) 13 | - [Install](#install) 14 | - [Usage](#usage) 15 | - [Server Usage](#server-usage) 16 | - [Browser Usage](#browser-usage) 17 | - [Relay Server](#relay-server) 18 | - [Examples](#examples) 19 | - [Node.js Basic](#nodejs-basic) 20 | - [Node.js Audio](#nodejs-audio) 21 | - [Node.js Conversation](#nodejs-conversation) 22 | - [OpenAI Realtime Console](#openai-realtime-console) 23 | - [TODO](#todo) 24 | - [License](#license) 25 | 26 | ## Features 27 | 28 | - **Strongly typed** TS fork of [openai/openai-realtime-api-beta](https://github.com/openai/openai-realtime-api-beta) 29 | - [All events](./src/events.ts) and handlers are 100% typed 30 | - **Drop-in replacement for OpenAI's JS version** 31 | - Fixes many small bugs and inconsistencies 32 | - ([#3](https://github.com/openai/openai-realtime-api-beta/issues/3), [#11](https://github.com/openai/openai-realtime-api-beta/pull/11), [#12](https://github.com/openai/openai-realtime-api-beta/pull/12), [#14](https://github.com/openai/openai-realtime-api-beta/issues/14), [#17](https://github.com/openai/openai-realtime-api-beta/pull/17), [#29](https://github.com/openai/openai-realtime-api-beta/pull/29), [#34](https://github.com/openai/openai-realtime-api-beta/pull/34), [#35](https://github.com/openai/openai-realtime-api-beta/pull/35), [#37](https://github.com/openai/openai-realtime-api-beta/pull/37), [#43](https://github.com/openai/openai-realtime-api-beta/pull/43), [#44](https://github.com/openai/openai-realtime-api-beta/pull/44), and likely others) 33 | - Published to NPM 34 | - Supports Node.js, browser, deno, bun, CF workers, etc 35 | - Includes Node.js CLI examples for easy local testing 36 | - Includes a simple relay server 37 | - Includes the [OpenAI Realtime Console demo](#openai-realtime-console) using this package 🔥 38 | 39 | ## Install 40 | 41 | ```sh 42 | npm install openai-realtime-api 43 | ``` 44 | 45 | This package is [ESM-only](https://gist.github.com/sindresorhus/a39789f98801d908bbc7ff3ecc99d99c). It requires `Node.js >= 18`, a browser environment, or an equivalent JS runtime (Deno, Bun, CF workers, etc). 46 | 47 | ## Usage 48 | 49 | > [!IMPORTANT] 50 | > All usage and events are 100% compatible with the [OpenAI JS version](https://github.com/openai/openai-realtime-api-beta). The main difference aside from bug fixes is that **all events are fully-typed**. 51 | 52 | ```ts 53 | import { RealtimeClient } from 'openai-realtime-api' 54 | 55 | // Create a new client; all params are optional; apiKey defaults to the 56 | // `OPENAI_API_KEY` environment variable (when using Node.js). 57 | const client = new RealtimeClient({ 58 | sessionConfig: { 59 | instructions: 'You are a great, upbeat friend.', 60 | voice: 'alloy' 61 | } 62 | }) 63 | 64 | // Can change session config ahead of connecting. 65 | client.updateSession({ 66 | turn_detection: null, 67 | input_audio_transcription: { model: 'whisper-1' } 68 | }) 69 | 70 | // Example of custom event handling 71 | client.on('conversation.updated', (event) => { 72 | // All events are fully-typed based on the event name. 73 | // In this case, `event` will have the type `RealtimeCustomEvents.ConversationUpdatedEvent` 74 | const { item, delta } = event 75 | 76 | // Access the full list of conversation items. 77 | const items = client.conversation.getItems() 78 | }) 79 | 80 | // Connect to the Realtime API. 81 | await client.connect() 82 | 83 | // Send a text message and trigger a response generation. 
84 | client.sendUserMessageContent([{ type: 'input_text', text: 'How are you?' }]) 85 | 86 | // Wait for a completed response from the model. 87 | // (`event` will be of type `RealtimeServerEvents.ResponseDoneEvent`) 88 | const event = await client.realtime.waitForNext('response.done') 89 | ``` 90 | 91 | See [examples](#examples) for more complete demos. 92 | 93 | See also the official [OpenAI Realtime API Guide](https://platform.openai.com/docs/guides/realtime) and [API Reference](https://platform.openai.com/docs/api-reference/realtime). 94 | 95 | For more info on usage, tools, and custom events, see [OpenAI's readme](https://github.com/openai/openai-realtime-api-beta). Note that this package is 100% compatible with OpenAI's beta package in terms of both official and unofficial events. The only difference is that all events are typed. 96 | 97 | ### Server Usage 98 | 99 | `RealtimeClient` takes in an optional `apiKey` which defaults to `process.env.OPENAI_API_KEY`. 100 | 101 | ### Browser Usage 102 | 103 | `RealtimeClient` takes in an optional `url` which can be pointed at a relay server. 104 | 105 | ```ts 106 | import { RealtimeClient } from 'openai-realtime-api' 107 | 108 | // Create a browser client which points to a relay server. 109 | const client = new RealtimeClient({ url: RELAY_SERVER_URL }) 110 | ``` 111 | 112 | Alternatively, you can use `apiKey` with `RealtimeClient` in the browser, but you also have to pass `dangerouslyAllowAPIKeyInBrowser: true`. 113 | 114 | ```ts 115 | import { RealtimeClient } from 'openai-realtime-api' 116 | 117 | // Create a browser client which connects directly to the OpenAI realtime API 118 | // with an unsafe, client-side API key. 119 | const client = new RealtimeClient({ 120 | apiKey: process.env.OPENAI_API_KEY, 121 | dangerouslyAllowAPIKeyInBrowser: true 122 | }) 123 | ``` 124 | 125 | > [!CAUTION] 126 | > We strongly recommend against including your API key in any client (mobile or browser). It can be useful for local testing, but for production, you should be using a relay server. 127 | 128 | ### Relay Server 129 | 130 | ```ts 131 | import { RealtimeClient } from 'openai-realtime-api' 132 | import { RealtimeRelay } from 'openai-realtime-api/node' 133 | 134 | // Setting `relay: true` disables tool calls and directly modifying the session, 135 | // since that will be the responsibility of the upstream client. 136 | const client = new RealtimeClient({ relay: true }) 137 | const relay = new RealtimeRelay({ client }) 138 | 139 | relay.listen(8081) 140 | ``` 141 | 142 | Note that `RealtimeRelay` uses a different import path because it contains Node.js-specific code. 143 | 144 | A full example is included in [examples/node/relay-server.ts](./examples/node/relay-server.ts). 145 | 146 | ## Examples 147 | 148 | To run the included examples (requires `Node.js >= 18`): 149 | 150 | 1. Clone this repo 151 | 2. Run `pnpm install` 152 | 3. Setup `.env` with your `OPENAI_API_KEY` 153 | 154 | You can set `debug: true` in the `RealtimeClient` constructor of these examples to print out the full event log. 155 | 156 | ### Node.js Basic 157 | 158 | Simple Node.js demo using the `RealtimeClient` which sends a text message and waits for a complete response. 159 | 160 | - [examples/node/basic.ts](./examples/node/basic.ts) 161 | - Run `npx tsx examples/node/basic.ts` 162 | 163 | ### Node.js Audio 164 | 165 | Simple Node.js demo using the `RealtimeClient` which sends a short audio message and waits for a complete response. 
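The core of this demo reduces to a sketch like the following, where `audio` is assumed to already be a base64-encoded string of mono PCM16 audio sent as an `input_audio` content part:

```ts
import { RealtimeClient } from 'openai-realtime-api'

const client = new RealtimeClient()
await client.connect()

// Send the audio as a user message; this also triggers a response generation.
client.sendUserMessageContent([{ type: 'input_audio', audio }])

// Wait for a completed response from the model.
const event = await client.realtime.waitForNext('response.done')
```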
166 | 167 | - [examples/node/audio.ts](./examples/node/audio.ts) 168 | - Run `npx tsx examples/node/audio.ts` 169 | 170 | ### Node.js Conversation 171 | 172 | Simple Node.js demo using the `RealtimeClient` with a microphone and speaker to simulate a full, back & forth conversation from the terminal. 173 | 174 | - [examples/node/convo.ts](./examples/node/convo.ts) 175 | - This demo uses the [mic](https://github.com/ashishbajaj99/mic) and [speaker](https://github.com/TooTallNate/node-speaker) npm packages 176 | - `mic` requires [sox](https://sourceforge.net/projects/sox/); on macOS, you can run `brew install sox` 177 | - `npx tsx examples/node/convo.ts` 178 | 179 | ### OpenAI Realtime Console 180 | 181 | This example has been imported from https://github.com/openai/openai-realtime-console ([at commit 6ea4dba](https://github.com/openai/openai-realtime-console/tree/6ea4dba795fee868c60ea9e8e7eba7469974b3e9)). The only change has been to replace `@openai/realtime-api-beta` with `openai-realtime-api` and to fix a few types. 182 | 183 | 184 | 185 | To run the realtime console example: 186 | 187 | ```sh 188 | pnpm install 189 | cd examples/openai-realtime-console 190 | pnpm start 191 | ``` 192 | 193 | ## TODO 194 | 195 | - add an example using tools 196 | - add an example next.js app 197 | - improve readme docs 198 | 199 | ## License 200 | 201 | MIT © [Travis Fischer](https://x.com/transitive_bs) 202 | 203 | If you found this project interesting, [consider following me on Twitter](https://x.com/transitive_bs). 204 | -------------------------------------------------------------------------------- /src/api.ts: -------------------------------------------------------------------------------- 1 | import type { ClientRequest } from 'node:http' 2 | 3 | import type { WebSocket as WS } from 'ws' 4 | 5 | import type { 6 | Event, 7 | RealtimeClientEvents, 8 | RealtimeServerEvents 9 | } from './events' 10 | import { RealtimeEventHandler } from './event-handler' 11 | import { 12 | generateId, 13 | getEnv, 14 | hasNativeWebSocket, 15 | isBrowser, 16 | trimDebugEvent 17 | } from './utils' 18 | 19 | /** 20 | * The RealtimeAPI class handles low-level communication with the OpenAI 21 | * Realtime API via WebSockets. 22 | */ 23 | export class RealtimeAPI extends RealtimeEventHandler< 24 | | RealtimeClientEvents.EventType 25 | | RealtimeServerEvents.EventType 26 | | 'close' 27 | | `client.${RealtimeClientEvents.EventType}` 28 | | `server.${RealtimeServerEvents.EventType}` 29 | | 'client.*' 30 | | 'server.*', 31 | Event, 32 | RealtimeClientEvents.EventMap & 33 | RealtimeServerEvents.EventMap & 34 | RealtimeClientEvents.PrefixedEventMap & 35 | RealtimeServerEvents.PrefixedEventMap & { 36 | 'client.*': RealtimeClientEvents.ClientEvent 37 | } & { 38 | 'server.*': RealtimeServerEvents.ServerEvent 39 | } & { 40 | close: { type: 'close'; error: boolean } 41 | } 42 | > { 43 | readonly model: string 44 | readonly url: string 45 | readonly apiKey?: string 46 | readonly debug: boolean 47 | ws?: WebSocket | WS 48 | 49 | /** 50 | * Creates a new RealtimeAPI instance. 
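 * Typically accessed via `client.realtime` on a `RealtimeClient` rather than constructed directly.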
51 | */ 52 | constructor({ 53 | model = 'gpt-4o-realtime-preview-2024-10-01', 54 | url = 'wss://api.openai.com/v1/realtime', 55 | apiKey = getEnv('OPENAI_API_KEY'), 56 | dangerouslyAllowAPIKeyInBrowser, 57 | debug 58 | }: { 59 | model?: string 60 | url?: string 61 | apiKey?: string 62 | dangerouslyAllowAPIKeyInBrowser?: boolean 63 | debug?: boolean 64 | } = {}) { 65 | super() 66 | 67 | this.model = model 68 | this.url = url 69 | this.apiKey = apiKey 70 | this.debug = !!debug 71 | 72 | if (isBrowser && this.apiKey) { 73 | if (!dangerouslyAllowAPIKeyInBrowser) { 74 | throw new Error( 75 | 'Unable to provide API key in the browser without "dangerouslyAllowAPIKeyInBrowser" set to true' 76 | ) 77 | } 78 | } 79 | } 80 | 81 | /** 82 | * Whether or not the WebSocket is connected. 83 | */ 84 | get isConnected(): boolean { 85 | return !!this.ws 86 | } 87 | 88 | /** 89 | * Connects to Realtime API WebSocket Server. 90 | */ 91 | async connect() { 92 | if (this.isConnected) { 93 | return 94 | } 95 | 96 | if (!this.apiKey && !isBrowser) { 97 | console.warn(`No apiKey provided for connection to "${this.url}"`) 98 | } 99 | 100 | const url = new URL(this.url) 101 | url.searchParams.set('model', this.model) 102 | 103 | if (hasNativeWebSocket()) { 104 | if (isBrowser && this.apiKey) { 105 | console.warn( 106 | 'Warning: Connecting using API key in the browser, this is not recommended' 107 | ) 108 | } 109 | 110 | const ws = new WebSocket( 111 | url.toString(), 112 | [ 113 | 'realtime', 114 | this.apiKey ? `openai-insecure-api-key.${this.apiKey}` : undefined, 115 | 'openai-beta.realtime-v1' 116 | ].filter(Boolean) 117 | ) 118 | 119 | ws.addEventListener('message', (event) => { 120 | const message: any = JSON.parse(event.data) 121 | this.receive(message.type, message) 122 | }) 123 | 124 | return new Promise((resolve, reject) => { 125 | const connectionErrorHandler = () => { 126 | this.disconnect(ws) 127 | reject(new Error(`Could not connect to "${this.url}"`)) 128 | } 129 | 130 | ws.addEventListener('error', connectionErrorHandler) 131 | ws.addEventListener('open', () => { 132 | this._log(`Connected to "${this.url}"`) 133 | 134 | ws.removeEventListener('error', connectionErrorHandler) 135 | ws.addEventListener('error', () => { 136 | this.disconnect(ws) 137 | this._log(`Error, disconnected from "${this.url}"`) 138 | this.dispatch('close', { type: 'close', error: true }) 139 | }) 140 | 141 | ws.addEventListener('close', () => { 142 | this.disconnect(ws) 143 | this._log(`Disconnected from "${this.url}"`) 144 | this.dispatch('close', { type: 'close', error: false }) 145 | }) 146 | 147 | this.ws = ws 148 | resolve(true) 149 | }) 150 | }) 151 | } else { 152 | // Node.js 153 | const wsModule = await import('ws') 154 | const ws: WS = new wsModule.WebSocket(url.toString(), [], { 155 | // Add auth headers 156 | finishRequest: (request: ClientRequest) => { 157 | request.setHeader('OpenAI-Beta', 'realtime=v1') 158 | 159 | if (this.apiKey) { 160 | request.setHeader('Authorization', `Bearer ${this.apiKey}`) 161 | 162 | // Needed for Azure OpenAI 163 | request.setHeader('api-key', this.apiKey) 164 | } 165 | 166 | request.end() 167 | } 168 | // TODO: this `any` is a workaround for `@types/ws` being out-of-date. 
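        // (`finishRequest` above is what injects the `OpenAI-Beta` and auth
        // headers into the WebSocket upgrade request.)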
169 | } as any) 170 | 171 | ws.on('message', (data) => { 172 | const message: any = JSON.parse(data.toString()) 173 | this.receive(message.type, message) 174 | }) 175 | 176 | return new Promise((resolve, reject) => { 177 | const connectionErrorHandler = () => { 178 | this.disconnect(ws) 179 | reject(new Error(`Could not connect to "${this.url}"`)) 180 | } 181 | 182 | ws.on('error', connectionErrorHandler) 183 | ws.on('open', () => { 184 | this._log(`Connected to "${this.url}"`) 185 | 186 | ws.removeListener('error', connectionErrorHandler) 187 | ws.on('error', () => { 188 | this._log(`Error, disconnected from "${this.url}"`) 189 | this.disconnect(ws) 190 | this.dispatch('close', { type: 'close', error: true }) 191 | }) 192 | 193 | ws.on('close', () => { 194 | this.disconnect(ws) 195 | this._log(`Disconnected from "${this.url}"`) 196 | this.dispatch('close', { type: 'close', error: false }) 197 | }) 198 | 199 | this.ws = ws 200 | resolve() 201 | }) 202 | }) 203 | } 204 | } 205 | 206 | /** 207 | * Disconnects from the Realtime API server. 208 | */ 209 | disconnect(ws?: WebSocket | WS) { 210 | if (this.ws && (!ws || this.ws === ws)) { 211 | this.ws?.close() 212 | this.ws = undefined 213 | } 214 | } 215 | 216 | /** 217 | * Receives an event from WebSocket and dispatches related events. 218 | */ 219 | receive< 220 | E extends RealtimeServerEvents.EventType, 221 | D extends 222 | RealtimeServerEvents.ServerEvent = RealtimeServerEvents.EventMap[E] extends RealtimeServerEvents.ServerEvent 223 | ? RealtimeServerEvents.EventMap[E] 224 | : RealtimeServerEvents.ServerEvent 225 | >(eventName: E, event: D) { 226 | this._log('received:', eventName, event) 227 | this.dispatch(eventName, event) 228 | this.dispatch(`server.${eventName}`, event) 229 | this.dispatch('server.*', event) 230 | } 231 | 232 | /** 233 | * Sends an event to the underlying WebSocket and dispatches related events. 234 | */ 235 | send< 236 | E extends RealtimeClientEvents.EventType, 237 | D extends 238 | RealtimeClientEvents.ClientEvent = RealtimeClientEvents.EventMap[E] extends RealtimeClientEvents.ClientEvent 239 | ? RealtimeClientEvents.EventMap[E] 240 | : RealtimeClientEvents.ClientEvent 241 | >(eventName: E, data: Omit = {} as any) { 242 | if (!this.isConnected) { 243 | throw new Error(`RealtimeAPI is not connected`) 244 | } 245 | data = data || {} 246 | if (typeof data !== 'object') { 247 | throw new TypeError(`data must be an object`) 248 | } 249 | 250 | const event = { 251 | event_id: generateId('evt_'), 252 | type: eventName, 253 | ...data 254 | } 255 | this.dispatch(eventName, event) 256 | this.dispatch(`client.${eventName}`, event) 257 | this.dispatch('client.*', event) 258 | this._log('sent:', eventName, event) 259 | this.ws!.send(JSON.stringify(event)) 260 | } 261 | 262 | /** 263 | * Writes WebSocket logs to the console if `debug` is enabled. 
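   * Object arguments are trimmed via `trimDebugEvent` and pretty-printed as JSON.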
264 | */ 265 | protected _log(...args: any[]) { 266 | const date = new Date().toISOString() 267 | const logs = [`[Websocket/${date}]`].concat(args).map((arg) => { 268 | if (typeof arg === 'object' && arg !== null) { 269 | return JSON.stringify(trimDebugEvent(arg), null, 2) 270 | } else { 271 | return arg 272 | } 273 | }) 274 | 275 | if (this.debug) { 276 | console.log(...logs) 277 | } 278 | } 279 | } 280 | -------------------------------------------------------------------------------- /src/client.test.ts: -------------------------------------------------------------------------------- 1 | import 'dotenv/config' 2 | 3 | import fs from 'node:fs/promises' 4 | 5 | import decodeAudio from 'audio-decode' 6 | import { expect, expectTypeOf, test } from 'vitest' 7 | 8 | import type { Event, RealtimeServerEvents } from './events' 9 | import { RealtimeClient } from './client' 10 | import { arrayBufferToBase64, trimDebugEvent } from './utils' 11 | 12 | const fixtures = ['./fixtures/toronto.mp3'] 13 | const fixtureData = await Promise.all( 14 | fixtures.map(async (filePath) => { 15 | const audioFile = await fs.readFile(filePath) 16 | const audioBuffer = await decodeAudio(audioFile) 17 | const channelData = audioBuffer.getChannelData(0) // only accepts mono 18 | const base64 = arrayBufferToBase64(channelData) 19 | return { filePath, base64 } 20 | }) 21 | ) 22 | 23 | test( 24 | 'e2e', 25 | { 26 | timeout: 60_000 27 | }, 28 | async () => { 29 | const events: Event[] = [] 30 | const client = new RealtimeClient({ 31 | debug: true, 32 | sessionConfig: { 33 | instructions: 34 | 'Please follow the instructions of any query you receive.\n' + 35 | 'Be concise in your responses. Speak quickly and answer shortly.', 36 | turn_detection: null 37 | } 38 | }) 39 | 40 | client.on('realtime.event', (event) => { 41 | events.push(trimDebugEvent(event.event)) 42 | }) 43 | 44 | expect(client.isConnected).toBe(false) 45 | await client.connect() 46 | expect(client.isConnected).toBe(true) 47 | 48 | await client.waitForSessionCreated() 49 | 50 | const sample = fixtureData[0]!.base64 51 | client.sendUserMessageContent([{ type: 'input_audio', audio: sample }]) 52 | 53 | const item = await client.waitForNextItem() 54 | console.log(item) 55 | expect(item.type).toBe('message') 56 | expect(item.role).toBe('user') 57 | expect(item.status).toBe('completed') 58 | expect(item.content).toHaveLength(1) 59 | expect(item.content[0]!.type).toBe('input_audio') 60 | 61 | // Wait for the full response to complete from the server 62 | const event = await client.realtime.waitForNext('response.done') 63 | expectTypeOf(event).toEqualTypeOf() 64 | console.log(event) 65 | 66 | client.disconnect() 67 | expect(client.isConnected).toBe(false) 68 | 69 | expect(event).toBeDefined() 70 | expect(event.type).toBe('response.done') 71 | expect(event.response).toBeDefined() 72 | expect(event.response.status).toBe('completed') 73 | expect(event.response.output).toBeDefined() 74 | expect(event.response.output).toHaveLength(1) 75 | expect(event.response.output[0]!.type).toBe('message') 76 | expect(event.response.output[0]!.role).toBe('assistant') 77 | expect(event.response.output[0]!.status).toBe('completed') 78 | expect(event.response.output[0]!.content).toBeDefined() 79 | expect(event.response.output[0]!.content).toHaveLength(1) 80 | expect(event.response.output[0]!.content[0]!.type).toBe('audio') 81 | expect(event.response.output[0]!.content[0]!.transcript).toMatch(/toronto/i) 82 | expect(event.response.usage).toBeDefined() 83 | 84 | expect( 85 | 
events.filter((e) => e.type === 'response.audio_transcript.delta').length 86 | ).toBeGreaterThanOrEqual(1) 87 | 88 | expect( 89 | events.filter((e) => e.type === 'response.audio.delta').length 90 | ).toBeGreaterThanOrEqual(1) 91 | 92 | expect(events.filter((e) => e.type === 'response.audio.done')).toHaveLength( 93 | 1 94 | ) 95 | 96 | expect( 97 | events.filter((e) => e.type === 'response.audio_transcript.done') 98 | ).toHaveLength(1) 99 | 100 | expect( 101 | events.filter((e) => e.type === 'response.content_part.done') 102 | ).toHaveLength(1) 103 | 104 | expect( 105 | events.filter((e) => e.type === 'response.output_item.done') 106 | ).toHaveLength(1) 107 | 108 | console.log(JSON.stringify(events, null, 2)) 109 | } 110 | ) 111 | -------------------------------------------------------------------------------- /src/client.ts: -------------------------------------------------------------------------------- 1 | import type { 2 | Event, 3 | RealtimeClientEvents, 4 | RealtimeCustomEvents, 5 | RealtimeServerEvents 6 | } from './events' 7 | import type { 8 | EventHandlerResult, 9 | FormattedTool, 10 | Realtime, 11 | ToolHandler 12 | } from './types' 13 | import { RealtimeAPI } from './api' 14 | import { RealtimeConversation } from './conversation' 15 | import { RealtimeEventHandler } from './event-handler' 16 | import { arrayBufferToBase64, assert, mergeInt16Arrays, sleep } from './utils' 17 | 18 | /** 19 | * The RealtimeClient class is the main interface for interacting with the 20 | * OpenAI Realtime API. It handles connection, configuration, conversation 21 | * updates, and server event handling. 22 | */ 23 | export class RealtimeClient extends RealtimeEventHandler< 24 | | RealtimeClientEvents.EventType 25 | | RealtimeServerEvents.EventType 26 | | RealtimeCustomEvents.EventType, 27 | Event, 28 | RealtimeClientEvents.EventMap & 29 | RealtimeServerEvents.EventMap & 30 | RealtimeCustomEvents.EventMap 31 | > { 32 | readonly defaultSessionConfig: Realtime.SessionConfig 33 | sessionConfig: Realtime.SessionConfig 34 | 35 | readonly relay: boolean 36 | 37 | realtime: RealtimeAPI 38 | conversation: RealtimeConversation 39 | 40 | inputAudioBuffer: Int16Array 41 | sessionCreated: boolean 42 | tools: Record< 43 | string, 44 | { 45 | definition: Realtime.ToolDefinition 46 | handler: ToolHandler 47 | } 48 | > 49 | 50 | constructor({ 51 | sessionConfig, 52 | relay = false, 53 | ...apiParams 54 | }: { 55 | sessionConfig?: Partial> 56 | apiKey?: string 57 | model?: string 58 | url?: string 59 | dangerouslyAllowAPIKeyInBrowser?: boolean 60 | debug?: boolean 61 | /** 62 | * Relay mode disables tool use, since it will be the responsibility of the 63 | * upstream client to handle tool calls. 
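     * In relay mode the client also skips sending `session.update` events
     * (see `updateSession`).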
64 | */ 65 | relay?: boolean 66 | } = {}) { 67 | super() 68 | 69 | this.defaultSessionConfig = { 70 | modalities: ['text', 'audio'], 71 | voice: 'alloy', 72 | input_audio_format: 'pcm16', 73 | output_audio_format: 'pcm16', 74 | input_audio_transcription: { 75 | model: 'whisper-1' 76 | }, 77 | turn_detection: null, 78 | // turn_detection: { 79 | // type: 'server_vad', 80 | // threshold: 0.5, 81 | // prefix_padding_ms: 300, 82 | // silence_duration_ms: 500 83 | // }, 84 | tools: [], 85 | tool_choice: 'auto', 86 | temperature: 0.8, 87 | max_response_output_tokens: 4096, 88 | ...sessionConfig 89 | } 90 | this.sessionConfig = {} 91 | this.sessionCreated = false 92 | this.tools = {} 93 | this.inputAudioBuffer = new Int16Array(0) 94 | this.relay = !!relay 95 | 96 | this.realtime = new RealtimeAPI(apiParams) 97 | this.conversation = new RealtimeConversation({ debug: apiParams.debug }) 98 | 99 | this._resetConfig() 100 | this._addAPIEventHandlers() 101 | } 102 | 103 | /** 104 | * Resets sessionConfig and conversation to defaults. 105 | */ 106 | protected _resetConfig() { 107 | this.sessionCreated = false 108 | this.tools = {} 109 | this.sessionConfig = structuredClone(this.defaultSessionConfig) 110 | this.inputAudioBuffer = new Int16Array(0) 111 | } 112 | 113 | /** 114 | * Sets up event handlers for a fully-functional application control flow. 115 | */ 116 | protected _addAPIEventHandlers() { 117 | // Event Logging handlers 118 | this.realtime.on('client.*', (event: any) => { 119 | this.dispatch('realtime.event', { 120 | type: 'realtime.event', 121 | time: new Date().toISOString(), 122 | source: 'client', 123 | event 124 | }) 125 | }) 126 | 127 | this.realtime.on('server.*', (event: RealtimeServerEvents.ServerEvent) => { 128 | this.dispatch('realtime.event', { 129 | type: 'realtime.event', 130 | time: new Date().toISOString(), 131 | source: 'server', 132 | event 133 | }) 134 | }) 135 | 136 | // Handles session created event 137 | this.realtime.on('server.session.created', () => { 138 | this.sessionCreated = true 139 | }) 140 | 141 | // Setup for application control flow 142 | const handler = (event: any, ...args: any[]): EventHandlerResult => { 143 | if (!this.isConnected) return {} 144 | return this.conversation.processEvent(event, ...args) 145 | } 146 | 147 | const handlerWithDispatch = (event: any, ...args: any[]) => { 148 | const res = handler(event, ...args) 149 | 150 | if (res.item) { 151 | // FIXME: This is only here because `item.input_audio_transcription.completed` 152 | // can fire before `item.created`, resulting in empty item. This happens in 153 | // VAD mode with empty audio. 154 | this.dispatch('conversation.updated', { 155 | type: 'conversation.updated', 156 | ...res 157 | }) 158 | } 159 | 160 | return res 161 | } 162 | 163 | const callTool = async (tool: FormattedTool) => { 164 | // In relay mode, we don't attempt to call tools. That is the 165 | // responsibility of the upstream client. 
166 | if (this.isRelay) return 167 | 168 | try { 169 | const jsonArguments = JSON.parse(tool.arguments) 170 | const toolConfig = this.tools[tool.name] 171 | if (!toolConfig) { 172 | console.warn(`Tool "${tool.name}" not found`) 173 | return 174 | } 175 | 176 | const result = await Promise.resolve(toolConfig.handler(jsonArguments)) 177 | this.realtime.send('conversation.item.create', { 178 | item: { 179 | type: 'function_call_output', 180 | call_id: tool.call_id, 181 | output: JSON.stringify(result) 182 | } 183 | }) 184 | } catch (err: any) { 185 | console.warn(`Error calling tool "${tool.name}":`, err.message) 186 | 187 | this.realtime.send('conversation.item.create', { 188 | item: { 189 | type: 'function_call_output', 190 | call_id: tool.call_id, 191 | output: JSON.stringify({ error: err.message }) 192 | } 193 | }) 194 | } 195 | 196 | this.createResponse() 197 | } 198 | 199 | // Handlers to update internal conversation state 200 | this.realtime.on('server.response.created', handler) 201 | this.realtime.on('server.response.output_item.added', handler) 202 | this.realtime.on('server.response.content_part.added', handler) 203 | this.realtime.on( 204 | 'server.input_audio_buffer.speech_started', 205 | (event: RealtimeServerEvents.InputAudioBufferSpeechStartedEvent) => { 206 | handler(event) 207 | this.dispatch('conversation.interrupted', event) 208 | } 209 | ) 210 | this.realtime.on( 211 | 'server.input_audio_buffer.speech_stopped', 212 | (event: RealtimeServerEvents.InputAudioBufferSpeechStoppedEvent) => { 213 | handler(event, this.inputAudioBuffer) 214 | } 215 | ) 216 | 217 | // Handlers to update application state 218 | this.realtime.on( 219 | 'server.conversation.item.created', 220 | (event: RealtimeServerEvents.ConversationItemCreatedEvent) => { 221 | const res = handlerWithDispatch(event) 222 | if (!res.item) return 223 | 224 | this.dispatch('conversation.item.appended', { 225 | type: 'conversation.item.appended', 226 | ...res 227 | }) 228 | 229 | if (res.item.status === 'completed') { 230 | this.dispatch('conversation.item.completed', { 231 | type: 'conversation.item.completed', 232 | ...res 233 | }) 234 | } 235 | } 236 | ) 237 | this.realtime.on('server.conversation.item.truncated', handlerWithDispatch) 238 | this.realtime.on('server.conversation.item.deleted', handlerWithDispatch) 239 | this.realtime.on( 240 | 'server.conversation.item.input_audio_transcription.completed', 241 | handlerWithDispatch 242 | ) 243 | this.realtime.on( 244 | 'server.response.audio_transcript.delta', 245 | handlerWithDispatch 246 | ) 247 | this.realtime.on('server.response.audio.delta', handlerWithDispatch) 248 | this.realtime.on('server.response.text.delta', handlerWithDispatch) 249 | this.realtime.on( 250 | 'server.response.function_call_arguments.delta', 251 | handlerWithDispatch 252 | ) 253 | this.realtime.on( 254 | 'server.response.output_item.done', 255 | async (event: RealtimeServerEvents.ResponseOutputItemDoneEvent) => { 256 | const res = handlerWithDispatch(event) 257 | if (!res.item?.formatted) return 258 | 259 | if (res.item.status === 'completed') { 260 | this.dispatch('conversation.item.completed', { 261 | type: 'conversation.item.completed', 262 | ...res 263 | }) 264 | } 265 | 266 | if (res.item.formatted.tool) { 267 | callTool(res.item.formatted.tool) 268 | } 269 | } 270 | ) 271 | } 272 | 273 | /** 274 | * Whether the realtime socket is connected. 
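   * Delegates to the underlying `RealtimeAPI` connection state.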
275 | */ 276 | get isConnected(): boolean { 277 | return this.realtime.isConnected 278 | } 279 | 280 | /** 281 | * Whether the client is in relay mode. When in relay mode, the client will 282 | * not attempt to invoke tools. 283 | */ 284 | get isRelay(): boolean { 285 | return this.relay 286 | } 287 | 288 | /** 289 | * Resets the client instance entirely: disconnects and clears configs. 290 | */ 291 | reset() { 292 | this.disconnect() 293 | this.clearEventHandlers() 294 | this.realtime.clearEventHandlers() 295 | this._resetConfig() 296 | this._addAPIEventHandlers() 297 | } 298 | 299 | /** 300 | * Connects to the Realtime WebSocket API and updates the session config. 301 | */ 302 | async connect() { 303 | if (this.isConnected) { 304 | return 305 | } 306 | 307 | await this.realtime.connect() 308 | this.updateSession() 309 | } 310 | 311 | /** 312 | * Waits for a session.created event to be executed before proceeding. 313 | */ 314 | async waitForSessionCreated() { 315 | assert(this.isConnected, 'Not connected, use .connect() first') 316 | 317 | while (!this.sessionCreated) { 318 | await sleep(1) 319 | } 320 | } 321 | 322 | /** 323 | * Disconnects from the Realtime API and clears the conversation history. 324 | */ 325 | disconnect() { 326 | this.sessionCreated = false 327 | this.realtime.disconnect() 328 | this.conversation.clear() 329 | } 330 | 331 | /** 332 | * Gets the active turn detection mode. 333 | */ 334 | getTurnDetectionType(): 'server_vad' | undefined { 335 | return this.sessionConfig.turn_detection?.type 336 | } 337 | 338 | /** 339 | * Adds a tool to the session. 340 | */ 341 | addTool(definition: Realtime.PartialToolDefinition, handler: ToolHandler) { 342 | assert(!this.isRelay, 'Unable to add tools in relay mode') 343 | assert(definition?.name, 'Missing tool name in definition') 344 | const { name } = definition 345 | 346 | assert( 347 | typeof handler === 'function', 348 | `Tool "${name}" handler must be a function` 349 | ) 350 | 351 | this.tools[name] = { 352 | definition: { 353 | type: 'function', 354 | ...definition 355 | }, 356 | handler 357 | } 358 | this.updateSession() 359 | } 360 | 361 | /** 362 | * Removes a tool from the session. 363 | */ 364 | removeTool(name: string) { 365 | assert(!this.isRelay, 'Unable to add tools in relay mode') 366 | assert( 367 | this.tools[name], 368 | `Tool "${name}" does not exist, can not be removed.` 369 | ) 370 | delete this.tools[name] 371 | this.updateSession() 372 | } 373 | 374 | /** 375 | * Deletes an item. 376 | */ 377 | deleteItem(id: string) { 378 | this.realtime.send('conversation.item.delete', { item_id: id }) 379 | } 380 | 381 | /** 382 | * Updates session configuration. 383 | * 384 | * If the client is not yet connected, the session will be updated upon connection. 385 | */ 386 | updateSession(sessionConfig: Realtime.SessionConfig = {}) { 387 | const tools = Object.values(this.tools).map(({ definition }) => definition) 388 | 389 | this.sessionConfig = { 390 | ...this.sessionConfig, 391 | ...sessionConfig, 392 | tools 393 | } 394 | 395 | if (this.isConnected && !this.isRelay) { 396 | this.realtime.send('session.update', { 397 | session: structuredClone(this.sessionConfig) 398 | }) 399 | } 400 | } 401 | 402 | /** 403 | * Sends user message content and generates a response. 
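   * Accepts `input_text` and `input_audio` content parts and then calls
   * `createResponse()` to ask the model to reply, e.g.
   * `client.sendUserMessageContent([{ type: 'input_text', text: 'How are you?' }])`.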
404 | */ 405 | sendUserMessageContent( 406 | content: Array< 407 | Realtime.InputTextContentPart | Realtime.InputAudioContentPart 408 | > 409 | ) { 410 | assert(!this.isRelay, 'Unable to send messages directly in relay mode') 411 | 412 | if (content.length) { 413 | this.realtime.send('conversation.item.create', { 414 | item: { 415 | type: 'message', 416 | role: 'user', 417 | content 418 | } 419 | }) 420 | } 421 | 422 | this.createResponse() 423 | } 424 | 425 | /** 426 | * Appends user audio to the existing audio buffer. 427 | */ 428 | appendInputAudio(arrayBuffer: Int16Array | ArrayBuffer) { 429 | assert(!this.isRelay, 'Unable to append input audio directly in relay mode') 430 | 431 | if (arrayBuffer.byteLength > 0) { 432 | this.realtime.send('input_audio_buffer.append', { 433 | audio: arrayBufferToBase64(arrayBuffer) 434 | }) 435 | 436 | this.inputAudioBuffer = mergeInt16Arrays( 437 | this.inputAudioBuffer, 438 | arrayBuffer 439 | ) 440 | } 441 | } 442 | 443 | /** 444 | * Forces the model to generate a response. 445 | */ 446 | createResponse() { 447 | assert(!this.isRelay, 'Unable to create a response directly in relay mode') 448 | 449 | if (!this.getTurnDetectionType() && this.inputAudioBuffer.byteLength > 0) { 450 | this.realtime.send('input_audio_buffer.commit') 451 | this.conversation.queueInputAudio(this.inputAudioBuffer) 452 | this.inputAudioBuffer = new Int16Array(0) 453 | } 454 | 455 | this.realtime.send('response.create') 456 | } 457 | 458 | /** 459 | * Cancels the ongoing server generation and truncates ongoing generation, if 460 | * applicable. 461 | * 462 | * If no id provided, will simply call `cancel_generation` command. 463 | */ 464 | cancelResponse( 465 | /** The ID of the item to cancel. */ 466 | id?: string, 467 | /** The number of samples to truncate past for the ongoing generation. */ 468 | sampleCount = 0 469 | ): Realtime.AssistantItem | undefined { 470 | assert(!this.isRelay, 'Unable to cancel a response directly in relay mode') 471 | 472 | if (!id) { 473 | this.realtime.send('response.cancel') 474 | return 475 | } 476 | 477 | const item = this.conversation.getItem(id) 478 | assert(item, `Could not find item "${id}"`) 479 | assert( 480 | item.type === 'message', 481 | `Can only cancelResponse messages with type "message"` 482 | ) 483 | assert( 484 | item.role === 'assistant', 485 | `Can only cancelResponse messages with role "assistant"` 486 | ) 487 | 488 | this.realtime.send('response.cancel') 489 | const audioIndex = item.content.findIndex((c) => c.type === 'audio') 490 | assert(audioIndex >= 0, `Could not find audio on item ${id} to cancel`) 491 | 492 | this.realtime.send('conversation.item.truncate', { 493 | item_id: id, 494 | content_index: audioIndex, 495 | audio_end_ms: Math.floor( 496 | (sampleCount / this.conversation.defaultFrequency) * 1000 497 | ) 498 | }) 499 | 500 | return item 501 | } 502 | 503 | /** 504 | * Utility for waiting for the next `conversation.item.appended` event to be 505 | * triggered by the server. 506 | */ 507 | async waitForNextItem(): Promise { 508 | const event = await this.waitForNext('conversation.item.appended') 509 | return event.item 510 | } 511 | 512 | /** 513 | * Utility for waiting for the next `conversation.item.completed` event to be 514 | * triggered by the server. 
515 | */ 516 | async waitForNextCompletedItem(): Promise { 517 | const event = await this.waitForNext('conversation.item.completed') 518 | return event.item 519 | } 520 | } 521 | -------------------------------------------------------------------------------- /src/conversation.ts: -------------------------------------------------------------------------------- 1 | /* eslint-disable @typescript-eslint/naming-convention */ 2 | import type { RealtimeServerEvents } from './events' 3 | import type { EventHandlerResult, FormattedItem, Realtime } from './types' 4 | import { assert, base64ToArrayBuffer, mergeInt16Arrays } from './utils' 5 | 6 | /** 7 | * RealtimeConversation holds conversation history and performs event 8 | * validation for RealtimeAPI. 9 | */ 10 | export class RealtimeConversation { 11 | readonly defaultFrequency = 24_000 // 24,000 Hz 12 | 13 | readonly frequency: number 14 | readonly debug: boolean 15 | 16 | itemLookup: Record = {} 17 | items: FormattedItem[] = [] 18 | responseLookup: Record = {} 19 | responses: Realtime.Response[] = [] 20 | queuedSpeechItems: Record< 21 | string, 22 | { audio_start_ms: number; audio_end_ms?: number; audio?: Int16Array } 23 | > = {} 24 | queuedTranscriptItems: Record = {} 25 | queuedInputAudio?: Int16Array 26 | 27 | constructor({ 28 | frequency = this.defaultFrequency, 29 | debug = false 30 | }: { 31 | frequency?: number 32 | debug?: boolean 33 | } = {}) { 34 | // Default to 24,000 Hz if not provided 35 | if (frequency === undefined) { 36 | frequency = this.defaultFrequency 37 | } 38 | assert(frequency > 0, `Invalid frequency: ${frequency}`) 39 | 40 | this.frequency = frequency 41 | this.debug = debug 42 | 43 | this.clear() 44 | } 45 | 46 | /** 47 | * Clears the conversation history and resets to defaults. 48 | */ 49 | clear() { 50 | this.itemLookup = {} 51 | this.items = [] 52 | this.responseLookup = {} 53 | this.responses = [] 54 | this.queuedSpeechItems = {} 55 | this.queuedTranscriptItems = {} 56 | this.queuedInputAudio = undefined 57 | } 58 | 59 | /** 60 | * Queue input audio for manual speech event. 61 | */ 62 | queueInputAudio(inputAudio: Int16Array) { 63 | this.queuedInputAudio = inputAudio 64 | } 65 | 66 | /** 67 | * Process an event from the WebSocket server and compose items. 68 | */ 69 | processEvent( 70 | event: RealtimeServerEvents.ServerEvent, 71 | ...args: any[] 72 | ): EventHandlerResult { 73 | assert(event.event_id, `Missing "event_id" on event`) 74 | assert(event.type, `Missing "type" on event`) 75 | 76 | const eventProcessor = this.EventProcessors[event.type] 77 | assert(eventProcessor, `Missing event processor for "${event.type}"`) 78 | 79 | try { 80 | return eventProcessor.call(this, event as any, ...args) 81 | } catch (err: any) { 82 | if (this.debug) { 83 | console.error( 84 | `Error processing event "${event.type}":`, 85 | err.message, 86 | event 87 | ) 88 | } 89 | 90 | return {} 91 | } 92 | } 93 | 94 | /** 95 | * Retrieves an item by ID. 96 | */ 97 | getItem(id: string): FormattedItem | undefined { 98 | return this.itemLookup[id] 99 | } 100 | 101 | /** 102 | * Retrieves all items in the conversation. 103 | */ 104 | getItems(): FormattedItem[] { 105 | return this.items.slice() 106 | } 107 | 108 | /** Event handlers. 
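Maps each server event type to a processor that updates the local conversation state and returns the affected item and any delta.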
*/ 109 | EventProcessors: Partial<{ 110 | [K in keyof RealtimeServerEvents.EventMap]: ( 111 | event: RealtimeServerEvents.EventMap[K], 112 | ...args: any[] 113 | ) => EventHandlerResult 114 | }> = { 115 | 'conversation.item.created': (event) => { 116 | const { item } = event 117 | const newItem: FormattedItem = { 118 | ...structuredClone(item), 119 | formatted: { 120 | audio: new Int16Array(0), 121 | text: '', 122 | transcript: '' 123 | } 124 | } 125 | 126 | if (!this.itemLookup[newItem.id]) { 127 | this.itemLookup[newItem.id] = newItem 128 | this.items.push(newItem) 129 | } 130 | 131 | // If we have a speech item, can populate audio 132 | if (this.queuedSpeechItems[newItem.id]?.audio) { 133 | newItem.formatted.audio = this.queuedSpeechItems[newItem.id]!.audio! 134 | delete this.queuedSpeechItems[newItem.id] // free up some memory 135 | } 136 | 137 | // Populate formatted text if it comes out on creation 138 | if (newItem.content) { 139 | const textContent = newItem.content.filter( 140 | (c) => c.type === 'text' || c.type === 'input_text' 141 | ) as Array 142 | 143 | for (const content of textContent) { 144 | newItem.formatted.text += content.text 145 | } 146 | } 147 | 148 | // If we have a transcript item, can pre-populate transcript 149 | if (this.queuedTranscriptItems[newItem.id]) { 150 | newItem.formatted.transcript = 151 | this.queuedTranscriptItems[newItem.id]!.transcript 152 | delete this.queuedTranscriptItems[newItem.id] 153 | } 154 | 155 | if (newItem.type === 'message') { 156 | if (newItem.role === 'user') { 157 | newItem.status = 'completed' 158 | if (this.queuedInputAudio) { 159 | newItem.formatted.audio = this.queuedInputAudio 160 | this.queuedInputAudio = undefined 161 | } 162 | } else { 163 | newItem.status = 'in_progress' 164 | } 165 | } else if (newItem.type === 'function_call') { 166 | newItem.formatted.tool = { 167 | type: 'function', 168 | name: newItem.name, 169 | call_id: newItem.call_id, 170 | arguments: '' 171 | } 172 | 173 | newItem.status = 'in_progress' 174 | } else if (newItem.type === 'function_call_output') { 175 | newItem.status = 'completed' 176 | newItem.formatted.output = newItem.output 177 | } 178 | 179 | return { item: newItem } 180 | }, 181 | 182 | 'conversation.item.truncated': (event) => { 183 | const { item_id, audio_end_ms } = event 184 | const item = this.itemLookup[item_id] 185 | if (!item) { 186 | throw new Error(`item.truncated: Item "${item_id}" not found`) 187 | } 188 | 189 | const endIndex = Math.floor((audio_end_ms * this.frequency) / 1000) 190 | item.formatted.transcript = '' 191 | item.formatted.audio = item.formatted.audio!.slice(0, endIndex) 192 | 193 | return { item } 194 | }, 195 | 196 | 'conversation.item.deleted': (event) => { 197 | const { item_id } = event 198 | const item = this.itemLookup[item_id] 199 | if (!item) { 200 | throw new Error(`item.deleted: Item "${item_id}" not found`) 201 | } 202 | 203 | delete this.itemLookup[item.id] 204 | const index = this.items.indexOf(item) 205 | 206 | if (index >= 0) { 207 | this.items.splice(index, 1) 208 | } 209 | 210 | return { item } 211 | }, 212 | 213 | 'conversation.item.input_audio_transcription.completed': (event) => { 214 | const { item_id, content_index, transcript } = event 215 | const item = this.itemLookup[item_id] 216 | 217 | // We use a single space to represent an empty transcript for .formatted values 218 | // Otherwise it looks like no transcript provided 219 | const formattedTranscript = transcript || ' ' 220 | 221 | if (!item) { 222 | // We can receive transcripts in VAD 
mode before item.created 223 | // This happens specifically when audio is empty 224 | this.queuedTranscriptItems[item_id] = { 225 | transcript: formattedTranscript 226 | } 227 | 228 | return {} 229 | } else { 230 | if (item.content[content_index]) { 231 | ;( 232 | item.content[content_index] as Realtime.AudioContentPart 233 | ).transcript = transcript 234 | } 235 | item.formatted.transcript = formattedTranscript 236 | return { item, delta: { transcript } } 237 | } 238 | }, 239 | 240 | 'input_audio_buffer.speech_started': (event) => { 241 | const { item_id, audio_start_ms } = event 242 | const item = this.itemLookup[item_id] 243 | this.queuedSpeechItems[item_id] = { audio_start_ms } 244 | return { item } 245 | }, 246 | 247 | 'input_audio_buffer.speech_stopped': ( 248 | event, 249 | inputAudioBuffer: Int16Array 250 | ) => { 251 | const { item_id, audio_end_ms } = event 252 | const item = this.itemLookup[item_id] 253 | 254 | if (!this.queuedSpeechItems[item_id]) { 255 | this.queuedSpeechItems[item_id] = { audio_start_ms: audio_end_ms } 256 | } 257 | 258 | const speech = this.queuedSpeechItems[item_id] 259 | assert(speech, `Speech item not found for "${item_id}"`) 260 | speech.audio_end_ms = audio_end_ms 261 | 262 | if (inputAudioBuffer) { 263 | const startIndex = Math.floor( 264 | (speech.audio_start_ms * this.frequency) / 1000 265 | ) 266 | const endIndex = Math.floor( 267 | (speech.audio_end_ms * this.frequency) / 1000 268 | ) 269 | 270 | speech.audio = inputAudioBuffer.slice(startIndex, endIndex) 271 | } 272 | 273 | return { item } 274 | }, 275 | 276 | 'response.created': (event) => { 277 | const { response } = event 278 | 279 | if (!this.responseLookup[response.id]) { 280 | this.responseLookup[response.id] = response 281 | this.responses.push(response) 282 | } 283 | 284 | return { response } 285 | }, 286 | 287 | 'response.output_item.added': (event) => { 288 | const { response_id, item } = event 289 | const response = this.responseLookup[response_id] 290 | 291 | if (!response) { 292 | throw new Error( 293 | `response.output_item.added: Response "${response_id}" not found` 294 | ) 295 | } 296 | 297 | response.output.push(item) 298 | return { item, response } 299 | }, 300 | 301 | 'response.output_item.done': (event) => { 302 | const { item } = event 303 | if (!item) { 304 | throw new Error(`response.output_item.done: Missing "item"`) 305 | } 306 | 307 | const foundItem = this.itemLookup[item.id] 308 | if (!foundItem) { 309 | throw new Error( 310 | `response.output_item.done: Item "${item.id}" not found` 311 | ) 312 | } 313 | 314 | foundItem.status = item.status 315 | return { item: foundItem } 316 | }, 317 | 318 | 'response.content_part.added': (event) => { 319 | const { item_id, part } = event 320 | const item = this.itemLookup[item_id] 321 | if (!item) { 322 | throw new Error( 323 | `response.content_part.added: Item "${item_id}" not found` 324 | ) 325 | } 326 | 327 | item.content.push(part as any) 328 | return { item } 329 | }, 330 | 331 | 'response.audio_transcript.delta': (event) => { 332 | const { item_id, content_index, delta } = event 333 | const item = this.itemLookup[item_id] 334 | if (!item) { 335 | throw new Error( 336 | `response.audio_transcript.delta: Item "${item_id}" not found` 337 | ) 338 | } 339 | 340 | ;(item.content[content_index] as Realtime.AudioContentPart).transcript += 341 | delta 342 | item.formatted.transcript += delta 343 | 344 | return { item, delta: { transcript: delta } } 345 | }, 346 | 347 | 'response.audio.delta': (event) => { 348 | const { item_id, 
content_index: _, delta } = event 349 | const item = this.itemLookup[item_id] 350 | if (!item) { 351 | throw new Error(`response.audio.delta: Item "${item_id}" not found`) 352 | } 353 | 354 | // This never gets renderered; we care about the formatted data instead. 355 | // (item.content[content_index] as Realtime.AudioContentPart)!.audio += delta; 356 | 357 | const arrayBuffer = base64ToArrayBuffer(delta) 358 | const appendValues = new Int16Array(arrayBuffer) 359 | item.formatted.audio = mergeInt16Arrays( 360 | item.formatted.audio, 361 | appendValues 362 | ) 363 | 364 | return { item, delta: { audio: appendValues } } 365 | }, 366 | 367 | 'response.text.delta': (event) => { 368 | const { item_id, content_index, delta } = event 369 | const item = this.itemLookup[item_id] 370 | if (!item) { 371 | throw new Error(`response.text.delta: Item "${item_id}" not found`) 372 | } 373 | 374 | ;(item.content[content_index] as Realtime.TextContentPart).text += delta 375 | item.formatted.text += delta 376 | 377 | return { item, delta: { text: delta } } 378 | }, 379 | 380 | 'response.function_call_arguments.delta': (event) => { 381 | const { item_id, delta } = event 382 | const item = this.itemLookup[item_id] 383 | if (!item) { 384 | throw new Error( 385 | `response.function_call_arguments.delta: Item "${item_id}" not found` 386 | ) 387 | } 388 | 389 | ;(item as Realtime.FunctionCallItem).arguments += delta 390 | item.formatted.tool!.arguments += delta 391 | 392 | return { item, delta: { arguments: delta } } 393 | } 394 | } 395 | } 396 | -------------------------------------------------------------------------------- /src/event-handler.ts: -------------------------------------------------------------------------------- 1 | import type { Event } from './events' 2 | import type { MaybePromise } from './types' 3 | 4 | export type EventHandlerCallback = ( 5 | event: EventData 6 | ) => MaybePromise 7 | 8 | /** 9 | * Basic event handler. 10 | */ 11 | export class RealtimeEventHandler< 12 | EventType extends string = string, 13 | EventData extends Event = Event, 14 | EventMap extends Record = Record 15 | > { 16 | eventHandlers: Record[]> = 17 | {} as Record[]> 18 | 19 | /** 20 | * Clears all event handlers. 21 | */ 22 | clearEventHandlers() { 23 | this.eventHandlers = {} as Record< 24 | EventType, 25 | EventHandlerCallback[] 26 | > 27 | } 28 | 29 | /** 30 | * Adds a listener for a specific event. 31 | */ 32 | on< 33 | E extends EventType, 34 | D extends EventData = EventMap[E] extends EventData 35 | ? EventMap[E] 36 | : EventData 37 | >(eventName: E, callback: EventHandlerCallback) { 38 | this.eventHandlers[eventName] = this.eventHandlers[eventName] || [] 39 | this.eventHandlers[eventName].push( 40 | callback as EventHandlerCallback 41 | ) 42 | } 43 | 44 | /** 45 | * Adds a listener for a single occurrence of an event. 46 | */ 47 | once< 48 | E extends EventType, 49 | D extends EventData = EventMap[E] extends EventData 50 | ? EventMap[E] 51 | : EventData 52 | >(eventName: E, callback: EventHandlerCallback) { 53 | const onceCallback = (event: D) => { 54 | this.off(eventName, onceCallback) 55 | return callback(event) 56 | } 57 | this.on(eventName, onceCallback) 58 | } 59 | 60 | /** 61 | * Removes a listener for an event. 62 | * Calling without a callback will remove all listeners for the event. 63 | */ 64 | off< 65 | E extends EventType, 66 | D extends EventData = EventMap[E] extends EventData 67 | ? 
EventMap[E] 68 | : EventData 69 | >(eventName: E, callback?: EventHandlerCallback) { 70 | const handlers = this.eventHandlers[eventName] || [] 71 | if (callback) { 72 | const index = handlers.indexOf( 73 | callback as EventHandlerCallback 74 | ) 75 | if (index < 0) { 76 | throw new Error( 77 | `Could not turn off specified event listener for "${eventName}": not found as a listener` 78 | ) 79 | } 80 | 81 | handlers.splice(index, 1) 82 | } else { 83 | delete this.eventHandlers[eventName] 84 | } 85 | } 86 | 87 | /** 88 | * Waits for next event of a specific type and returns the payload. 89 | */ 90 | async waitForNext< 91 | E extends EventType, 92 | D extends EventData = EventMap[E] extends EventData 93 | ? EventMap[E] 94 | : EventData 95 | >(eventName: E, { timeoutMs }: { timeoutMs?: number } = {}): Promise { 96 | return new Promise((resolve, reject) => { 97 | this.once(eventName, resolve as any) 98 | 99 | if (timeoutMs !== undefined) { 100 | setTimeout( 101 | () => reject(new Error(`Timeout waiting for "${eventName}"`)), 102 | timeoutMs 103 | ) 104 | } 105 | }) 106 | } 107 | 108 | /** 109 | * Executes all events handlers in the order they were added. 110 | */ 111 | dispatch< 112 | E extends EventType, 113 | D extends EventData = EventMap[E] extends EventData 114 | ? EventMap[E] 115 | : EventData 116 | >(eventName: E, event: D) { 117 | const handlers = this.eventHandlers[eventName] || [] 118 | for (const handler of handlers) { 119 | handler(event) 120 | } 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /src/events.ts: -------------------------------------------------------------------------------- 1 | import type { EventHandlerResult, FormattedItem, Realtime } from './types' 2 | 3 | export interface Event { 4 | /** The event type. */ 5 | type: string 6 | } 7 | 8 | export type RealtimeEvent = RealtimeCustomEvents.CustomEvent & { 9 | type: 'realtime.event' 10 | source: 'server' | 'client' 11 | time: string 12 | event: Event 13 | } & ( 14 | | { 15 | source: 'server' 16 | event: RealtimeServerEvents.EventMap[RealtimeServerEvents.EventType] 17 | } 18 | | { 19 | source: 'client' 20 | event: RealtimeClientEvents.EventMap[RealtimeClientEvents.EventType] 21 | } 22 | ) 23 | 24 | // See https://platform.openai.com/docs/guides/realtime/events 25 | export namespace RealtimeClientEvents { 26 | /** Event types sent by the client. */ 27 | export type EventType = 28 | | 'session.update' 29 | | 'input_audio_buffer.append' 30 | | 'input_audio_buffer.commit' 31 | | 'input_audio_buffer.clear' 32 | | 'conversation.item.create' 33 | | 'conversation.item.truncate' 34 | | 'conversation.item.delete' 35 | | 'response.create' 36 | | 'response.cancel' 37 | 38 | export type EventMap = { 39 | 'session.update': SessionUpdateEvent 40 | 'input_audio_buffer.append': InputAudioBufferAppendEvent 41 | 'input_audio_buffer.commit': InputAudioBufferCommitEvent 42 | 'input_audio_buffer.clear': InputAudioBufferClearEvent 43 | 'conversation.item.create': ConversationItemCreateEvent 44 | 'conversation.item.truncate': ConversationItemTruncateEvent 45 | 'conversation.item.delete': ConversationItemDeleteEvent 46 | 'response.create': ResponseCreateEvent 47 | 'response.cancel': ResponseCancelEvent 48 | } 49 | 50 | // Same as EventMap but every key is prefixed by 'client.' 51 | export type PrefixedEventMap = { 52 | [K in keyof EventMap as `client.${Extract}`]: EventMap[K] 53 | } 54 | 55 | export interface ClientEvent extends Event { 56 | /** The event type. 
*/ 57 | type: EventType 58 | 59 | /** Optional client-generated ID used to identify this event. */ 60 | event_id?: string 61 | } 62 | 63 | /** Send this event to update the session’s default configuration. */ 64 | export interface SessionUpdateEvent extends ClientEvent { 65 | type: 'session.update' 66 | 67 | /** Session configuration to update. */ 68 | session: Realtime.SessionConfig 69 | } 70 | 71 | /** Send this event to append audio bytes to the input audio buffer. */ 72 | export interface InputAudioBufferAppendEvent extends ClientEvent { 73 | type: 'input_audio_buffer.append' 74 | 75 | /** Base64-encoded audio bytes. */ 76 | audio: string 77 | } 78 | 79 | /** Send this event to commit audio bytes to a user message. */ 80 | export interface InputAudioBufferCommitEvent extends ClientEvent { 81 | type: 'input_audio_buffer.commit' 82 | } 83 | 84 | /** Send this event to clear the audio bytes in the buffer. */ 85 | export interface InputAudioBufferClearEvent extends ClientEvent { 86 | type: 'input_audio_buffer.clear' 87 | } 88 | 89 | /** Send this event when adding an item to the conversation. */ 90 | export interface ConversationItemCreateEvent extends ClientEvent { 91 | type: 'conversation.item.create' 92 | 93 | /** The ID of the preceding item after which the new item will be inserted. */ 94 | previous_item_id?: string 95 | 96 | /** The item to add to the conversation. */ 97 | item?: Realtime.ClientItem 98 | } 99 | 100 | /** 101 | * Send this event when you want to truncate a previous assistant message’s audio. 102 | */ 103 | export interface ConversationItemTruncateEvent extends ClientEvent { 104 | type: 'conversation.item.truncate' 105 | 106 | /** The ID of the assistant message item to truncate. */ 107 | item_id: string 108 | 109 | /** The index of the content part to truncate. */ 110 | content_index: number 111 | 112 | /** Inclusive duration up to which audio is truncated, in milliseconds. */ 113 | audio_end_ms: number 114 | } 115 | 116 | /** 117 | * Send this event when you want to remove any item from the conversation history. 118 | */ 119 | export interface ConversationItemDeleteEvent extends ClientEvent { 120 | type: 'conversation.item.delete' 121 | 122 | /** The ID of the item to delete. */ 123 | item_id: string 124 | } 125 | 126 | /** Send this event to trigger a response generation. */ 127 | export interface ResponseCreateEvent extends ClientEvent { 128 | type: 'response.create' 129 | 130 | /** Configuration for the response. */ 131 | response: Realtime.ResponseConfig 132 | } 133 | 134 | /** Send this event to cancel an in-progress response. */ 135 | export interface ResponseCancelEvent extends ClientEvent { 136 | type: 'response.cancel' 137 | } 138 | } 139 | 140 | // See // See https://platform.openai.com/docs/guides/realtime/events 141 | export namespace RealtimeServerEvents { 142 | /** Event types sent by the server. 
*/ 143 | export type EventType = 144 | | 'error' 145 | | 'session.created' 146 | | 'session.updated' 147 | | 'conversation.created' 148 | | 'conversation.item.created' 149 | | 'conversation.item.input_audio_transcription.completed' 150 | | 'conversation.item.input_audio_transcription.failed' 151 | | 'conversation.item.truncated' 152 | | 'conversation.item.deleted' 153 | | 'input_audio_buffer.committed' 154 | | 'input_audio_buffer.cleared' 155 | | 'input_audio_buffer.speech_started' 156 | | 'input_audio_buffer.speech_stopped' 157 | | 'response.created' 158 | | 'response.done' 159 | | 'response.output_item.added' 160 | | 'response.output_item.done' 161 | | 'response.content_part.added' 162 | | 'response.content_part.done' 163 | | 'response.text.delta' 164 | | 'response.text.done' 165 | | 'response.audio_transcript.delta' 166 | | 'response.audio_transcript.done' 167 | | 'response.audio.delta' 168 | | 'response.audio.done' 169 | | 'response.function_call_arguments.delta' 170 | | 'response.function_call_arguments.done' 171 | | 'rate_limits.updated' 172 | 173 | export type EventMap = { 174 | error: ErrorEvent 175 | 'session.created': SessionCreatedEvent 176 | 'session.updated': SessionUpdatedEvent 177 | 'conversation.created': ConversationCreatedEvent 178 | 'conversation.item.created': ConversationItemCreatedEvent 179 | 'conversation.item.input_audio_transcription.completed': ConversationItemInputAudioTranscriptionCompletedEvent 180 | 'conversation.item.input_audio_transcription.failed': ConversationItemInputAudioTranscriptionFailedEvent 181 | 'conversation.item.truncated': ConversationItemTruncatedEvent 182 | 'conversation.item.deleted': ConversationItemDeletedEvent 183 | 'input_audio_buffer.committed': InputAudioBufferCommittedEvent 184 | 'input_audio_buffer.cleared': InputAudioBufferClearedEvent 185 | 'input_audio_buffer.speech_started': InputAudioBufferSpeechStartedEvent 186 | 'input_audio_buffer.speech_stopped': InputAudioBufferSpeechStoppedEvent 187 | 'response.created': ResponseCreatedEvent 188 | 'response.done': ResponseDoneEvent 189 | 'response.output_item.added': ResponseOutputItemAddedEvent 190 | 'response.output_item.done': ResponseOutputItemDoneEvent 191 | 'response.content_part.added': ResponseContentPartItemAddedEvent 192 | 'response.content_part.done': ResponseContentPartItemDoneEvent 193 | 'response.text.delta': ResponseTextDeltaEvent 194 | 'response.text.done': ResponseTextDoneEvent 195 | 'response.audio_transcript.delta': ResponseAudioTranscriptDeltaEvent 196 | 'response.audio_transcript.done': ResponseAudioTranscriptDoneEvent 197 | 'response.audio.delta': ResponseAudioDeltaEvent 198 | 'response.audio.done': ResponseAudioDoneEvent 199 | 'response.function_call_arguments.delta': ResponseFunctionCallArgumentsDeltaEvent 200 | 'response.function_call_arguments.done': ResponseFunctionCallArgumentsDoneEvent 201 | 'rate_limits.updated': RateLimitsUpdatedEvent 202 | } 203 | 204 | // Same as EventMap but every key is prefixed by 'server.' 205 | export type PrefixedEventMap = { 206 | [K in keyof EventMap as `server.${Extract}`]: EventMap[K] 207 | } 208 | 209 | export interface ServerEvent extends Event { 210 | /** The event type. */ 211 | type: EventType 212 | 213 | /** The unique ID of the server event. */ 214 | event_id: string 215 | } 216 | 217 | /** Returned when an error occurs. */ 218 | export interface ErrorEvent extends ServerEvent { 219 | type: 'error' 220 | 221 | /** Details of the error. 
*/ 222 | error: Realtime.Error 223 | } 224 | 225 | /** 226 | * Returned when a session is created. Emitted automatically when a new 227 | * connection is established. 228 | */ 229 | export interface SessionCreatedEvent extends ServerEvent { 230 | type: 'session.created' 231 | 232 | /** The session resource. */ 233 | session: Realtime.Session 234 | } 235 | 236 | /** 237 | * Returned when a session is updated. 238 | */ 239 | export interface SessionUpdatedEvent extends ServerEvent { 240 | type: 'session.updated' 241 | 242 | /** The updated session resource. */ 243 | session: Realtime.Session 244 | } 245 | 246 | /** 247 | * Returned when a conversation is created. Emitted right after session 248 | * creation. 249 | */ 250 | export interface ConversationCreatedEvent extends ServerEvent { 251 | type: 'conversation.created' 252 | 253 | /** The conversation resource. */ 254 | conversation: Realtime.Conversation 255 | } 256 | 257 | /** 258 | * Returned when a conversation item is created. 259 | */ 260 | export interface ConversationItemCreatedEvent extends ServerEvent { 261 | type: 'conversation.item.created' 262 | 263 | /** The ID of the preceding item. */ 264 | previous_item_id?: string 265 | 266 | /** The item that was created. */ 267 | item: Realtime.Item 268 | } 269 | 270 | /** 271 | * Returned when input audio transcription is enabled and a transcription succeeds. 272 | */ 273 | export interface ConversationItemInputAudioTranscriptionCompletedEvent 274 | extends ServerEvent { 275 | type: 'conversation.item.input_audio_transcription.completed' 276 | 277 | /** The ID of the user message item. */ 278 | item_id: string 279 | 280 | /** The index of the content part containing the audio. */ 281 | content_index: number 282 | 283 | /** The transcribed text. */ 284 | transcript: string 285 | } 286 | 287 | /** 288 | * Returned when input audio transcription is configured, and a transcription 289 | * request for a user message failed. 290 | */ 291 | export interface ConversationItemInputAudioTranscriptionFailedEvent 292 | extends ServerEvent { 293 | type: 'conversation.item.input_audio_transcription.failed' 294 | 295 | /** The ID of the user message item. */ 296 | item_id: string 297 | 298 | /** The index of the content part containing the audio. */ 299 | content_index: number 300 | 301 | /** Details of the transcription error. */ 302 | error: Realtime.Error 303 | } 304 | 305 | /** 306 | * Returned when an earlier assistant audio message item is truncated by the client. 307 | */ 308 | export interface ConversationItemTruncatedEvent extends ServerEvent { 309 | type: 'conversation.item.truncated' 310 | 311 | /** The ID of the assistant message item that was truncated. */ 312 | item_id: string 313 | 314 | /** The index of the content part thtat was truncated. */ 315 | content_index: number 316 | 317 | /** The duration up to which the audio was truncated, in milliseconds. */ 318 | audio_end_ms: number 319 | } 320 | 321 | /** 322 | * Returned when an item in the conversation is deleted. 323 | */ 324 | export interface ConversationItemDeletedEvent extends ServerEvent { 325 | type: 'conversation.item.deleted' 326 | 327 | /** The ID of the item that was deleted. */ 328 | item_id: string 329 | } 330 | 331 | /** 332 | * Returned when an input audio buffer is committed, either by the client or 333 | * automatically in server VAD mode. 
334 | */ 335 | export interface InputAudioBufferCommittedEvent extends ServerEvent { 336 | type: 'input_audio_buffer.committed' 337 | 338 | /** The ID of the preceding item after which the new item will be inserted. */ 339 | previous_item_id?: string 340 | 341 | /** The ID of the user message item that will be created. */ 342 | item_id: string 343 | } 344 | 345 | /** 346 | * Returned when the input audio buffer is cleared by the client. 347 | */ 348 | export interface InputAudioBufferClearedEvent extends ServerEvent { 349 | type: 'input_audio_buffer.cleared' 350 | } 351 | 352 | /** 353 | * Returned in server turn detection mode when speech is detected. 354 | */ 355 | export interface InputAudioBufferSpeechStartedEvent extends ServerEvent { 356 | type: 'input_audio_buffer.speech_started' 357 | 358 | /** The ID of the user message item that will be created when speech stops. */ 359 | item_id: string 360 | 361 | /** Milliseconds since the session started when speech was detected. */ 362 | audio_start_ms: number 363 | } 364 | 365 | /** 366 | * Returned in server turn detection mode when speech stops. 367 | */ 368 | export interface InputAudioBufferSpeechStoppedEvent extends ServerEvent { 369 | type: 'input_audio_buffer.speech_stopped' 370 | 371 | /** The ID of the user message item that will be created. */ 372 | item_id: string 373 | 374 | /** Milliseconds since the session started when speech stopped. */ 375 | audio_end_ms: number 376 | } 377 | 378 | /** 379 | * Returned when a new Response is created. The first event of response 380 | * creation, where the response is in an initial state of "in_progress". 381 | */ 382 | export interface ResponseCreatedEvent extends ServerEvent { 383 | type: 'response.created' 384 | 385 | /** The response resource. */ 386 | response: Realtime.Response 387 | } 388 | 389 | /** 390 | * Returned when a Response is done streaming. Always emitted, no matter the 391 | * final state. 392 | */ 393 | export interface ResponseDoneEvent extends ServerEvent { 394 | type: 'response.done' 395 | 396 | /** The response resource. */ 397 | response: Realtime.Response 398 | } 399 | 400 | /** 401 | * Returned when a new Item is created during response generation. 402 | */ 403 | export interface ResponseOutputItemAddedEvent extends ServerEvent { 404 | type: 'response.output_item.added' 405 | 406 | /** The ID of the response. */ 407 | response_id: string 408 | 409 | /** The index of the output item in the response. */ 410 | output_index: string 411 | 412 | /** The item that was added. */ 413 | item: Realtime.Item 414 | } 415 | 416 | /** 417 | * Returned when an Item is done streaming. Also emitted when a Response is 418 | * interrupted, incomplete, or cancelled. 419 | */ 420 | export interface ResponseOutputItemDoneEvent extends ServerEvent { 421 | type: 'response.output_item.done' 422 | 423 | /** The ID of the response. */ 424 | response_id: string 425 | 426 | /** The index of the output item in the response. */ 427 | output_index: string 428 | 429 | /** The item that was added. */ 430 | item: Realtime.Item 431 | } 432 | 433 | /** 434 | * Returned when a new content part is added to an assistant message item 435 | * during response generation. 436 | */ 437 | export interface ResponseContentPartItemAddedEvent extends ServerEvent { 438 | type: 'response.content_part.added' 439 | 440 | /** The ID of the response. */ 441 | response_id: string 442 | 443 | /** The ID of the item. */ 444 | item_id: string 445 | 446 | /** The index of the output item in the response. 
*/ 447 | output_index: string 448 | 449 | /** The index of the content part in the item's content array. */ 450 | content_index: number 451 | 452 | /** The content part. */ 453 | part: Realtime.ContentPart 454 | } 455 | 456 | /** 457 | * Returned when a content part is done streaming in an assistant message item. 458 | * Also emitted when a Response is interrupted, incomplete, or cancelled. 459 | */ 460 | export interface ResponseContentPartItemDoneEvent extends ServerEvent { 461 | type: 'response.content_part.done' 462 | 463 | /** The ID of the response. */ 464 | response_id: string 465 | 466 | /** The ID of the item. */ 467 | item_id: string 468 | 469 | /** The index of the output item in the response. */ 470 | output_index: string 471 | 472 | /** The index of the content part in the item's content array. */ 473 | content_index: number 474 | 475 | /** The content part. */ 476 | part: Realtime.ContentPart 477 | } 478 | 479 | /** 480 | * Returned when the text value of a "text" content part is updated. 481 | */ 482 | export interface ResponseTextDeltaEvent extends ServerEvent { 483 | type: 'response.text.delta' 484 | 485 | /** The ID of the response. */ 486 | response_id: string 487 | 488 | /** The ID of the item. */ 489 | item_id: string 490 | 491 | /** The index of the output item in the response. */ 492 | output_index: string 493 | 494 | /** The index of the content part in the item's content array. */ 495 | content_index: number 496 | 497 | /** The text delta. */ 498 | delta: string 499 | } 500 | 501 | /** 502 | * Returned when the text value of a "text" content part is done streaming. 503 | * Also emitted when a Response is interrupted, incomplete, or cancelled. 504 | */ 505 | export interface ResponseTextDoneEvent extends ServerEvent { 506 | type: 'response.text.done' 507 | 508 | /** The ID of the response. */ 509 | response_id: string 510 | 511 | /** The ID of the item. */ 512 | item_id: string 513 | 514 | /** The index of the output item in the response. */ 515 | output_index: string 516 | 517 | /** The index of the content part in the item's content array. */ 518 | content_index: number 519 | 520 | /** The final text content. */ 521 | text: string 522 | } 523 | 524 | /** 525 | * Returned when the model-generated transcription of audio output is updated. 526 | */ 527 | export interface ResponseAudioTranscriptDeltaEvent extends ServerEvent { 528 | type: 'response.audio_transcript.delta' 529 | 530 | /** The ID of the response. */ 531 | response_id: string 532 | 533 | /** The ID of the item. */ 534 | item_id: string 535 | 536 | /** The index of the output item in the response. */ 537 | output_index: string 538 | 539 | /** The index of the content part in the item's content array. */ 540 | content_index: number 541 | 542 | /** The transcript delta. */ 543 | delta: string 544 | } 545 | 546 | /** 547 | * Returned when the model-generated transcription of audio output is done 548 | * streaming. Also emitted when a Response is interrupted, incomplete, or 549 | * cancelled. 550 | */ 551 | export interface ResponseAudioTranscriptDoneEvent extends ServerEvent { 552 | type: 'response.audio_transcript.done' 553 | 554 | /** The ID of the response. */ 555 | response_id: string 556 | 557 | /** The ID of the item. */ 558 | item_id: string 559 | 560 | /** The index of the output item in the response. */ 561 | output_index: string 562 | 563 | /** The index of the content part in the item's content array. */ 564 | content_index: number 565 | 566 | /** The final transcript. 
*/ 567 | transcript: string 568 | } 569 | 570 | /** 571 | * Returned when the model-generated audio is updated. 572 | */ 573 | export interface ResponseAudioDeltaEvent extends ServerEvent { 574 | type: 'response.audio.delta' 575 | 576 | /** The ID of the response. */ 577 | response_id: string 578 | 579 | /** The ID of the item. */ 580 | item_id: string 581 | 582 | /** The index of the output item in the response. */ 583 | output_index: string 584 | 585 | /** The index of the content part in the item's content array. */ 586 | content_index: number 587 | 588 | /** Base64-encoded audio data delta. */ 589 | delta: string 590 | } 591 | 592 | /** 593 | * Returned when the model-generated audio is done. Also emitted when a 594 | * Response is interrupted, incomplete, or cancelled. 595 | */ 596 | export interface ResponseAudioDoneEvent extends ServerEvent { 597 | type: 'response.audio.done' 598 | 599 | /** The ID of the response. */ 600 | response_id: string 601 | 602 | /** The ID of the item. */ 603 | item_id: string 604 | 605 | /** The index of the output item in the response. */ 606 | output_index: string 607 | 608 | /** The index of the content part in the item's content array. */ 609 | content_index: number 610 | } 611 | 612 | /** 613 | * Returned when the model-generated function call arguments are updated. 614 | */ 615 | export interface ResponseFunctionCallArgumentsDeltaEvent extends ServerEvent { 616 | type: 'response.function_call_arguments.delta' 617 | 618 | /** The ID of the response. */ 619 | response_id: string 620 | 621 | /** The ID of the item. */ 622 | item_id: string 623 | 624 | /** The index of the output item in the response. */ 625 | output_index: string 626 | 627 | /** The index of the content part in the item's content array. */ 628 | content_index: number 629 | 630 | /** The ID of the function call. */ 631 | call_id: string 632 | 633 | /** The arguments delta as a JSON string. */ 634 | delta: string 635 | } 636 | 637 | /** 638 | * Returned when the model-generated function call arguments are done streaming. 639 | * Also emitted when a Response is interrupted, incomplete, or cancelled. 640 | */ 641 | export interface ResponseFunctionCallArgumentsDoneEvent extends ServerEvent { 642 | type: 'response.function_call_arguments.done' 643 | 644 | /** The ID of the response. */ 645 | response_id: string 646 | 647 | /** The ID of the item. */ 648 | item_id: string 649 | 650 | /** The index of the output item in the response. */ 651 | output_index: string 652 | 653 | /** The index of the content part in the item's content array. */ 654 | content_index: number 655 | 656 | /** The ID of the function call. */ 657 | call_id: string 658 | 659 | /** The final arguments as a JSON string. */ 660 | arguments: string 661 | } 662 | 663 | /** 664 | * Emitted after every `response.done` event to indicate the updated rate 665 | * limits. 666 | */ 667 | export interface RateLimitsUpdatedEvent extends ServerEvent { 668 | type: 'rate_limits.updated' 669 | 670 | /** Array of rate limit information. */ 671 | rate_limits: Realtime.RateLimit[] 672 | } 673 | } 674 | 675 | export namespace RealtimeCustomEvents { 676 | /** Custom event types that are not part of the official realtime API. 
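   *
   * These events are emitted locally by the `RealtimeClient` on top of the raw
   * server events. A rough usage sketch (illustrative only; it assumes the
   * client's `.on()` event-emitter API and that `OPENAI_API_KEY` is set in the
   * environment):
   *
   * ```ts
   * import { RealtimeClient } from 'openai-realtime-api'
   *
   * const client = new RealtimeClient()
   *
   * // Fired whenever an item in the conversation gains new content.
   * client.on('conversation.updated', ({ item, delta }) => {
   *   console.log(item.id, item.formatted.transcript, delta)
   * })
   *
   * // Fired once an item has finished streaming.
   * client.on('conversation.item.completed', ({ item }) => {
   *   console.log('completed:', item.id)
   * })
   *
   * await client.connect()
   * ```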
 */
677 |   export type EventType =
678 |     | 'conversation.item.appended'
679 |     | 'conversation.item.completed'
680 |     | 'conversation.updated'
681 |     | 'conversation.interrupted'
682 |     | 'realtime.event'
683 | 
684 |   export type EventMap = {
685 |     'conversation.item.appended': ConversationItemAppendedEvent
686 |     'conversation.item.completed': ConversationItemCompletedEvent
687 |     'conversation.updated': ConversationUpdatedEvent
688 |     'conversation.interrupted': ConversationInterruptedEvent
689 |     'realtime.event':
690 |       | CustomServerEvent
691 |       | CustomClientEvent
692 |   }
693 | 
694 |   export interface CustomEvent extends Event {
695 |     /** The custom event type. */
696 |     type: EventType
697 |   }
698 | 
699 |   export type CustomServerEvent<T extends RealtimeServerEvents.EventType = RealtimeServerEvents.EventType> =
700 |     RealtimeEvent & {
701 |       type: 'realtime.event'
702 |       source: 'server'
703 |       time: string
704 |       event: RealtimeServerEvents.EventMap[T]
705 |     }
706 | 
707 |   export type CustomClientEvent<T extends RealtimeClientEvents.EventType = RealtimeClientEvents.EventType> =
708 |     RealtimeEvent & {
709 |       type: 'realtime.event'
710 |       source: 'client'
711 |       time: string
712 |       event: RealtimeClientEvents.EventMap[T]
713 |     }
714 | 
715 |   export interface ConversationItemAppendedEvent
716 |     extends CustomEvent,
717 |       Omit<EventHandlerResult, 'item'> {
718 |     type: 'conversation.item.appended'
719 |     item: FormattedItem
720 |   }
721 | 
722 |   export interface ConversationItemCompletedEvent
723 |     extends CustomEvent,
724 |       Omit<EventHandlerResult, 'item'> {
725 |     type: 'conversation.item.completed'
726 |     item: FormattedItem
727 |   }
728 | 
729 |   export interface ConversationUpdatedEvent
730 |     extends CustomEvent,
731 |       Omit<EventHandlerResult, 'item'> {
732 |     type: 'conversation.updated'
733 |     item: FormattedItem
734 |   }
735 | 
736 |   export interface ConversationInterruptedEvent
737 |     extends CustomEvent,
738 |       Omit<EventHandlerResult, 'item'> {
739 |     type: 'conversation.interrupted'
740 |   }
741 | }
742 | 
--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
1 | export * from './api'
2 | export * from './client'
3 | export * from './conversation'
4 | export * from './event-handler'
5 | export type * from './events'
6 | export type * from './types'
7 | export * from './utils'
8 | 
--------------------------------------------------------------------------------
/src/node/index.ts:
--------------------------------------------------------------------------------
1 | export * from './relay-server'
2 | 
--------------------------------------------------------------------------------
/src/node/relay-server.ts:
--------------------------------------------------------------------------------
1 | import type { IncomingMessage } from 'node:http'
2 | 
3 | import { type WebSocket, WebSocketServer } from 'ws'
4 | 
5 | import type { RealtimeClient } from '../client'
6 | import type { RealtimeClientEvents } from '../events'
7 | import { assert, getEnv } from '../utils'
8 | 
9 | /**
10 |  * Simple Node.js relay server for the OpenAI Realtime API.
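 *
 * The relay is intended to sit between browser clients and OpenAI so that the
 * `OPENAI_API_KEY` only ever lives on the server: browsers connect to the
 * relay's WebSocket URL and events are forwarded in both directions. A
 * browser-side sketch (illustrative only; it assumes the client accepts a
 * custom `url` option and that the relay below is listening on port 8081):
 *
 * ```ts
 * import { RealtimeClient } from 'openai-realtime-api'
 *
 * // No apiKey here; the relay holds the real credentials.
 * const client = new RealtimeClient({ url: 'ws://localhost:8081' })
 * await client.connect()
 * ```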
11 | * 12 | * @example 13 | * 14 | * ```ts 15 | * import { RealtimeClient } from 'openai-realtime-api' 16 | * import { RealtimeRelay } from 'openai-realtime-api/node' 17 | * 18 | * const client = new RealtimeClient({ relay: true }) 19 | * const relay = new RealtimeRelay({ client }) 20 | * relay.listen(8081) 21 | * ``` 22 | */ 23 | export class RealtimeRelay { 24 | readonly client: RealtimeClient 25 | wss?: WebSocketServer 26 | 27 | constructor({ client }: { client: RealtimeClient }) { 28 | assert( 29 | client.relay, 30 | 'RealtimeRelay client must have the "relay" option set' 31 | ) 32 | assert( 33 | client.realtime.apiKey, 34 | 'RealtimeRelay client must have an API key set' 35 | ) 36 | 37 | this.client = client 38 | } 39 | 40 | /** 41 | * Creates a `WebSocketServer` and begins listening for connections. 42 | * 43 | * @param port Port to listen on; defaults to the PORT environment variable or 8081. 44 | */ 45 | listen(port?: number) { 46 | assert(!this.wss, 'RealtimeRelay is already listening') 47 | 48 | if (!port) { 49 | port = Number.parseInt(getEnv('PORT') ?? '8081') 50 | assert(!Number.isNaN(port), `Invalid port: ${port}`) 51 | } 52 | 53 | this.wss = new WebSocketServer({ port }) 54 | this.wss.on('connection', this._connectionHandler.bind(this)) 55 | 56 | this._info(`Listening on ws://localhost:${port}`) 57 | } 58 | 59 | /** 60 | * Closes the WebSocket server. 61 | */ 62 | close() { 63 | this.wss?.close() 64 | this.wss = undefined 65 | } 66 | 67 | protected async _connectionHandler(ws: WebSocket, req: IncomingMessage) { 68 | if (!req.url) { 69 | this._error('No URL provided, closing connection.') 70 | ws.close() 71 | return 72 | } 73 | 74 | const url = new URL(req.url, `http://${req.headers.host}`) 75 | const pathname = url.pathname 76 | 77 | if (pathname !== '/') { 78 | this._error(`Invalid pathname: "${pathname}"`) 79 | ws.close() 80 | return 81 | } 82 | 83 | // Relay: OpenAI server events -> browser 84 | this.client.realtime.on('server.*', (event) => { 85 | this._debug(`Relaying "${event.type}" to client`) 86 | ws.send(JSON.stringify(event)) 87 | }) 88 | this.client.realtime.on('close', () => ws.close()) 89 | 90 | // Relay: browser events -> OpenAI server 91 | // We need to queue data waiting for the OpenAI connection 92 | const messageQueue: string[] = [] 93 | const messageHandler = (data: string) => { 94 | try { 95 | const event = JSON.parse(data) as RealtimeClientEvents.ClientEvent 96 | this._debug(`Relaying "${event.type}" to server`) 97 | this.client.realtime.send(event.type, event) 98 | } catch (err: any) { 99 | this._error(`Error parsing event from client: ${data}`, err.message) 100 | } 101 | } 102 | 103 | ws.on('message', (data) => { 104 | if (!this.client.isConnected) { 105 | messageQueue.push(data.toString()) 106 | } else { 107 | messageHandler(data.toString()) 108 | } 109 | }) 110 | ws.on('close', () => this.client.disconnect()) 111 | 112 | // Connect to OpenAI Realtime API 113 | try { 114 | this._info('Connecting to server...', this.client.realtime.url) 115 | await this.client.connect() 116 | } catch (err: any) { 117 | this._error('Error connecting to server', err.message) 118 | ws.close() 119 | return 120 | } 121 | 122 | this._info('Connected to server successfully', this.client.realtime.url) 123 | while (messageQueue.length) { 124 | messageHandler(messageQueue.shift()!) 
125 |     }
126 |   }
127 | 
128 |   protected _info(...args: any[]) {
129 |     console.log('[RealtimeRelay]', ...args)
130 |   }
131 | 
132 |   protected _debug(...args: any[]) {
133 |     if (this.client.realtime.debug) {
134 |       console.log('[RealtimeRelay]', ...args)
135 |     }
136 |   }
137 | 
138 |   protected _error(...args: any[]) {
139 |     console.error('[RealtimeRelay]', ...args)
140 |   }
141 | }
142 | 
--------------------------------------------------------------------------------
/src/reset.d.ts:
--------------------------------------------------------------------------------
1 | import '@total-typescript/ts-reset'
2 | 
--------------------------------------------------------------------------------
/src/types.ts:
--------------------------------------------------------------------------------
1 | export namespace Realtime {
2 |   export type AudioFormat = 'pcm16' | 'g711_ulaw' | 'g711_alaw'
3 |   export type AudioTranscriptionModel = 'whisper-1' | (string & {})
4 | 
5 |   export type ItemRole = 'user' | 'assistant' | 'system'
6 |   export type ItemType = 'message' | 'function_call' | 'function_call_output'
7 |   export type ItemStatus = 'in_progress' | 'completed' | 'incomplete'
8 |   export type ContentPartType = 'input_text' | 'input_audio' | 'text' | 'audio'
9 | 
10 |   export type Voice =
11 |     | 'alloy'
12 |     | 'ash'
13 |     | 'ballad'
14 |     | 'coral'
15 |     | 'echo'
16 |     | 'sage'
17 |     | 'shimmer'
18 |     | 'verse'
19 |     | (string & {})
20 | 
21 |   export type ToolChoice =
22 |     | 'auto'
23 |     | 'none'
24 |     | 'required'
25 |     | { type: 'function'; name: string }
26 | 
27 |   export type ObjectType =
28 |     | 'realtime.item'
29 |     | 'realtime.response'
30 |     | 'realtime.session'
31 |     | 'realtime.conversation'
32 | 
33 |   export type ResponseStatus =
34 |     | 'in_progress'
35 |     | 'completed'
36 |     | 'incomplete'
37 |     | 'cancelled'
38 |     | 'failed'
39 | 
40 |   export interface BaseObject {
41 |     /** The unique ID of the object. */
42 |     id?: string
43 | 
44 |     /** Discriminator for the type of this object. */
45 |     object?: ObjectType
46 |   }
47 | 
48 |   export interface AudioTranscription {
49 |     model: AudioTranscriptionModel
50 |   }
51 | 
52 |   export interface TurnDetection {
53 |     type: 'server_vad'
54 | 
55 |     /** 0.0 to 1.0 */
56 |     threshold?: number
57 | 
58 |     /** How much audio to include in the audio stream before the speech starts. */
59 |     prefix_padding_ms?: number
60 | 
61 |     /** How long to wait to mark the speech as stopped. */
62 |     silence_duration_ms?: number
63 |   }
64 | 
65 |   export interface ToolDefinition {
66 |     type: 'function'
67 |     name: string
68 |     description: string
69 |     parameters: { [key: string]: any }
70 |   }
71 | 
72 |   export type PartialToolDefinition = Omit<ToolDefinition, 'type'> & {
73 |     type?: 'function'
74 |   }
75 | 
76 |   export interface SessionConfig {
77 |     /** The default system instructions prepended to model calls. */
78 |     instructions?: string
79 | 
80 |     /**
81 |      * The set of modalities the model can respond with. To disable audio, set
82 |      * this to ["text"].
83 |      */
84 |     modalities?: string[]
85 | 
86 |     /**
87 |      * The voice the model uses to respond - one of alloy, echo, or shimmer.
88 |      *
89 |      * Cannot be changed once the model has responded with audio at least once.
90 |      */
91 |     voice?: Voice
92 | 
93 |     /** The format of input audio. */
94 |     input_audio_format?: AudioFormat
95 | 
96 |     /** The format of output audio. */
97 |     output_audio_format?: AudioFormat
98 | 
99 |     /** Configuration for input audio transcription. Can be set to null to turn off. */
100 |     input_audio_transcription?: AudioTranscription | null
101 | 
102 |     /** Configuration for turn detection. Can be set to null to turn off.
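     *
     * For example, server-side voice activity detection with the commonly
     * documented defaults would look roughly like this (values are
     * illustrative, not prescribed by this library):
     *
     * ```ts
     * turn_detection: { type: 'server_vad', threshold: 0.5, prefix_padding_ms: 300, silence_duration_ms: 500 }
     * ```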
     */
103 |     turn_detection?: TurnDetection | null
104 | 
105 |     /** Tools (functions) available to the model. */
106 |     tools?: ToolDefinition[]
107 | 
108 |     /** How the model chooses tools. */
109 |     tool_choice?: ToolChoice
110 | 
111 |     /** Sampling temperature for the model. */
112 |     temperature?: number
113 | 
114 |     /**
115 |      * Maximum number of output tokens for a single assistant response, inclusive
116 |      * of tool calls. Provide an integer between 1 and 4096 to limit output
117 |      * tokens, or "inf" for the maximum available tokens for a given model.
118 |      *
119 |      * Defaults to "inf".
120 |      */
121 |     max_response_output_tokens?: number | 'inf'
122 |   }
123 | 
124 |   export interface Session extends BaseObject, SessionConfig {
125 |     /** The unique ID of the session. */
126 |     id: string
127 | 
128 |     /** Type of object. */
129 |     object: 'realtime.session'
130 |   }
131 | 
132 |   export interface BaseContentPart {
133 |     /** The type of the content. */
134 |     type: ContentPartType
135 | 
136 |     /** Text content for "text" and "input_text" content parts. */
137 |     text?: string
138 | 
139 |     /** Base64-encoded audio data. */
140 |     audio?: string
141 | 
142 |     /** Optional text transcript. */
143 |     transcript?: string | null
144 |   }
145 | 
146 |   export interface InputTextContentPart extends BaseContentPart {
147 |     type: 'input_text'
148 |     text: string
149 |   }
150 | 
151 |   export interface InputAudioContentPart extends BaseContentPart {
152 |     type: 'input_audio'
153 |     /** Base64-encoded audio data. */
154 |     audio?: string
155 |     transcript?: string | null
156 |   }
157 | 
158 |   export interface TextContentPart extends BaseContentPart {
159 |     type: 'text'
160 |     text: string
161 |   }
162 | 
163 |   export interface AudioContentPart extends BaseContentPart {
164 |     type: 'audio'
165 |     /** Base64-encoded audio data. */
166 |     audio?: string
167 |     transcript?: string | null
168 |   }
169 | 
170 |   export type ContentPart =
171 |     | InputTextContentPart
172 |     | InputAudioContentPart
173 |     | TextContentPart
174 |     | AudioContentPart
175 | 
176 |   export interface BaseItem extends BaseObject {
177 |     /** The unique ID of the item. */
178 |     id: string
179 | 
180 |     /** Type of object. */
181 |     object?: 'realtime.item'
182 | 
183 |     /** The type of the item. */
184 |     type: ItemType
185 | 
186 |     /** The status of the item. */
187 |     status: ItemStatus
188 | 
189 |     /** The role of the message sender. */
190 |     role: ItemRole
191 | 
192 |     /** The content of the item. */
193 |     content: ContentPart[]
194 |   }
195 | 
196 |   export interface SystemItem {
197 |     role: 'system'
198 |     type: 'message'
199 |     content: InputTextContentPart[]
200 |   }
201 | 
202 |   export interface UserItem {
203 |     role: 'user'
204 |     type: 'message'
205 |     content: Array<InputTextContentPart | InputAudioContentPart>
206 |   }
207 | 
208 |   export interface AssistantItem {
209 |     role: 'assistant'
210 |     type: 'message'
211 |     content: Array<TextContentPart | AudioContentPart>
212 |   }
213 | 
214 |   export interface FunctionCallItem {
215 |     type: 'function_call'
216 | 
217 |     /** The ID of the function call. */
218 |     call_id: string
219 | 
220 |     /** The name of the function being called. */
221 |     name: string
222 | 
223 |     /** The arguments of the function call. */
224 |     arguments: string
225 |   }
226 | 
227 |   export interface FunctionCallOutputItem {
228 |     type: 'function_call_output'
229 | 
230 |     /** The ID of the function call. */
231 |     call_id: string
232 | 
233 |     /** The output of the function call.
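     *
     * Typically a JSON-encoded string produced by your tool handler, e.g.
     * (illustrative values only): `'{"location":"Toronto","temperature_c":21}'`.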
*/ 234 | output: string 235 | } 236 | 237 | export type Item = BaseItem & 238 | ( 239 | | SystemItem 240 | | UserItem 241 | | AssistantItem 242 | | FunctionCallItem 243 | | FunctionCallOutputItem 244 | ) 245 | 246 | export type ClientItem = 247 | | SystemItem 248 | | UserItem 249 | | AssistantItem 250 | | FunctionCallItem 251 | | FunctionCallOutputItem 252 | 253 | export interface Usage { 254 | total_tokens: number 255 | input_tokens: number 256 | output_tokens: number 257 | } 258 | 259 | export interface ResponseConfig { 260 | /** Instructions for the model. */ 261 | instructions?: string 262 | 263 | /** 264 | * The modalities for the response. To disable audio, set this to ["text"]. 265 | */ 266 | modalities?: string[] 267 | 268 | /** 269 | * The voice the model uses to respond - one of alloy, echo, or shimmer. 270 | */ 271 | voice?: Voice 272 | 273 | /** The format of output audio. */ 274 | output_audio_format?: AudioFormat 275 | 276 | /** Tools (functions) available to the model. */ 277 | tools?: ToolDefinition[] 278 | 279 | /** How the model chooses tools. */ 280 | tool_choice?: ToolChoice 281 | 282 | /** Sampling temperature for the model. */ 283 | temperature?: number 284 | 285 | /** 286 | * Maximum number of output tokens for a single assistant response, inclusive 287 | * of tool calls. Provide an integer between 1 and 4096 to limit output 288 | * tokens, or "inf" for the maximum available tokens for a given model. 289 | * Defaults to "inf". 290 | */ 291 | max_output_tokens?: number | 'inf' 292 | } 293 | 294 | export interface Response extends BaseObject, ResponseConfig { 295 | /** The unique ID of the response. */ 296 | id: string 297 | 298 | /** Type of object. */ 299 | object: 'realtime.response' 300 | 301 | /** Status of the response. */ 302 | status: ResponseStatus 303 | 304 | /** Additional details about the status. */ 305 | status_details?: 306 | | { 307 | type: 'incomplete' 308 | reason: 'interruption' | 'max_output_tokens' | 'content_filter' 309 | } 310 | | { 311 | type: 'failed' 312 | error?: Error | null 313 | } 314 | | null 315 | 316 | /** The list of output items generated by the response. */ 317 | output: Item[] 318 | 319 | /** Usage statistics for the response. */ 320 | usage?: Usage 321 | } 322 | 323 | export interface Error { 324 | /** The type of error. */ 325 | type: string 326 | 327 | /** Error code, if any. */ 328 | code?: string 329 | 330 | /** A human-readable error message. */ 331 | message: string 332 | 333 | /** Parameter related to the error, if any. */ 334 | param?: string | null 335 | 336 | /** Unique ID of the event, if any. */ 337 | event_id?: string 338 | } 339 | 340 | export interface Conversation extends BaseObject { 341 | /** The unique ID of the conversation. */ 342 | id: string 343 | 344 | /** Type of object. */ 345 | object: 'realtime.conversation' 346 | } 347 | 348 | export interface RateLimit { 349 | name: 'requests' | 'tokens' | (string & {}) 350 | limit: number 351 | remaining: number 352 | reset_seconds: number 353 | } 354 | } 355 | 356 | // NOTE: all types outside of the Realtime namespace are local to this project 357 | // and not part of the official API. 
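
// For illustration only: a minimal `Realtime.SessionConfig` showing how the
// pieces defined above fit together. Every value below is a made-up
// placeholder rather than a recommended default.
const exampleSessionConfig: Realtime.SessionConfig = {
  instructions: 'You are a friendly, concise voice assistant.',
  modalities: ['text', 'audio'],
  voice: 'alloy',
  input_audio_format: 'pcm16',
  output_audio_format: 'pcm16',
  input_audio_transcription: { model: 'whisper-1' },
  turn_detection: { type: 'server_vad', threshold: 0.5, silence_duration_ms: 500 },
  tools: [
    {
      type: 'function',
      name: 'get_weather',
      description: 'Look up the current weather for a city.',
      parameters: {
        type: 'object',
        properties: { city: { type: 'string' } },
        required: ['city']
      }
    }
  ],
  tool_choice: 'auto',
  temperature: 0.8,
  max_response_output_tokens: 'inf'
}
void exampleSessionConfig // referenced only so the illustration is not flagged as unused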
358 | 
359 | export type MaybePromise<T> = T | Promise<T>
360 | 
361 | export interface FormattedTool {
362 |   type: 'function'
363 |   name: string
364 |   call_id: string
365 |   arguments: string
366 | }
367 | 
368 | export interface FormattedProperty {
369 |   audio: Int16Array
370 |   text: string
371 |   transcript: string
372 |   tool?: FormattedTool
373 |   output?: string
374 |   file?: any
375 | }
376 | 
377 | /** Local item used strictly for convenience and not part of the API. */
378 | export type FormattedItem = Realtime.Item & {
379 |   formatted: FormattedProperty
380 | }
381 | 
382 | /** Local item used strictly for convenience and not part of the API. */
383 | export type MaybeFormattedItem = Realtime.Item & {
384 |   formatted?: FormattedProperty
385 | }
386 | 
387 | export interface EventHandlerResult {
388 |   item?: MaybeFormattedItem
389 |   delta?: {
390 |     transcript?: string
391 |     audio?: Int16Array
392 |     text?: string
393 |     arguments?: string
394 |   }
395 |   response?: Realtime.Response
396 | }
397 | 
398 | export type ToolHandler = (params: any) => MaybePromise<any>
399 | 
--------------------------------------------------------------------------------
/src/utils.ts:
--------------------------------------------------------------------------------
1 | import { customAlphabet } from 'nanoid'
2 | 
3 | export const isBrowser = !!(globalThis as any).document
4 | 
5 | export function hasNativeWebSocket(): boolean {
6 |   return !!globalThis.WebSocket
7 | }
8 | 
9 | export function getEnv(name: string): string | undefined {
10 |   try {
11 |     return typeof process !== 'undefined'
12 |       ? // eslint-disable-next-line no-process-env
13 |         process.env?.[name]
14 |       : undefined
15 |   } catch {
16 |     return undefined
17 |   }
18 | }
19 | 
20 | export function assert(
21 |   value: unknown,
22 |   message?: string | Error
23 | ): asserts value {
24 |   if (value) {
25 |     return
26 |   }
27 | 
28 |   if (!message) {
29 |     throw new Error('Assertion failed')
30 |   }
31 | 
32 |   throw typeof message === 'string' ? new Error(message) : message
33 | }
34 | 
35 | /**
36 |  * Converts Float32Array of amplitude data to ArrayBuffer in Int16Array format.
37 |  */
38 | export function floatTo16BitPCM(float32Array: Float32Array): ArrayBuffer {
39 |   const buffer = new ArrayBuffer(float32Array.length * 2)
40 |   const view = new DataView(buffer)
41 |   let offset = 0
42 | 
43 |   for (let i = 0; i < float32Array.length; i++, offset += 2) {
44 |     const s = Math.max(-1, Math.min(1, float32Array[i]!))
45 |     view.setInt16(offset, s < 0 ? s * 0x80_00 : s * 0x7f_ff, true)
46 |   }
47 | 
48 |   return buffer
49 | }
50 | 
51 | /**
52 |  * Converts a base64 string to an ArrayBuffer.
53 |  */
54 | export function base64ToArrayBuffer(base64: string): ArrayBuffer {
55 |   const binaryString = atob(base64)
56 |   const len = binaryString.length
57 |   const bytes = new Uint8Array(len)
58 | 
59 |   for (let i = 0; i < len; i++) {
60 |     // eslint-disable-next-line unicorn/prefer-code-point
61 |     bytes[i] = binaryString.charCodeAt(i)
62 |   }
63 | 
64 |   return bytes.buffer
65 | }
66 | 
67 | /**
68 |  * Converts an ArrayBuffer, Int16Array or Float32Array to a base64 string.
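 *
 * Commonly used to turn raw PCM samples into the base64 audio payload that the
 * Realtime API expects. A small sketch; the sample values below are
 * placeholders:
 *
 * ```ts
 * // Mono Float32 samples in [-1, 1]; they are re-encoded as 16-bit PCM first.
 * const samples = new Float32Array([0, 0.25, -0.25, 0.5])
 * const base64Audio = arrayBufferToBase64(samples)
 * ```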
69 |  */
70 | export function arrayBufferToBase64(
71 |   arrayBuffer: ArrayBuffer | Int16Array | Float32Array
72 | ): string {
73 |   if (arrayBuffer instanceof Float32Array) {
74 |     arrayBuffer = floatTo16BitPCM(arrayBuffer)
75 |   } else if (arrayBuffer instanceof Int16Array) {
76 |     arrayBuffer = arrayBuffer.buffer
77 |   }
78 | 
79 |   const bytes = new Uint8Array(arrayBuffer)
80 |   const chunkSize = 0x80_00 // 32KB chunk size
81 |   let binary = ''
82 | 
83 |   for (let i = 0; i < bytes.length; i += chunkSize) {
84 |     const chunk = bytes.subarray(i, i + chunkSize)
85 |     binary += String.fromCharCode.apply(null, chunk as any)
86 |   }
87 | 
88 |   return btoa(binary)
89 | }
90 | 
91 | /**
92 |  * Merge two Int16Arrays from Int16Arrays or ArrayBuffers.
93 |  */
94 | export function mergeInt16Arrays(
95 |   left: ArrayBuffer | Int16Array,
96 |   right: ArrayBuffer | Int16Array
97 | ): Int16Array {
98 |   if (left instanceof ArrayBuffer) {
99 |     left = new Int16Array(left)
100 |   }
101 | 
102 |   if (right instanceof ArrayBuffer) {
103 |     right = new Int16Array(right)
104 |   }
105 | 
106 |   if (!(left instanceof Int16Array) || !(right instanceof Int16Array)) {
107 |     throw new TypeError(`Both items must be Int16Array`)
108 |   }
109 | 
110 |   const newValues = new Int16Array(left.length + right.length)
111 |   for (const [i, element] of left.entries()) {
112 |     newValues[i] = element
113 |   }
114 | 
115 |   for (const [j, element] of right.entries()) {
116 |     newValues[left.length + j] = element
117 |   }
118 | 
119 |   return newValues
120 | }
121 | 
122 | // base58; non-repeating chars
123 | const alphabet = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
124 | const generateIdImpl = customAlphabet(alphabet, 21)
125 | 
126 | /**
127 |  * Generates an id to send with events and messages.
128 |  */
129 | export function generateId(prefix: string, size = 21): string {
130 |   const id = generateIdImpl(size)
131 |   return `${prefix}${id}`
132 | }
133 | 
134 | export const sleep = (t: number) =>
135 |   new Promise<void>((r) => setTimeout(() => r(), t))
136 | 
137 | /**
138 |  * Trims an event's content for debugging purposes to make logs easier to read.
139 |  */
140 | export function trimDebugEvent(
141 |   event?: any,
142 |   {
143 |     maxLimit = 200
144 |   }: {
145 |     maxLimit?: number
146 |   } = {}
147 | ): any {
148 |   if (!event) return event
149 | 
150 |   const e = structuredClone(event)
151 | 
152 |   if (e.item?.content?.find((c: any) => c.audio)) {
153 |     e.item.content = e.item.content.map(({ audio, ...c }: any) => {
154 |       if (audio) {
155 |         return {
156 |           ...c,
157 |           audio: ''
158 |         }
159 |       } else {
160 |         return c
161 |       }
162 |     })
163 |   }
164 | 
165 |   if (e.audio) {
166 |     e.audio = '