├── .gitignore
├── package.json
├── server.js
├── index.html
├── public
└── vite.svg
├── src
├── style.css
└── main.js
└── README.md
/.gitignore:
--------------------------------------------------------------------------------
1 | # Logs
2 | logs
3 | *.log
4 | npm-debug.log*
5 | yarn-debug.log*
6 | yarn-error.log*
7 | pnpm-debug.log*
8 | lerna-debug.log*
9 |
10 | node_modules
11 | dist
12 | dist-ssr
13 | *.local
14 |
15 | # Editor directories and files
16 | .vscode/*
17 | !.vscode/extensions.json
18 | .idea
19 | .DS_Store
20 | *.suo
21 | *.ntvs*
22 | *.njsproj
23 | *.sln
24 | *.sw?
25 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "openai-webrtc-play",
3 | "private": true,
4 | "version": "0.0.0",
5 | "type": "module",
6 | "scripts": {
7 | "dev": "vite",
8 | "server": "node server.js",
9 | "build": "vite build",
10 | "preview": "vite preview"
11 | },
12 | "dependencies": {
13 | "cors": "^2.8.5",
14 | "express": "^4.21.2"
15 | },
16 | "devDependencies": {
17 | "vite": "^5.0.8"
18 | }
19 | }
20 |
--------------------------------------------------------------------------------
/server.js:
--------------------------------------------------------------------------------
import express from "express";
import cors from "cors";

const app = express();
app.use(cors());

// Endpoint that mints an ephemeral OpenAI Realtime token for the browser
// client, so the real API key never leaves the server.
app.get("/session", async (req, res) => {
  try {
    // Fail fast with an actionable message instead of sending
    // "Bearer undefined" upstream when the key is not configured.
    if (!process.env.OPENAI_API_KEY) {
      res.status(500).json({ error: "OPENAI_API_KEY is not set" });
      return;
    }

    const response = await fetch(
      "https://api.openai.com/v1/realtime/sessions",
      {
        method: "POST",
        headers: {
          Authorization: `Bearer ${process.env.OPENAI_API_KEY}`,
          "Content-Type": "application/json",
        },
        body: JSON.stringify({
          model: "gpt-4o-realtime-preview-2024-12-17",
          voice: "verse",
        }),
      }
    );

    if (!response.ok) {
      // Include the upstream error body (when readable) so failures are
      // diagnosable rather than just a bare status code.
      const detail = await response.text().catch(() => "");
      throw new Error(
        `HTTP error! status: ${response.status}${detail ? ` - ${detail}` : ""}`
      );
    }

    const data = await response.json();
    res.json(data);
  } catch (error) {
    console.error("Error:", error);
    res.status(500).json({ error: error.message });
  }
});

const PORT = process.env.PORT || 3000;
app.listen(PORT, () => {
  console.log(`Server is running on port ${PORT}`);
});
--------------------------------------------------------------------------------
/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 | WebRTC Audio Chat
8 |
32 |
33 |
34 |
35 |
WebRTC Audio Chat
36 |
Ready to connect...
37 |
38 |
39 |
40 |
41 |
42 |
43 |
--------------------------------------------------------------------------------
/public/vite.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/style.css:
--------------------------------------------------------------------------------
1 | :root {
2 | font-family: Inter, system-ui, Avenir, Helvetica, Arial, sans-serif;
3 | line-height: 1.5;
4 | font-weight: 400;
5 | color-scheme: light dark;
6 | color: rgba(255, 255, 255, 0.87);
7 | background-color: #242424;
8 | }
9 |
10 | body {
11 | margin: 0;
12 | display: flex;
13 | place-items: center;
14 | min-width: 320px;
15 | min-height: 100vh;
16 | }
17 |
18 | h1 {
19 | font-size: 2.5em;
20 | line-height: 1.1;
21 | margin-bottom: 1rem;
22 | }
23 |
24 | #app {
25 | max-width: 800px;
26 | margin: 0 auto;
27 | padding: 2rem;
28 | text-align: center;
29 | }
30 |
31 | #status {
32 | margin: 1rem;
33 | padding: 1rem;
34 | border-radius: 8px;
35 | background-color: rgba(255, 255, 255, 0.1);
36 | }
37 |
38 | button {
39 | border-radius: 8px;
40 | border: 1px solid transparent;
41 | padding: 0.8em 1.6em;
42 | font-size: 1em;
43 | font-weight: 500;
44 | font-family: inherit;
45 | background-color: #1a1a1a;
46 | cursor: pointer;
47 | transition: border-color 0.25s;
48 | margin: 0.5rem;
49 | }
50 |
51 | button:hover {
52 | border-color: #646cff;
53 | }
54 |
55 | button:focus,
56 | button:focus-visible {
57 | outline: 4px auto -webkit-focus-ring-color;
58 | }
59 |
60 | button:disabled {
61 | opacity: 0.5;
62 | cursor: not-allowed;
63 | }
64 |
65 | @media (prefers-color-scheme: light) {
66 | :root {
67 | color: #213547;
68 | background-color: #ffffff;
69 | }
70 | button {
71 | background-color: #f9f9f9;
72 | }
73 | #status {
74 | background-color: rgba(0, 0, 0, 0.05);
75 | }
76 | }
77 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # OpenAI WebRTC Audio Chat
2 |
3 | A simple web application that demonstrates real-time audio conversation with OpenAI's Realtime API using WebRTC. This application allows users to have voice conversations with OpenAI's language models.
4 |
5 | ## Features
6 |
7 | - Real-time audio streaming using WebRTC
8 | - Two-way audio communication
9 | - Simple and intuitive UI
10 | - Support for both light and dark themes
11 | - Real-time status updates
12 |
13 | ## Prerequisites
14 |
15 | - Node.js (v18 or higher — the backend relies on the built-in `fetch` API)
16 | - OpenAI API key with access to Realtime API
17 | - Modern web browser with WebRTC support
18 |
19 | ## Setup
20 |
21 | 1. Clone the repository:
22 | ```bash
23 | git clone https://github.com/realtime-ai/openai-realtime-webrtc-demo.git
24 | cd openai-realtime-webrtc-demo
25 | ```
26 |
27 | 2. Install dependencies:
28 | ```bash
29 | npm install
30 | ```
31 |
32 | 3. Set up your OpenAI API key:
33 | ```bash
34 | export OPENAI_API_KEY=your_api_key_here
35 | ```
36 |
37 | ## Running the Application
38 |
39 | 1. Start the backend server:
40 | ```bash
41 | npm run server
42 | ```
43 |
44 | 2. In a new terminal, start the frontend development server:
45 | ```bash
46 | npm run dev
47 | ```
48 |
49 | 3. Open your browser and navigate to the URL shown in the terminal (typically http://localhost:5173)
50 |
51 | ## Usage
52 |
53 | 1. When the page loads, it will automatically initialize the WebRTC connection
54 | 2. Click "Start Recording" to begin a conversation
55 | 3. Speak into your microphone
56 | 4. The AI's responses will play through your speakers
57 | 5. Click "Stop Recording" to end the conversation
58 |
59 | ## Technical Details
60 |
61 | - Frontend: Vanilla JavaScript with Vite
62 | - Backend: Node.js with Express
63 | - API: OpenAI Realtime API with WebRTC
64 | - Real-time communication: WebRTC data channels and media streams
65 |
66 | ## Security Notes
67 |
68 | - The application uses ephemeral API keys for secure client-side connections
69 | - The main OpenAI API key is only used server-side
70 | - All communication is handled through secure WebRTC channels
71 |
72 | ## Development
73 |
74 | The project uses Vite for development and building. Available commands:
75 |
76 | ```bash
77 | npm run dev # Start development server
78 | npm run build # Build for production
79 | npm run preview # Preview production build
80 | npm run server # Start backend server
81 | ```
82 |
83 | ## License
84 |
85 | MIT
86 |
--------------------------------------------------------------------------------
/src/main.js:
--------------------------------------------------------------------------------
import "./style.css";

// UI elements: conversation controls and the scrolling status log.
const startBtn = document.getElementById("startBtn");
const stopBtn = document.getElementById("stopBtn");
const statusDiv = document.getElementById("status");

// WebRTC session state, populated by init() and used by the button handlers.
let peerConnection = null;
let dataChannel = null;
/**
 * Establish the WebRTC session with the OpenAI Realtime API:
 * fetch an ephemeral key from our backend, capture the microphone,
 * perform the SDP offer/answer exchange, and wire up audio + event channels.
 * On any failure, releases the microphone and closes the connection so a
 * retry or reload starts from a clean state.
 */
async function init() {
  try {
    // Fetch the ephemeral client key from our backend (the real API key
    // stays server-side).
    const tokenResponse = await fetch("http://localhost:3000/session");
    if (!tokenResponse.ok) {
      throw new Error(`Failed to get token: ${tokenResponse.status}`);
    }
    const tokenData = await tokenResponse.json();
    const EPHEMERAL_KEY = tokenData.client_secret.value;

    // Create the WebRTC peer connection.
    peerConnection = new RTCPeerConnection();

    // Surface connection-state transitions in the status log.
    peerConnection.onconnectionstatechange = () => {
      updateStatus(`Connection state: ${peerConnection.connectionState}`);
    };

    peerConnection.oniceconnectionstatechange = () => {
      updateStatus(
        `ICE connection state: ${peerConnection.iceConnectionState}`
      );
    };

    // Play the model's audio track as soon as it arrives.
    const audioEl = document.createElement("audio");
    audioEl.autoplay = true;
    peerConnection.ontrack = (e) => {
      audioEl.srcObject = e.streams[0];
    };

    // Data channel carrying Realtime API events as JSON messages.
    dataChannel = peerConnection.createDataChannel("oai-events");
    dataChannel.addEventListener("message", (e) => {
      const event = JSON.parse(e.data);
      console.log("Received event:", event);
      updateStatus(`Received: ${event.type}`);
    });

    // Capture the microphone and send its track to the model.
    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    peerConnection.addTrack(stream.getTracks()[0], stream);

    // Standard SDP offer/answer exchange with the Realtime API.
    const offer = await peerConnection.createOffer();
    await peerConnection.setLocalDescription(offer);

    const baseUrl = "https://api.openai.com/v1/realtime";
    const model = "gpt-4o-realtime-preview-2024-12-17";

    const sdpResponse = await fetch(`${baseUrl}?model=${model}`, {
      method: "POST",
      body: offer.sdp,
      headers: {
        Authorization: `Bearer ${EPHEMERAL_KEY}`,
        "Content-Type": "application/sdp",
      },
    });

    if (!sdpResponse.ok) {
      throw new Error(`HTTP error! status: ${sdpResponse.status}`);
    }

    const answer = {
      type: "answer",
      sdp: await sdpResponse.text(),
    };
    await peerConnection.setRemoteDescription(answer);
    updateStatus("Connected to OpenAI Realtime API");
    startBtn.disabled = false;
  } catch (error) {
    // Release the microphone and tear down the connection; otherwise the mic
    // stays hot and the half-open RTCPeerConnection leaks across retries.
    if (peerConnection) {
      peerConnection
        .getSenders()
        .forEach((sender) => sender.track?.stop());
      peerConnection.close();
      peerConnection = null;
    }
    dataChannel = null;
    updateStatus(`Initialization error: ${error.message}`);
    console.error("Initialization error:", error);
  }
}
91 |
/**
 * Append a timestamped message to the status log and mirror it to the console.
 *
 * Uses text nodes instead of `innerHTML +=` so that message content can never
 * be parsed as HTML (the old code re-parsed the entire log on every call and
 * would execute any markup embedded in a message), and adds an explicit <br>
 * because a bare "\n" does not render as a line break in HTML.
 */
function updateStatus(message) {
  const timestamp = new Date().toLocaleTimeString();
  statusDiv.appendChild(document.createTextNode(`[${timestamp}] ${message}`));
  statusDiv.appendChild(document.createElement("br"));
  console.log(message);
}
97 |
// Start a conversation: ask the model for a text+audio response once the
// data channel is open, then flip the button states.
startBtn.addEventListener("click", async () => {
  try {
    if (!dataChannel || dataChannel.readyState !== "open") {
      return; // channel not ready yet — nothing to do
    }
    const responseRequest = {
      type: "response.create",
      response: {
        modalities: ["text", "audio"],
        instructions: "Hello, how can I help you today?",
      },
    };
    dataChannel.send(JSON.stringify(responseRequest));
    updateStatus("Started conversation");
    startBtn.disabled = true;
    stopBtn.disabled = false;
  } catch (error) {
    updateStatus(`Error starting conversation: ${error.message}`);
  }
});
117 |
// Stop the active conversation over the data channel and restore the buttons.
stopBtn.addEventListener("click", () => {
  try {
    const channelOpen = dataChannel && dataChannel.readyState === "open";
    if (channelOpen) {
      // NOTE(review): "response.stop" — confirm this event name against the
      // Realtime API reference; the documented cancel event may differ.
      dataChannel.send(JSON.stringify({ type: "response.stop" }));
      updateStatus("Stopped conversation");
      startBtn.disabled = false;
      stopBtn.disabled = true;
    }
  } catch (error) {
    updateStatus(`Error stopping conversation: ${error.message}`);
  }
});
130 |
131 | // 初始化应用
132 | init();
133 |
--------------------------------------------------------------------------------