├── .gitignore ├── package.json ├── server.js ├── index.html ├── public └── vite.svg ├── src ├── style.css └── main.js └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | pnpm-debug.log* 8 | lerna-debug.log* 9 | 10 | node_modules 11 | dist 12 | dist-ssr 13 | *.local 14 | 15 | # Editor directories and files 16 | .vscode/* 17 | !.vscode/extensions.json 18 | .idea 19 | .DS_Store 20 | *.suo 21 | *.ntvs* 22 | *.njsproj 23 | *.sln 24 | *.sw? 25 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "openai-webrtc-play", 3 | "private": true, 4 | "version": "0.0.0", 5 | "type": "module", 6 | "scripts": { 7 | "dev": "vite", 8 | "server": "node server.js", 9 | "build": "vite build", 10 | "preview": "vite preview" 11 | }, 12 | "dependencies": { 13 | "cors": "^2.8.5", 14 | "express": "^4.21.2" 15 | }, 16 | "devDependencies": { 17 | "vite": "^5.0.8" 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /server.js: -------------------------------------------------------------------------------- 1 | import express from "express"; 2 | import cors from "cors"; 3 | 4 | const app = express(); 5 | app.use(cors()); 6 | 7 | // Endpoint to fetch an ephemeral token (reads OPENAI_API_KEY from the environment; NOTE(review): endpoint itself is unauthenticated and CORS-open — anyone who can reach it can mint session tokens) 8 | app.get("/session", async (req, res) => { 9 | try { 10 | const response = await fetch( 11 | "https://api.openai.com/v1/realtime/sessions", 12 | { 13 | method: "POST", 14 | headers: { 15 | Authorization: `Bearer ${process.env.OPENAI_API_KEY}`, 16 | "Content-Type": "application/json", 17 | }, 18 | body: JSON.stringify({ 19 | model: "gpt-4o-realtime-preview-2024-12-17", 20 | voice: "verse", 21 | }), 22 | } 23 | ); 24 | 25 | if (!response.ok) { 26 | throw new Error(`HTTP error! 
status: ${response.status}`); 27 | } 28 | 29 | const data = await response.json(); 30 | res.json(data); 31 | } catch (error) { 32 | console.error("Error:", error); 33 | res.status(500).json({ error: error.message }); 34 | } 35 | }); 36 | 37 | const PORT = process.env.PORT || 3000; 38 | app.listen(PORT, () => { 39 | console.log(`Server is running on port ${PORT}`); 40 | }); 41 | -------------------------------------------------------------------------------- /index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | WebRTC Audio Chat 8 | 32 | 33 | 34 |
35 |

WebRTC Audio Chat

36 |
Ready to connect...
37 | 38 | 39 |
40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /public/vite.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/style.css: -------------------------------------------------------------------------------- 1 | :root { 2 | font-family: Inter, system-ui, Avenir, Helvetica, Arial, sans-serif; 3 | line-height: 1.5; 4 | font-weight: 400; 5 | color-scheme: light dark; 6 | color: rgba(255, 255, 255, 0.87); 7 | background-color: #242424; 8 | } 9 | 10 | body { 11 | margin: 0; 12 | display: flex; 13 | place-items: center; 14 | min-width: 320px; 15 | min-height: 100vh; 16 | } 17 | 18 | h1 { 19 | font-size: 2.5em; 20 | line-height: 1.1; 21 | margin-bottom: 1rem; 22 | } 23 | 24 | #app { 25 | max-width: 800px; 26 | margin: 0 auto; 27 | padding: 2rem; 28 | text-align: center; 29 | } 30 | 31 | #status { 32 | margin: 1rem; 33 | padding: 1rem; 34 | border-radius: 8px; 35 | background-color: rgba(255, 255, 255, 0.1); 36 | } 37 | 38 | button { 39 | border-radius: 8px; 40 | border: 1px solid transparent; 41 | padding: 0.8em 1.6em; 42 | font-size: 1em; 43 | font-weight: 500; 44 | font-family: inherit; 45 | background-color: #1a1a1a; 46 | cursor: pointer; 47 | transition: border-color 0.25s; 48 | margin: 0.5rem; 49 | } 50 | 51 | button:hover { 52 | border-color: #646cff; 53 | } 54 | 55 | button:focus, 56 | button:focus-visible { 57 | outline: 4px auto -webkit-focus-ring-color; 58 | } 59 | 60 | button:disabled { 61 | opacity: 0.5; 62 | cursor: not-allowed; 63 | } 64 | 65 | @media (prefers-color-scheme: light) { 66 | :root { 67 | color: #213547; 68 | background-color: #ffffff; 69 | } 70 | button { 71 | background-color: #f9f9f9; 72 | } 73 | #status { 74 | background-color: rgba(0, 0, 0, 0.05); 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | # OpenAI WebRTC Audio Chat 2 | 3 | A simple web application that demonstrates real-time audio conversation with OpenAI's Realtime API using WebRTC. This application allows users to have voice conversations with OpenAI's language models. 4 | 5 | ## Features 6 | 7 | - Real-time audio streaming using WebRTC 8 | - Two-way audio communication 9 | - Simple and intuitive UI 10 | - Support for both light and dark themes 11 | - Real-time status updates 12 | 13 | ## Prerequisites 14 | 15 | - Node.js (v14 or higher) 16 | - OpenAI API key with access to Realtime API 17 | - Modern web browser with WebRTC support 18 | 19 | ## Setup 20 | 21 | 1. Clone the repository: 22 | ```bash 23 | git clone https://github.com/realtime-ai/openai-realtime-webrtc-demo.git 24 | cd openai-realtime-webrtc-demo 25 | ``` 26 | 27 | 2. Install dependencies: 28 | ```bash 29 | npm install 30 | ``` 31 | 32 | 3. Set up your OpenAI API key: 33 | ```bash 34 | export OPENAI_API_KEY=your_api_key_here 35 | ``` 36 | 37 | ## Running the Application 38 | 39 | 1. Start the backend server: 40 | ```bash 41 | npm run server 42 | ``` 43 | 44 | 2. In a new terminal, start the frontend development server: 45 | ```bash 46 | npm run dev 47 | ``` 48 | 49 | 3. Open your browser and navigate to the URL shown in the terminal (typically http://localhost:5173) 50 | 51 | ## Usage 52 | 53 | 1. When the page loads, it will automatically initialize the WebRTC connection 54 | 2. Click "Start Recording" to begin a conversation 55 | 3. Speak into your microphone 56 | 4. The AI's responses will play through your speakers 57 | 5. 
Click "Stop Recording" to end the conversation 58 | 59 | ## Technical Details 60 | 61 | - Frontend: Vanilla JavaScript with Vite 62 | - Backend: Node.js with Express 63 | - API: OpenAI Realtime API with WebRTC 64 | - Real-time communication: WebRTC data channels and media streams 65 | 66 | ## Security Notes 67 | 68 | - The application uses ephemeral API keys for secure client-side connections 69 | - The main OpenAI API key is only used server-side 70 | - All communication is handled through secure WebRTC channels 71 | 72 | ## Development 73 | 74 | The project uses Vite for development and building. Available commands: 75 | 76 | ```bash 77 | npm run dev # Start development server 78 | npm run build # Build for production 79 | npm run preview # Preview production build 80 | npm run server # Start backend server 81 | ``` 82 | 83 | ## License 84 | 85 | MIT 86 | -------------------------------------------------------------------------------- /src/main.js: -------------------------------------------------------------------------------- 1 | import "./style.css"; 2 | 3 | const startBtn = document.getElementById("startBtn"); 4 | const stopBtn = document.getElementById("stopBtn"); 5 | const statusDiv = document.getElementById("status"); 6 | 7 | let peerConnection = null; 8 | let dataChannel = null; 9 | 10 | async function init() { 11 | try { 12 | // Fetch the ephemeral key from the server 13 | const tokenResponse = await fetch("http://localhost:3000/session"); 14 | if (!tokenResponse.ok) { 15 | throw new Error(`Failed to get token: ${tokenResponse.status}`); 16 | } 17 | const tokenData = await tokenResponse.json(); 18 | const EPHEMERAL_KEY = tokenData.client_secret.value; 19 | 20 | // Create the WebRTC peer connection 21 | peerConnection = new RTCPeerConnection(); 22 | 23 | // Listen for connection state changes 24 | peerConnection.onconnectionstatechange = () => { 25 | updateStatus(`Connection state: ${peerConnection.connectionState}`); 26 | }; 27 | 28 | peerConnection.oniceconnectionstatechange = () => { 29 | updateStatus( 30 | `ICE 
connection state: ${peerConnection.iceConnectionState}` 31 | ); 32 | }; 33 | 34 | // Set up an audio element to play audio returned by the model 35 | const audioEl = document.createElement("audio"); 36 | audioEl.autoplay = true; 37 | peerConnection.ontrack = (e) => { 38 | audioEl.srcObject = e.streams[0]; 39 | }; 40 | 41 | // Set up the data channel 42 | dataChannel = peerConnection.createDataChannel("oai-events"); 43 | dataChannel.addEventListener("message", (e) => { 44 | const event = JSON.parse(e.data); 45 | console.log("Received event:", event); 46 | updateStatus(`Received: ${event.type}`); 47 | }); 48 | 49 | // Request microphone access and add the audio track 50 | const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); 51 | peerConnection.addTrack(stream.getTracks()[0], stream); 52 | 53 | // Create and set the local SDP description 54 | const offer = await peerConnection.createOffer(); 55 | await peerConnection.setLocalDescription(offer); 56 | 57 | // Connect to the OpenAI Realtime API 58 | const baseUrl = "https://api.openai.com/v1/realtime"; 59 | const model = "gpt-4o-realtime-preview-2024-12-17"; 60 | 61 | try { 62 | const sdpResponse = await fetch(`${baseUrl}?model=${model}`, { 63 | method: "POST", 64 | body: offer.sdp, 65 | headers: { 66 | Authorization: `Bearer ${EPHEMERAL_KEY}`, 67 | "Content-Type": "application/sdp", 68 | }, 69 | }); 70 | 71 | if (!sdpResponse.ok) { 72 | throw new Error(`HTTP error! 
status: ${sdpResponse.status}`); 73 | } 74 | 75 | const answer = { 76 | type: "answer", 77 | sdp: await sdpResponse.text(), 78 | }; 79 | await peerConnection.setRemoteDescription(answer); 80 | updateStatus("Connected to OpenAI Realtime API"); 81 | startBtn.disabled = false; 82 | } catch (error) { 83 | updateStatus(`Connection error: ${error.message}`); 84 | console.error("Connection error:", error); 85 | } 86 | } catch (error) { 87 | updateStatus(`Initialization error: ${error.message}`); 88 | console.error("Initialization error:", error); 89 | } 90 | } 91 | 92 | function updateStatus(message) { 93 | const timestamp = new Date().toLocaleTimeString(); 94 | /* NOTE(review): appends via innerHTML — messages here are internal strings, but prefer textContent for anything untrusted */ statusDiv.innerHTML += `
[${timestamp}] ${message}
`; 95 | console.log(message); 96 | } 97 | 98 | startBtn.addEventListener("click", async () => { 99 | try { 100 | if (dataChannel && dataChannel.readyState === "open") { 101 | const event = { 102 | type: "response.create", 103 | response: { 104 | modalities: ["text", "audio"], 105 | instructions: "Hello, how can I help you today?", 106 | }, 107 | }; 108 | dataChannel.send(JSON.stringify(event)); 109 | updateStatus("Started conversation"); 110 | startBtn.disabled = true; 111 | stopBtn.disabled = false; 112 | } 113 | } catch (error) { 114 | updateStatus(`Error starting conversation: ${error.message}`); 115 | } 116 | }); 117 | 118 | stopBtn.addEventListener("click", () => { 119 | try { 120 | if (dataChannel && dataChannel.readyState === "open") { 121 | /* NOTE(review): Realtime API docs define "response.cancel", not "response.stop" — confirm this event type is recognized */ dataChannel.send(JSON.stringify({ type: "response.stop" })); 122 | updateStatus("Stopped conversation"); 123 | startBtn.disabled = false; 124 | stopBtn.disabled = true; 125 | } 126 | } catch (error) { 127 | updateStatus(`Error stopping conversation: ${error.message}`); 128 | } 129 | }); 130 | 131 | // Initialize the app 132 | init(); 133 | --------------------------------------------------------------------------------