├── .github └── FUNDING.yml ├── front ├── .idea │ ├── .gitignore │ ├── vcs.xml │ ├── misc.xml │ ├── modules.xml │ └── front.iml ├── styles.css ├── index.html └── script.js ├── java_backend ├── target │ └── classes │ │ ├── application.properties │ │ ├── org │ │ └── example │ │ │ ├── Application.class │ │ │ └── wep │ │ │ └── WebRTCController.class │ │ ├── desktop.ini │ │ └── application-test.properties ├── src │ ├── main │ │ ├── resources │ │ │ ├── application.properties │ │ │ ├── application-test.properties │ │ │ └── desktop.ini │ │ ├── desktop.ini │ │ └── java │ │ │ ├── desktop.ini │ │ │ └── org │ │ │ ├── desktop.ini │ │ │ └── example │ │ │ ├── desktop.ini │ │ │ ├── wep │ │ │ ├── desktop.ini │ │ │ ├── WebRTCController.java │ │ │ └── WebRTCWithEphemeralKeyController.java │ │ │ └── Application.java │ ├── desktop.ini │ └── test │ │ ├── desktop.ini │ │ └── java │ │ └── desktop.ini ├── .idea │ ├── vcs.xml │ ├── .gitignore │ ├── encodings.xml │ ├── misc.xml │ ├── compiler.xml │ └── jarRepositories.xml ├── desktop.ini └── pom.xml ├── .idea ├── vcs.xml ├── .gitignore ├── misc.xml ├── modules.xml └── FrankFu.iml ├── python_backend ├── .idea │ ├── vcs.xml │ ├── .gitignore │ ├── inspectionProfiles │ │ ├── profiles_settings.xml │ │ └── Project_Default.xml │ ├── modules.xml │ ├── misc.xml │ └── python_backend.iml ├── WebRTC.py └── WebRTCWithEphemeralKey.py ├── LICENSE └── Readme.md /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | 2 | buy_me_a_coffee: fuwei007 3 | -------------------------------------------------------------------------------- /front/.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | -------------------------------------------------------------------------------- /java_backend/target/classes/application.properties: -------------------------------------------------------------------------------- 1 
| spring.profiles.active=test 2 | -------------------------------------------------------------------------------- /java_backend/src/main/resources/application.properties: -------------------------------------------------------------------------------- 1 | spring.profiles.active=test 2 | -------------------------------------------------------------------------------- /java_backend/src/main/resources/application-test.properties: -------------------------------------------------------------------------------- 1 | server.port=8813 2 | baseUrl=http://localhost:8813 3 | 4 | 5 | apiKey=your key here 6 | 7 | 8 | -------------------------------------------------------------------------------- /java_backend/target/classes/org/example/Application.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fuwei007/OpenAIRealTimeAPIWebRTC/HEAD/java_backend/target/classes/org/example/Application.class -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /java_backend/target/classes/org/example/wep/WebRTCController.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fuwei007/OpenAIRealTimeAPIWebRTC/HEAD/java_backend/target/classes/org/example/wep/WebRTCController.class -------------------------------------------------------------------------------- /front/.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | # 
Editor-based HTTP Client requests 5 | /httpRequests/ 6 | # Datasource local storage ignored files 7 | /dataSources/ 8 | /dataSources.local.xml 9 | -------------------------------------------------------------------------------- /java_backend/.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /python_backend/.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /java_backend/.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | # Editor-based HTTP Client requests 5 | /httpRequests/ 6 | # Datasource local storage ignored files 7 | /dataSources/ 8 | /dataSources.local.xml 9 | -------------------------------------------------------------------------------- /python_backend/.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | # Editor-based HTTP Client requests 5 | /httpRequests/ 6 | # Datasource local storage ignored files 7 | /dataSources/ 8 | /dataSources.local.xml 9 | -------------------------------------------------------------------------------- /python_backend/.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /java_backend/desktop.ini: 
-------------------------------------------------------------------------------- 1 | [.ShellClassInfo] 2 | ConfirmFileOp=0 3 | IconResource=C:\Program Files\Google\Drive File Stream\101.0.3.0\GoogleDriveFS.exe,26 4 | -------------------------------------------------------------------------------- /java_backend/src/desktop.ini: -------------------------------------------------------------------------------- 1 | [.ShellClassInfo] 2 | ConfirmFileOp=0 3 | IconResource=C:\Program Files\Google\Drive File Stream\101.0.3.0\GoogleDriveFS.exe,26 4 | -------------------------------------------------------------------------------- /java_backend/src/main/desktop.ini: -------------------------------------------------------------------------------- 1 | [.ShellClassInfo] 2 | ConfirmFileOp=0 3 | IconResource=C:\Program Files\Google\Drive File Stream\101.0.3.0\GoogleDriveFS.exe,26 4 | -------------------------------------------------------------------------------- /java_backend/src/test/desktop.ini: -------------------------------------------------------------------------------- 1 | [.ShellClassInfo] 2 | ConfirmFileOp=0 3 | IconResource=C:\Program Files\Google\Drive File Stream\101.0.3.0\GoogleDriveFS.exe,26 4 | -------------------------------------------------------------------------------- /java_backend/src/main/java/desktop.ini: -------------------------------------------------------------------------------- 1 | [.ShellClassInfo] 2 | ConfirmFileOp=0 3 | IconResource=C:\Program Files\Google\Drive File Stream\101.0.3.0\GoogleDriveFS.exe,26 4 | -------------------------------------------------------------------------------- /java_backend/src/test/java/desktop.ini: -------------------------------------------------------------------------------- 1 | [.ShellClassInfo] 2 | ConfirmFileOp=0 3 | IconResource=C:\Program Files\Google\Drive File Stream\101.0.3.0\GoogleDriveFS.exe,26 4 | -------------------------------------------------------------------------------- 
/java_backend/target/classes/desktop.ini: -------------------------------------------------------------------------------- 1 | [.ShellClassInfo] 2 | ConfirmFileOp=0 3 | IconResource=C:\Program Files\Google\Drive File Stream\101.0.3.0\GoogleDriveFS.exe,26 4 | -------------------------------------------------------------------------------- /java_backend/src/main/java/org/desktop.ini: -------------------------------------------------------------------------------- 1 | [.ShellClassInfo] 2 | ConfirmFileOp=0 3 | IconResource=C:\Program Files\Google\Drive File Stream\101.0.3.0\GoogleDriveFS.exe,26 4 | -------------------------------------------------------------------------------- /java_backend/src/main/resources/desktop.ini: -------------------------------------------------------------------------------- 1 | [.ShellClassInfo] 2 | ConfirmFileOp=0 3 | IconResource=C:\Program Files\Google\Drive File Stream\101.0.3.0\GoogleDriveFS.exe,26 4 | -------------------------------------------------------------------------------- /java_backend/src/main/java/org/example/desktop.ini: -------------------------------------------------------------------------------- 1 | [.ShellClassInfo] 2 | ConfirmFileOp=0 3 | IconResource=C:\Program Files\Google\Drive File Stream\101.0.3.0\GoogleDriveFS.exe,26 4 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /front/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /front/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 
-------------------------------------------------------------------------------- /java_backend/src/main/java/org/example/wep/desktop.ini: -------------------------------------------------------------------------------- 1 | [.ShellClassInfo] 2 | ConfirmFileOp=0 3 | IconResource=C:\Program Files\Google\Drive File Stream\101.0.3.0\GoogleDriveFS.exe,26 4 | -------------------------------------------------------------------------------- /java_backend/.idea/encodings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /java_backend/target/classes/application-test.properties: -------------------------------------------------------------------------------- 1 | server.port=8813 2 | baseUrl=http://localhost:8813 3 | 4 | 5 | apiKey=your key here 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /python_backend/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /python_backend/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | -------------------------------------------------------------------------------- /.idea/FrankFu.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /front/.idea/front.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 
-------------------------------------------------------------------------------- /python_backend/.idea/python_backend.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /java_backend/src/main/java/org/example/Application.java: -------------------------------------------------------------------------------- 1 | package org.example; 2 | 3 | import org.springframework.boot.SpringApplication; 4 | import org.springframework.boot.autoconfigure.SpringBootApplication; 5 | 6 | /** 7 | * @author admin 8 | */ 9 | @SpringBootApplication 10 | public class Application { 11 | 12 | public static void main(String[] args) { 13 | SpringApplication.run(Application.class, args); 14 | } 15 | } 16 | 17 | -------------------------------------------------------------------------------- /java_backend/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /front/styles.css: -------------------------------------------------------------------------------- 1 | /* Ensure the body takes up the full height */ 2 | body { 3 | margin: 0; 4 | display: flex; 5 | flex-direction: column; 6 | min-height: 100vh; 7 | } 8 | 9 | /* Main content should expand to fill available space */ 10 | .content { 11 | flex: 1; 12 | } 13 | 14 | /* Sticky footer styling */ 15 | footer { 16 | background-color: #333; 17 | color: #fff; 18 | text-align: center; 19 | padding: 10px; 20 | position: sticky; 21 | bottom: 0; 22 | } 23 | 24 | footer a { 25 | color: #fff; 26 | } 27 | -------------------------------------------------------------------------------- /front/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Just a website 7 | 8 | 9 | 10 | 11 |
12 |

This is a plain old website

13 |

This is just a plain website that is using plain old JavaScript

14 | 15 | 16 |
17 | 18 | 19 | -------------------------------------------------------------------------------- /java_backend/.idea/compiler.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /java_backend/.idea/jarRepositories.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 9 | 10 | 14 | 15 | 19 | 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Frank Fu 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /python_backend/.idea/inspectionProfiles/Project_Default.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 22 | -------------------------------------------------------------------------------- /java_backend/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | org.example 8 | realtime_api_springboot 9 | 1.0-SNAPSHOT 10 | 11 | 12 | 8 13 | 8 14 | UTF-8 15 | 16 | 17 | 18 | 19 | com.fasterxml.jackson.core 20 | jackson-databind 21 | 2.15.0 22 | 23 | 24 | org.springframework.boot 25 | spring-boot-starter-web 26 | 2.7.13 27 | 28 | org.springframework.boot 29 | spring-boot-starter-websocket 30 | 2.7.13 31 | 32 | 33 | com.squareup.okhttp3 34 | okhttp 35 | 4.12.0 36 | 37 | 38 | -------------------------------------------------------------------------------- /python_backend/WebRTC.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, request, jsonify 2 | from flask_cors import CORS # Import CORS 3 | import requests 4 | import os 5 | 6 | app = Flask(__name__) 7 | 8 | # Allow all domains to access your API (this is fine in development, but in production, specific domains should be restricted) 9 | CORS(app) 10 | 11 | # Configure OpenAI API URL and default instructions 12 | OPENAI_API_URL = "https://api.openai.com/v1/realtime" 13 | DEFAULT_INSTRUCTIONS = "You are helpful and have some tools installed.\n\nIn the tools you have the ability to control a robot hand." 14 | OPENAI_API_KEY = os.environ['OPENAI_API_KEY'] # Insert your own OpenAI key 15 | 16 | # Homepage route (optional) 17 | @app.route('/') 18 | def home(): 19 | return "Flask API is running!" 
20 | 21 | @app.route('/api/rtc-connect', methods=['POST']) 22 | def connect_rtc(): 23 | # Get the request body from the client 24 | body = request.get_data(as_text=True) 25 | 26 | # Build the OpenAI API request URL 27 | url = f"{OPENAI_API_URL}?model=gpt-4o-realtime-preview-2024-12-17&instructions={DEFAULT_INSTRUCTIONS}&voice=ash" 28 | 29 | # Set the request headers 30 | headers = { 31 | "Authorization": f"Bearer {OPENAI_API_KEY}", 32 | "Content-Type": "application/sdp" 33 | } 34 | 35 | # Send POST request to the OpenAI API 36 | response = requests.post(url, headers=headers, data=body) 37 | 38 | # Return the OpenAI response, maintaining the same content type 39 | return response.content, 200, {'Content-Type': 'application/sdp'} 40 | 41 | if __name__ == '__main__': 42 | # Set Flask app to run on port 8813 43 | app.run(debug=True, port=8813) 44 | -------------------------------------------------------------------------------- /java_backend/src/main/java/org/example/wep/WebRTCController.java: -------------------------------------------------------------------------------- 1 | package org.example.wep; 2 | 3 | import org.springframework.beans.factory.annotation.Value; 4 | import org.springframework.http.*; 5 | import org.springframework.web.bind.annotation.CrossOrigin; 6 | import org.springframework.web.bind.annotation.PostMapping; 7 | import org.springframework.web.bind.annotation.RequestBody; 8 | import org.springframework.web.bind.annotation.RestController; 9 | import org.springframework.web.client.RestTemplate; 10 | import org.springframework.web.util.UriComponentsBuilder; 11 | 12 | @RestController 13 | public class WebRTCController { 14 | 15 | 16 | 17 | @Value("${apiKey}") 18 | private String openaiApiKey; 19 | private static final String OPENAI_API_URL = "https://api.openai.com/v1/realtime"; 20 | private static final String DEFAULT_INSTRUCTIONS = "You are helpful and have some tools installed.\n\nIn the tools you have the ability to control a robot hand."; 21 | 22 
| @CrossOrigin(origins = "*") // Allow requests from this origin 23 | @PostMapping("/api/rtc-connect") 24 | public ResponseEntity connectRTC(@RequestBody String body) { 25 | // Build the URL with query parameters 26 | String url = UriComponentsBuilder.fromHttpUrl(OPENAI_API_URL) 27 | .queryParam("model", "gpt-4o-realtime-preview-2024-12-17") 28 | .queryParam("instructions", DEFAULT_INSTRUCTIONS) 29 | .queryParam("voice", "ash") 30 | .toUriString(); 31 | 32 | // Set up the headers 33 | HttpHeaders headers = new HttpHeaders(); 34 | headers.set("Authorization", "Bearer " + openaiApiKey); 35 | headers.setContentType(MediaType.parseMediaType("application/sdp")); 36 | 37 | // Create the request entity 38 | org.springframework.http.HttpEntity requestEntity = new org.springframework.http.HttpEntity<>(body, headers); 39 | 40 | // Send the request to OpenAI API 41 | RestTemplate restTemplate = new RestTemplate(); 42 | ResponseEntity response = restTemplate.exchange(url, HttpMethod.POST, requestEntity, String.class); 43 | 44 | // If response is not OK, throw an exception 45 | if (!response.getStatusCode().is2xxSuccessful()) { 46 | throw new RuntimeException("OpenAI API error: " + response.getStatusCode()); 47 | } 48 | 49 | // Return the SDP response with the correct content type 50 | return ResponseEntity.ok() 51 | .contentType(MediaType.parseMediaType("application/sdp")) 52 | .body(response.getBody()); 53 | } 54 | } -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | Youtube Video 2 | https://www.youtube.com/watch?v=Wnp0TnR46yc 3 | 4 | # 1. WebRTC 5 | The main purpose of WebRTC is to enable audio stream transmission and data channel communication between browsers. WebRTC allows two browsers to communicate in real-time without the need for a traditional server intermediary. 6 | 7 | ## 1. 
Audio Stream Transmission: 8 | Through WebRTC, code can capture the audio stream of the local browser (such as sound input from a microphone) and transmit it to a remote browser. This means that when a WebRTC connection is established, if the user speaks in the browser, the audio data can be transmitted in real-time to the remote end and played in the remote browser. In the code, `navigator.mediaDevices.getUserMedia({ audio: true })` captures the microphone's audio stream, and the audio stream is transmitted to the remote end through `peerConnection.addTransceiver(track)`. 9 | 10 | ## 2. Data Channel Communication: 11 | In addition to supporting audio and video stream transmission, WebRTC also supports Data Channel communication. The Data Channel is a reliable, low-latency transmission method that can be used to transmit text, binary data, and other formats between browsers. In this code, the `dataChannel` is used to exchange JSON-formatted data. With WebRTC, browsers can therefore not only transmit audio streams but also, through the data channel, exchange commands with remote systems. For example, when a remote system requests the execution of a JavaScript function (such as changing the background color or retrieving HTML content from a page), the data channel transmits these commands, the browser performs the operation, and the result is sent back through the data channel. 12 | 13 | --- 14 | 15 | # 2. Functionality Implemented in the Demo 16 | Retrieve the current page's HTML content, change webpage background color, change font color, change button size and color. 17 | 18 | --- 19 | 20 | # 3. Detailed Steps: 21 | 22 | ## 1. Local Browser Initiates Request: 23 | The user's browser initiates a connection request through WebRTC by sending a request to the backend's `/api/rtc-connect` interface. This request carries the local browser's SDP offer, which describes its media (audio) setup, network settings, and other session information. 24 | 25 | ## 2. 
Backend Processes the Request: 26 | The backend processes the request and calls the OpenAI API to generate the WebRTC SDP (Session Description Protocol) information. This SDP contains configuration information for audio streams, data channels, network addresses, and other parameters necessary to negotiate the WebRTC connection. 27 | 28 | ## 3. Backend Returns SDP Information: 29 | The backend returns the SDP data to the local browser. This SDP data contains all the configuration information required for the WebRTC connection. 30 | 31 | ## 4. Local Browser Processes SDP Data: 32 | The local browser uses the returned SDP data to initiate the connection through the WebRTC protocol. Specifically: 33 | - Audio Stream: The local browser begins receiving and sending audio streams through WebRTC's audio settings (rtpmap, rtcp, etc.). 34 | - Data Channel: The data channel setup is used for subsequent message transmission and control. 35 | - ICE Connection: Through configurations like ice-ufrag and ice-pwd, the browser performs NAT traversal and establishes a network connection with the remote device. 36 | 37 | ## 5. WebRTC Connection Established: 38 | The local browser establishes a WebRTC connection with the remote device (OpenAI API) and begins transmitting data through the audio stream and data channel. 39 | 40 | ## 6. Remote Device (OpenAI API): 41 | In this scenario, the OpenAI API acts as the remote WebRTC endpoint, receiving audio stream data from the local browser, processing it, and returning appropriate data or audio feedback. 42 | 43 | --- 44 | 45 | # 4. Configuration 46 | 1. Configure backend port, apiKey 47 | 2. 
Configure frontend link to the backend baseUrl 48 | -------------------------------------------------------------------------------- /java_backend/src/main/java/org/example/wep/WebRTCWithEphemeralKeyController.java: -------------------------------------------------------------------------------- 1 | package org.example.wep; 2 | 3 | import org.springframework.beans.factory.annotation.Value; 4 | import org.springframework.http.*; 5 | import org.springframework.web.bind.annotation.*; 6 | import org.springframework.web.client.RestTemplate; 7 | import org.springframework.web.util.UriComponentsBuilder; 8 | 9 | @RestController 10 | @RequestMapping("/api") 11 | public class WebRTCWithEphemeralKeyController { 12 | 13 | @Value("${apiKey}") 14 | private String openaiApiKey; 15 | 16 | private static final String OPENAI_SESSION_URL = "https://api.openai.com/v1/realtime/sessions"; 17 | private static final String OPENAI_API_URL = "https://api.openai.com/v1/realtime"; // May vary based on requirements 18 | private static final String MODEL_ID = "gpt-4o-realtime-preview-2024-12-17"; 19 | private static final String VOICE = "ash"; // Or other voices 20 | private static final String DEFAULT_INSTRUCTIONS = "You are helpful and have some tools installed.\n\nIn the tools you have the ability to control a robot hand."; 21 | 22 | /** 23 | * RTC connection endpoint for handling WebRTC SDP exchange and generating/using ephemeral tokens. 
24 | */ 25 | @CrossOrigin(origins = "*") // Adjust allowed origins as needed to enhance security 26 | @PostMapping("/rtc-connect") 27 | public ResponseEntity connectRTC(@RequestBody String clientSdp) { 28 | RestTemplate restTemplate = new RestTemplate(); 29 | 30 | // Step 1: Generate ephemeral API token 31 | HttpHeaders tokenHeaders = new HttpHeaders(); 32 | tokenHeaders.set("Authorization", "Bearer " + openaiApiKey); 33 | tokenHeaders.setContentType(MediaType.APPLICATION_JSON); 34 | 35 | String tokenRequestBody = String.format("{\"model\": \"%s\", \"voice\": \"%s\"}", MODEL_ID, VOICE); 36 | 37 | HttpEntity tokenRequestEntity = new HttpEntity<>(tokenRequestBody, tokenHeaders); 38 | 39 | ResponseEntity tokenResponse = restTemplate.exchange( 40 | OPENAI_SESSION_URL, 41 | HttpMethod.POST, 42 | tokenRequestEntity, 43 | String.class 44 | ); 45 | 46 | if (!tokenResponse.getStatusCode().is2xxSuccessful()) { 47 | throw new RuntimeException("Failed to obtain ephemeral token, status code: " + tokenResponse.getStatusCode()); 48 | } 49 | 50 | // Assuming the returned JSON contains a `client_secret.value` field as the ephemeral token 51 | // Needs to be parsed based on the actual response structure 52 | String ephemeralToken; 53 | try { 54 | // Use Jackson or another JSON parsing library to parse the response 55 | // Here, assuming Jackson is used 56 | com.fasterxml.jackson.databind.ObjectMapper mapper = new com.fasterxml.jackson.databind.ObjectMapper(); 57 | com.fasterxml.jackson.databind.JsonNode root = mapper.readTree(tokenResponse.getBody()); 58 | ephemeralToken = root.path("client_secret").path("value").asText(); 59 | 60 | if (ephemeralToken == null || ephemeralToken.isEmpty()) { 61 | throw new RuntimeException("Ephemeral token is empty"); 62 | } 63 | } catch (Exception e) { 64 | throw new RuntimeException("Failed to parse ephemeral token: " + e.getMessage()); 65 | } 66 | 67 | // Step 2: Use the ephemeral token to perform SDP exchange with OpenAI's Realtime API 68 | // 
Build the URL 69 | String url = UriComponentsBuilder.fromHttpUrl(OPENAI_API_URL) 70 | .queryParam("model", MODEL_ID) 71 | .queryParam("instructions", DEFAULT_INSTRUCTIONS) 72 | .queryParam("voice", VOICE) 73 | .toUriString(); 74 | 75 | // Set request headers 76 | HttpHeaders sdpHeaders = new HttpHeaders(); 77 | sdpHeaders.set("Authorization", "Bearer " + ephemeralToken); 78 | sdpHeaders.setContentType(MediaType.parseMediaType("application/sdp")); 79 | 80 | // Create the request entity 81 | HttpEntity sdpRequestEntity = new HttpEntity<>(clientSdp, sdpHeaders); 82 | 83 | // Send SDP to OpenAI Realtime API 84 | ResponseEntity sdpResponse = restTemplate.exchange( 85 | url, 86 | HttpMethod.POST, 87 | sdpRequestEntity, 88 | String.class 89 | ); 90 | 91 | if (!sdpResponse.getStatusCode().is2xxSuccessful()) { 92 | throw new RuntimeException("OpenAI API SDP exchange error, status code: " + sdpResponse.getStatusCode()); 93 | } 94 | 95 | // Return OpenAI's SDP response to the client 96 | return ResponseEntity.ok() 97 | .contentType(MediaType.parseMediaType("application/sdp")) 98 | .body(sdpResponse.getBody()); 99 | } 100 | 101 | } 102 | -------------------------------------------------------------------------------- /python_backend/WebRTCWithEphemeralKey.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, request, Response 2 | from flask_cors import CORS 3 | import requests 4 | import os 5 | import json 6 | import logging 7 | from dotenv import load_dotenv 8 | 9 | # Load environment variables from .env file (optional) 10 | load_dotenv() 11 | 12 | app = Flask(__name__) 13 | 14 | # Configure CORS to allow all origins (adjust in production for better security) 15 | CORS(app) 16 | 17 | # Configure logging 18 | logging.basicConfig(level=logging.INFO) 19 | logger = logging.getLogger(__name__) 20 | 21 | # Configuration 22 | OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY') 23 | if not OPENAI_API_KEY: 24 | 
logger.error("OPENAI_API_KEY environment variable not set.") 25 | raise EnvironmentError("OPENAI_API_KEY environment variable not set.") 26 | 27 | OPENAI_SESSION_URL = "https://api.openai.com/v1/realtime/sessions" 28 | OPENAI_API_URL = "https://api.openai.com/v1/realtime" # May vary based on requirements 29 | MODEL_ID = "gpt-4o-realtime-preview-2024-12-17" 30 | VOICE = "ash" # Or other voices 31 | DEFAULT_INSTRUCTIONS = "You are helpful and have some tools installed.\n\nIn the tools you have the ability to control a robot hand." 32 | 33 | @app.route('/') 34 | def home(): 35 | return "Flask API is running!" 36 | 37 | @app.route('/api/rtc-connect', methods=['POST']) 38 | def connect_rtc(): 39 | """ 40 | RTC connection endpoint for handling WebRTC SDP exchange and generating/using ephemeral tokens. 41 | """ 42 | try: 43 | # Step 1: Retrieve the client's SDP from the request body 44 | client_sdp = request.get_data(as_text=True) 45 | if not client_sdp: 46 | logger.error("No SDP provided in the request body.") 47 | return Response("No SDP provided in the request body.", status=400) 48 | 49 | logger.info("Received SDP from client.") 50 | 51 | # Step 2: Generate ephemeral API token 52 | token_headers = { 53 | "Authorization": f"Bearer {OPENAI_API_KEY}", 54 | "Content-Type": "application/json" 55 | } 56 | token_payload = { 57 | "model": MODEL_ID, 58 | "voice": VOICE 59 | } 60 | 61 | logger.info("Requesting ephemeral token from OpenAI.") 62 | 63 | token_response = requests.post(OPENAI_SESSION_URL, headers=token_headers, json=token_payload) 64 | 65 | if not token_response.ok: 66 | logger.error(f"Failed to obtain ephemeral token, status code: {token_response.status_code}, response: {token_response.text}") 67 | return Response(f"Failed to obtain ephemeral token, status code: {token_response.status_code}", status=500) 68 | 69 | token_data = token_response.json() 70 | # Adjust the path based on the actual response structure 71 | # Assuming the ephemeral token is located at 
`client_secret.value` 72 | ephemeral_token = token_data.get('client_secret', {}).get('value', '') 73 | 74 | if not ephemeral_token: 75 | logger.error("Ephemeral token is empty or not found in the response.") 76 | return Response("Ephemeral token is empty or not found in the response.", status=500) 77 | 78 | logger.info("Ephemeral token obtained successfully.") 79 | 80 | # Step 3: Perform SDP exchange with OpenAI's Realtime API using the ephemeral token 81 | sdp_headers = { 82 | "Authorization": f"Bearer {ephemeral_token}", 83 | "Content-Type": "application/sdp" 84 | } 85 | sdp_params = { 86 | "model": MODEL_ID, 87 | "instructions": DEFAULT_INSTRUCTIONS, 88 | "voice": VOICE 89 | } 90 | 91 | # Build the full URL with query parameters 92 | sdp_url = requests.Request('POST', OPENAI_API_URL, params=sdp_params).prepare().url 93 | 94 | logger.info(f"Sending SDP to OpenAI Realtime API at {sdp_url}") 95 | 96 | sdp_response = requests.post(sdp_url, headers=sdp_headers, data=client_sdp) 97 | 98 | if not sdp_response.ok: 99 | logger.error(f"OpenAI API SDP exchange error, status code: {sdp_response.status_code}, response: {sdp_response.text}") 100 | return Response(f"OpenAI API SDP exchange error, status code: {sdp_response.status_code}", status=500) 101 | 102 | logger.info("SDP exchange with OpenAI completed successfully.") 103 | 104 | # Step 4: Return OpenAI's SDP response to the client with the correct content type 105 | return Response( 106 | response=sdp_response.content, 107 | status=200, 108 | mimetype='application/sdp' 109 | ) 110 | 111 | except Exception as e: 112 | logger.exception("An error occurred during the RTC connection process.") 113 | return Response(f"An error occurred: {str(e)}", status=500) 114 | 115 | if __name__ == '__main__': 116 | # Ensure the server runs on port 8813 117 | app.run(debug=True, port=8813) 118 | -------------------------------------------------------------------------------- /front/script.js: 
// --------------------------------------------------------------------------------
// Set the basic API address for communication with the backend server
const baseUrl = "http://107.204.228.201:8813";
// Flag indicating whether WebRTC is active, controls the enabling and disabling of connections
let isWebRTCActive = false;
// Create variables related to the WebRTC connection
let peerConnection;
let dataChannel;

// Functions that can be invoked by name from messages arriving on the data channel.
// Each one receives a single arguments object (or nothing) and returns a plain
// result object that the caller serialises with JSON.stringify.
const fns = {
  // Get the HTML content of the current page.
  // Returns { success: true, html } where html is the whole document's outerHTML.
  getPageHTML: () => {
    return {
      success: true,
      html: document.documentElement.outerHTML,
    };
  },

  // Change the background color of the webpage.
  // `color` is written to document.body.style.backgroundColor as-is and echoed back.
  changeBackgroundColor: ({ color }) => {
    document.body.style.backgroundColor = color;
    return { success: true, color };
  },

  // Change the text color of the webpage.
  // `color` is written to document.body.style.color as-is and echoed back.
  changeTextColor: ({ color }) => {
    document.body.style.color = color;
    return { success: true, color };
  },

  // Change the button's style (size and color).
  // Targets the first <button> on the page (modify the selector if there are
  // multiple buttons). `size` sets the font size and `color` sets the
  // background color; a falsy value leaves that property untouched.
  // Returns { success: false, message } when the page has no button.
  changeButtonStyle: ({ size, color }) => {
    const button = document.querySelector('button');
    if (!button) {
      return { success: false, message: 'Button element not found' };
    }
    if (size) {
      button.style.fontSize = size;
    }
    if (color) {
      button.style.backgroundColor = color;
    }
    return { success: true, size, color };
  },
};

// When an audio stream is
// received, add it to the page and play it
function handleTrack(event) {
  const el = document.createElement('audio'); // Create an audio element
  el.srcObject = event.streams[0]; // Set the audio stream as the element's source
  el.autoplay = el.controls = true; // Autoplay and display audio controls
  document.body.appendChild(el); // Add the audio element to the page
  remoteAudioElements.push(el); // Remember it so stopWebRTC() can take it off the page again
}

// <audio> elements added by handleTrack during the current session.
// Without this list every start/stop cycle would leave another dead player on the page.
const remoteAudioElements = [];

// Create a data channel for transmitting control messages (such as function calls)
function createDataChannel() {
  // Create a data channel named 'response'.
  // The listeners below use this local reference rather than the global
  // `dataChannel`, which stopWebRTC() resets to null while a handler may still be running.
  const channel = peerConnection.createDataChannel('response');
  dataChannel = channel;

  channel.addEventListener('open', () => {
    console.log('Data channel opened');
    configureData(); // Configure data channel functions
  });

  channel.addEventListener('message', async (ev) => {
    let msg;
    try {
      msg = JSON.parse(ev.data); // Parse the received message
    } catch (err) {
      console.error('Ignoring data channel message that is not valid JSON', err);
      return;
    }
    // Only 'response.function_call_arguments.done' messages are function call requests
    if (!msg || msg.type !== 'response.function_call_arguments.done') return;

    // Own-property check: the name comes from the remote side, and a plain
    // `fns[name]` lookup would also resolve inherited members such as "constructor".
    if (!Object.prototype.hasOwnProperty.call(fns, msg.name)) {
      console.warn(`Ignoring call to unknown function ${msg.name}`);
      return;
    }
    const fn = fns[msg.name];

    console.log(`Calling local function ${msg.name}, parameters ${msg.arguments}`);
    let result;
    try {
      // An empty arguments string is treated as "no arguments"
      const args = JSON.parse(msg.arguments || '{}');
      result = await fn(args); // Call the local function and wait for the result
    } catch (err) {
      // Report the failure as the function's output, using the same
      // { success: false, message } shape that fns.changeButtonStyle returns on failure.
      console.error(`Local function ${msg.name} failed`, err);
      result = { success: false, message: String(err) };
    }
    console.log('Result', result);

    // Send the result of the function execution back to the other party
    const event = {
      type: 'conversation.item.create', // Create conversation item event
      item: {
        type: 'function_call_output', // Function call output
        call_id: msg.call_id, // Passed call_id
        output: JSON.stringify(result), // JSON string of the function execution result
      },
    };
    // The channel may have been closed while the function was running
    if (channel.readyState === 'open') {
      channel.send(JSON.stringify(event));
    }
  });
}

// Configure data channel functions and tools.
// Sends one 'session.update' event over the global `dataChannel`; it is
// called from the channel's 'open' listener.
function configureData() {
  console.log('Configuring data channel');
  const event = {
    type: 'session.update', // Session update event
    session: {
      modalities: ['text', 'audio'], // Supported interaction modes: text and audio
      // Provide functional tools; the tool names must match the keys of the fns object
      tools: [
        {
          type: 'function', // Tool type is function
          name: 'changeBackgroundColor', // Function name
          description: 'Change the background color of the webpage', // Description
          parameters: { // Parameter description
            type: 'object',
            properties: {
              color: {
                type: 'string',
                description: 'Hexadecimal value of the color'
              }, // Color parameter
            },
          },
        },
        {
          type: 'function',
          name: 'changeTextColor',
          description: 'Change the text color of the webpage',
          parameters: {
            type: 'object',
            properties: {
              color: {
                type: 'string',
                description: 'Hexadecimal value of the color'
              },
            },
          },
        },
        {
          type: 'function',
          name: 'getPageHTML',
          description: 'Get the HTML content of the current page',
        },
        {
          type: 'function', // Tool type is function
          name: 'changeButtonStyle', // Function name
          description: 'Change the size and color of the button', // Description
          parameters: { // Parameter description
            type: 'object',
            properties: {
              size: {
                type: 'string',
                description: 'Font size of the button (e.g., "16px" or "1em")'
              }, // Button size
              color: {
                type: 'string',
                description: 'Background color of the button (e.g., "#ff0000" or "red")'
              }, // Button color
            },
          },
        },
      ],
    },
  };
  dataChannel.send(JSON.stringify(event)); // Send the configured event data
}

// Get the control button element
const toggleButton = document.getElementById('toggleWebRTCButton');

// Make the button label reflect the current connection state
function updateToggleLabel() {
  toggleButton.textContent = isWebRTCActive ? 'stop' : 'start';
}

// Add a click event listener to the button to toggle the WebRTC connection state
toggleButton.addEventListener('click', () => {
  // If WebRTC is active, stop the connection; otherwise, start WebRTC
  if (isWebRTCActive) {
    stopWebRTC();
  } else {
    // startWebRTC() handles its own errors, so the returned promise never rejects
    startWebRTC();
  }
  // The label is derived from the flag rather than assumed, so a start that
  // fails before its first await does not leave the button saying 'stop'.
  updateToggleLabel();
});

// Capture microphone input stream and initiate WebRTC connection.
// Returns a promise that resolves once the handshake has finished, failed or
// been abandoned. On failure the connection is torn down and the button label is reset.
async function startWebRTC() {
  // If WebRTC is already active, return directly
  if (isWebRTCActive) return;
  // Mark WebRTC as active before the first await so a second click cannot start a second session
  isWebRTCActive = true;

  let pc;
  let stream;
  try {
    // Create a new peerConnection object to establish a WebRTC connection
    pc = new RTCPeerConnection();
    peerConnection = pc;
    pc.ontrack = handleTrack; // Bind audio stream processing function
    createDataChannel(); // Create data channel

    // Request user's audio stream
    stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    if (peerConnection !== pc) {
      // stopWebRTC() ran while the permission prompt was open; these tracks
      // were never attached to the connection, so release the microphone here.
      stream.getTracks().forEach((track) => track.stop());
      return;
    }

    // Add each track from the audio stream to the peerConnection
    stream.getTracks().forEach((track) => pc.addTransceiver(track, { direction: 'sendrecv' }));

    // Create an offer for the local connection and set it as the local description
    const offer = await pc.createOffer();
    await pc.setLocalDescription(offer);

    // Send the offer to the backend for signaling exchange
    const response = await fetch(baseUrl + '/api/rtc-connect', {
      method: 'POST',
      body: offer.sdp, // Send the SDP of the offer to the backend
      headers: {
        'Content-Type': 'application/sdp',
      },
    });
    if (!response.ok) {
      // An error body is plain text, not SDP; do not feed it to setRemoteDescription
      throw new Error(`Signaling request failed with status ${response.status}`);
    }
    const answer = await response.text();
    if (peerConnection !== pc) return; // Stopped while the request was in flight

    // Get the answer returned by the backend and set it as the remote description
    await pc.setRemoteDescription({ sdp: answer, type: 'answer' });
  } catch (err) {
    console.error('Failed to start WebRTC', err);
    if (stream) stream.getTracks().forEach((track) => track.stop());
    // Tear down only if this attempt is still the current session
    // (pc is undefined when the constructor itself threw)
    if (pc === undefined || peerConnection === pc) {
      stopWebRTC();
      updateToggleLabel();
    }
  }
}

// Stop the WebRTC connection and clean up all resources
function stopWebRTC() {
  // If WebRTC is not active, return directly
  if (!isWebRTCActive) return;
  if (peerConnection) {
    // Stop the received audio tracks
    peerConnection.getReceivers().forEach((receiver) => {
      if (receiver.track) receiver.track.stop();
    });
    // Stop the outgoing tracks as well; otherwise the microphone stays captured after the session ends
    peerConnection.getSenders().forEach((sender) => {
      if (sender.track) sender.track.stop();
    });
  }
  // Close the data channel and WebRTC connection
  if (dataChannel) dataChannel.close();
  if (peerConnection) peerConnection.close();
  // Remove the audio players that handleTrack added for this session
  remoteAudioElements.forEach((el) => {
    el.srcObject = null;
    el.remove();
  });
  remoteAudioElements.length = 0;
  // Reset connection and channel objects
  peerConnection = null;
  dataChannel = null;
  // Mark WebRTC as not active
  isWebRTCActive = false;
}

// --------------------------------------------------------------------------------