├── .github
└── FUNDING.yml
├── front
├── .idea
│ ├── .gitignore
│ ├── vcs.xml
│ ├── misc.xml
│ ├── modules.xml
│ └── front.iml
├── styles.css
├── index.html
└── script.js
├── java_backend
├── target
│ └── classes
│ │ ├── application.properties
│ │ ├── org
│ │ └── example
│ │ │ ├── Application.class
│ │ │ └── wep
│ │ │ └── WebRTCController.class
│ │ ├── desktop.ini
│ │ └── application-test.properties
├── src
│ ├── main
│ │ ├── resources
│ │ │ ├── application.properties
│ │ │ ├── application-test.properties
│ │ │ └── desktop.ini
│ │ ├── desktop.ini
│ │ └── java
│ │ │ ├── desktop.ini
│ │ │ └── org
│ │ │ ├── desktop.ini
│ │ │ └── example
│ │ │ ├── desktop.ini
│ │ │ ├── wep
│ │ │ ├── desktop.ini
│ │ │ ├── WebRTCController.java
│ │ │ └── WebRTCWithEphemeralKeyController.java
│ │ │ └── Application.java
│ ├── desktop.ini
│ └── test
│ │ ├── desktop.ini
│ │ └── java
│ │ └── desktop.ini
├── .idea
│ ├── vcs.xml
│ ├── .gitignore
│ ├── encodings.xml
│ ├── misc.xml
│ ├── compiler.xml
│ └── jarRepositories.xml
├── desktop.ini
└── pom.xml
├── .idea
├── vcs.xml
├── .gitignore
├── misc.xml
├── modules.xml
└── FrankFu.iml
├── python_backend
├── .idea
│ ├── vcs.xml
│ ├── .gitignore
│ ├── inspectionProfiles
│ │ ├── profiles_settings.xml
│ │ └── Project_Default.xml
│ ├── modules.xml
│ ├── misc.xml
│ └── python_backend.iml
├── WebRTC.py
└── WebRTCWithEphemeralKey.py
├── LICENSE
└── Readme.md
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 |
2 | buy_me_a_coffee: fuwei007
3 |
--------------------------------------------------------------------------------
/front/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 |
--------------------------------------------------------------------------------
/java_backend/target/classes/application.properties:
--------------------------------------------------------------------------------
1 | spring.profiles.active=test
2 |
--------------------------------------------------------------------------------
/java_backend/src/main/resources/application.properties:
--------------------------------------------------------------------------------
1 | spring.profiles.active=test
2 |
--------------------------------------------------------------------------------
/java_backend/src/main/resources/application-test.properties:
--------------------------------------------------------------------------------
1 | server.port=8813
2 | baseUrl=http://localhost:8813
3 |
4 |
5 | apiKey=your key here
6 |
7 |
8 |
--------------------------------------------------------------------------------
/java_backend/target/classes/org/example/Application.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fuwei007/OpenAIRealTimeAPIWebRTC/HEAD/java_backend/target/classes/org/example/Application.class
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/java_backend/target/classes/org/example/wep/WebRTCController.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fuwei007/OpenAIRealTimeAPIWebRTC/HEAD/java_backend/target/classes/org/example/wep/WebRTCController.class
--------------------------------------------------------------------------------
/front/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 | # Editor-based HTTP Client requests
5 | /httpRequests/
6 | # Datasource local storage ignored files
7 | /dataSources/
8 | /dataSources.local.xml
9 |
--------------------------------------------------------------------------------
/java_backend/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/python_backend/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/java_backend/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 | # Editor-based HTTP Client requests
5 | /httpRequests/
6 | # Datasource local storage ignored files
7 | /dataSources/
8 | /dataSources.local.xml
9 |
--------------------------------------------------------------------------------
/python_backend/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 | # Editor-based HTTP Client requests
5 | /httpRequests/
6 | # Datasource local storage ignored files
7 | /dataSources/
8 | /dataSources.local.xml
9 |
--------------------------------------------------------------------------------
/python_backend/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/java_backend/desktop.ini:
--------------------------------------------------------------------------------
1 | [ . S h e l l C l a s s I n f o ]
2 | C o n f i r m F i l e O p = 0
3 | I c o n R e s o u r c e = C : \ P r o g r a m F i l e s \ G o o g l e \ D r i v e F i l e S t r e a m \ 1 0 1 . 0 . 3 . 0 \ G o o g l e D r i v e F S . e x e , 2 6
4 |
--------------------------------------------------------------------------------
/java_backend/src/desktop.ini:
--------------------------------------------------------------------------------
1 | [ . S h e l l C l a s s I n f o ]
2 | C o n f i r m F i l e O p = 0
3 | I c o n R e s o u r c e = C : \ P r o g r a m F i l e s \ G o o g l e \ D r i v e F i l e S t r e a m \ 1 0 1 . 0 . 3 . 0 \ G o o g l e D r i v e F S . e x e , 2 6
4 |
--------------------------------------------------------------------------------
/java_backend/src/main/desktop.ini:
--------------------------------------------------------------------------------
1 | [ . S h e l l C l a s s I n f o ]
2 | C o n f i r m F i l e O p = 0
3 | I c o n R e s o u r c e = C : \ P r o g r a m F i l e s \ G o o g l e \ D r i v e F i l e S t r e a m \ 1 0 1 . 0 . 3 . 0 \ G o o g l e D r i v e F S . e x e , 2 6
4 |
--------------------------------------------------------------------------------
/java_backend/src/test/desktop.ini:
--------------------------------------------------------------------------------
1 | [ . S h e l l C l a s s I n f o ]
2 | C o n f i r m F i l e O p = 0
3 | I c o n R e s o u r c e = C : \ P r o g r a m F i l e s \ G o o g l e \ D r i v e F i l e S t r e a m \ 1 0 1 . 0 . 3 . 0 \ G o o g l e D r i v e F S . e x e , 2 6
4 |
--------------------------------------------------------------------------------
/java_backend/src/main/java/desktop.ini:
--------------------------------------------------------------------------------
1 | [ . S h e l l C l a s s I n f o ]
2 | C o n f i r m F i l e O p = 0
3 | I c o n R e s o u r c e = C : \ P r o g r a m F i l e s \ G o o g l e \ D r i v e F i l e S t r e a m \ 1 0 1 . 0 . 3 . 0 \ G o o g l e D r i v e F S . e x e , 2 6
4 |
--------------------------------------------------------------------------------
/java_backend/src/test/java/desktop.ini:
--------------------------------------------------------------------------------
1 | [ . S h e l l C l a s s I n f o ]
2 | C o n f i r m F i l e O p = 0
3 | I c o n R e s o u r c e = C : \ P r o g r a m F i l e s \ G o o g l e \ D r i v e F i l e S t r e a m \ 1 0 1 . 0 . 3 . 0 \ G o o g l e D r i v e F S . e x e , 2 6
4 |
--------------------------------------------------------------------------------
/java_backend/target/classes/desktop.ini:
--------------------------------------------------------------------------------
1 | [ . S h e l l C l a s s I n f o ]
2 | C o n f i r m F i l e O p = 0
3 | I c o n R e s o u r c e = C : \ P r o g r a m F i l e s \ G o o g l e \ D r i v e F i l e S t r e a m \ 1 0 1 . 0 . 3 . 0 \ G o o g l e D r i v e F S . e x e , 2 6
4 |
--------------------------------------------------------------------------------
/java_backend/src/main/java/org/desktop.ini:
--------------------------------------------------------------------------------
1 | [ . S h e l l C l a s s I n f o ]
2 | C o n f i r m F i l e O p = 0
3 | I c o n R e s o u r c e = C : \ P r o g r a m F i l e s \ G o o g l e \ D r i v e F i l e S t r e a m \ 1 0 1 . 0 . 3 . 0 \ G o o g l e D r i v e F S . e x e , 2 6
4 |
--------------------------------------------------------------------------------
/java_backend/src/main/resources/desktop.ini:
--------------------------------------------------------------------------------
1 | [ . S h e l l C l a s s I n f o ]
2 | C o n f i r m F i l e O p = 0
3 | I c o n R e s o u r c e = C : \ P r o g r a m F i l e s \ G o o g l e \ D r i v e F i l e S t r e a m \ 1 0 1 . 0 . 3 . 0 \ G o o g l e D r i v e F S . e x e , 2 6
4 |
--------------------------------------------------------------------------------
/java_backend/src/main/java/org/example/desktop.ini:
--------------------------------------------------------------------------------
1 | [ . S h e l l C l a s s I n f o ]
2 | C o n f i r m F i l e O p = 0
3 | I c o n R e s o u r c e = C : \ P r o g r a m F i l e s \ G o o g l e \ D r i v e F i l e S t r e a m \ 1 0 1 . 0 . 3 . 0 \ G o o g l e D r i v e F S . e x e , 2 6
4 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/front/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/front/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/java_backend/src/main/java/org/example/wep/desktop.ini:
--------------------------------------------------------------------------------
1 | [ . S h e l l C l a s s I n f o ]
2 | C o n f i r m F i l e O p = 0
3 | I c o n R e s o u r c e = C : \ P r o g r a m F i l e s \ G o o g l e \ D r i v e F i l e S t r e a m \ 1 0 1 . 0 . 3 . 0 \ G o o g l e D r i v e F S . e x e , 2 6
4 |
--------------------------------------------------------------------------------
/java_backend/.idea/encodings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/java_backend/target/classes/application-test.properties:
--------------------------------------------------------------------------------
1 | server.port=8813
2 | baseUrl=http://localhost:8813
3 |
4 |
5 | apiKey=your key here
6 |
7 |
8 |
9 |
10 |
--------------------------------------------------------------------------------
/python_backend/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/python_backend/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/.idea/FrankFu.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
--------------------------------------------------------------------------------
/front/.idea/front.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
--------------------------------------------------------------------------------
/python_backend/.idea/python_backend.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
--------------------------------------------------------------------------------
/java_backend/src/main/java/org/example/Application.java:
--------------------------------------------------------------------------------
1 | package org.example;
2 |
3 | import org.springframework.boot.SpringApplication;
4 | import org.springframework.boot.autoconfigure.SpringBootApplication;
5 |
6 | /**
7 |  * Entry point that bootstraps the Spring Boot application.
8 |  *
9 |  * @author admin
10 |  */
11 | @SpringBootApplication
12 | public class Application {
13 |     public static void main(String[] args) {
14 |         new SpringApplication(Application.class).run(args);
15 |     }
16 | }
17 |
--------------------------------------------------------------------------------
/java_backend/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/front/styles.css:
--------------------------------------------------------------------------------
1 | /* Ensure the body takes up the full height */
2 | body {
3 | margin: 0;
4 | display: flex;
5 | flex-direction: column;
6 | min-height: 100vh;
7 | }
8 |
9 | /* Main content should expand to fill available space */
10 | .content {
11 | flex: 1;
12 | }
13 |
14 | /* Sticky footer styling */
15 | footer {
16 | background-color: #333;
17 | color: #fff;
18 | text-align: center;
19 | padding: 10px;
20 | position: sticky;
21 | bottom: 0;
22 | }
23 |
24 | footer a {
25 | color: #fff;
26 | }
27 |
--------------------------------------------------------------------------------
/front/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | Just a website
7 |
8 |
9 |
10 |
11 |
12 |
This is a plain old website
13 |
This is just a plain website that is using plain old JavaScript
18 |
19 |
20 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 Frank Fu
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/python_backend/.idea/inspectionProfiles/Project_Default.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
20 |
21 |
22 |
--------------------------------------------------------------------------------
/java_backend/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 | 4.0.0
6 |
7 | org.example
8 | realtime_api_springboot
9 | 1.0-SNAPSHOT
10 |
11 |
12 | 8
13 | 8
14 | UTF-8
15 |
16 |
17 |
18 |
19 | com.fasterxml.jackson.core
20 | jackson-databind
21 | 2.15.0
22 |
23 |
24 | org.springframework.boot
25 | spring-boot-starter-web
26 | 2.7.13
27 |
28 | org.springframework.boot
29 | spring-boot-starter-websocket
30 | 2.7.13
31 |
32 |
33 | com.squareup.okhttp3
34 | okhttp
35 | 4.12.0
36 |
37 |
38 |
--------------------------------------------------------------------------------
/python_backend/WebRTC.py:
--------------------------------------------------------------------------------
1 | from flask import Flask, request, jsonify
2 | from flask_cors import CORS # Import CORS
3 | import requests
4 | import os
5 |
6 | app = Flask(__name__)
7 | 
8 | # CORS is enabled for every origin, which is convenient for local development; restrict the allowed origins before deploying
9 | CORS(app)
10 | 
11 | # OpenAI Realtime endpoint and the default instructions sent with every connection request
12 | OPENAI_API_URL = "https://api.openai.com/v1/realtime"
13 | DEFAULT_INSTRUCTIONS = "You are helpful and have some tools installed.\n\nIn the tools you have the ability to control a robot hand."
14 | OPENAI_API_KEY = os.environ['OPENAI_API_KEY'] # Read from the environment; raises KeyError at import time if OPENAI_API_KEY is unset
15 |
16 | # Root route (optional): returns a fixed plain-text banner, handy for checking that the server is up
17 | @app.route('/')
18 | def home():
19 | return "Flask API is running!"
20 |
21 | @app.route('/api/rtc-connect', methods=['POST'])
22 | def connect_rtc():
23 | # Get the request body from the client
24 | body = request.get_data(as_text=True)
25 |
26 | # Build the OpenAI API request URL
27 | url = f"{OPENAI_API_URL}?model=gpt-4o-realtime-preview-2024-12-17&instructions={DEFAULT_INSTRUCTIONS}&voice=ash"
28 |
29 | # Set the request headers
30 | headers = {
31 | "Authorization": f"Bearer {OPENAI_API_KEY}",
32 | "Content-Type": "application/sdp"
33 | }
34 |
35 | # Send POST request to the OpenAI API
36 | response = requests.post(url, headers=headers, data=body)
37 |
38 | # Return the OpenAI response, maintaining the same content type
39 | return response.content, 200, {'Content-Type': 'application/sdp'}
40 |
41 | if __name__ == '__main__':
42 | # Start Flask's built-in server on port 8813; debug=True turns on Flask debug mode, so use this entry point for local development only
43 | app.run(debug=True, port=8813)
44 |
--------------------------------------------------------------------------------
/java_backend/src/main/java/org/example/wep/WebRTCController.java:
--------------------------------------------------------------------------------
1 | package org.example.wep;
2 |
3 | import org.springframework.beans.factory.annotation.Value;
4 | import org.springframework.http.*;
5 | import org.springframework.web.bind.annotation.CrossOrigin;
6 | import org.springframework.web.bind.annotation.PostMapping;
7 | import org.springframework.web.bind.annotation.RequestBody;
8 | import org.springframework.web.bind.annotation.RestController;
9 | import org.springframework.web.client.RestTemplate;
10 | import org.springframework.web.util.UriComponentsBuilder;
11 |
12 | /**
13 |  * Relays a browser's WebRTC SDP offer to the OpenAI Realtime API using the
14 |  * server-side API key and returns OpenAI's SDP answer.
15 |  *
16 |  * NOTE(review): WebRTCWithEphemeralKeyController also maps POST /api/rtc-connect;
17 |  * confirm that only one of the two controllers is active, otherwise Spring is
18 |  * expected to reject the duplicate mapping at startup.
19 |  */
20 | @RestController
21 | public class WebRTCController {
22 | 
23 |     private static final String OPENAI_API_URL = "https://api.openai.com/v1/realtime";
24 |     private static final String DEFAULT_INSTRUCTIONS = "You are helpful and have some tools installed.\n\nIn the tools you have the ability to control a robot hand.";
25 |     private static final MediaType APPLICATION_SDP = MediaType.parseMediaType("application/sdp");
26 | 
27 |     /** OpenAI API key, injected from the {@code apiKey} property. */
28 |     @Value("${apiKey}")
29 |     private String openaiApiKey;
30 | 
31 |     /** RestTemplate is thread-safe once constructed, so one instance serves all requests. */
32 |     private final RestTemplate restTemplate = new RestTemplate();
33 | 
34 |     /**
35 |      * Forwards the client's SDP offer to OpenAI and returns the SDP answer.
36 |      *
37 |      * @param body the SDP offer sent by the browser
38 |      * @return a 200 response carrying OpenAI's SDP answer as {@code application/sdp}
39 |      * @throws RuntimeException if the call to OpenAI fails or yields a non-2xx status
40 |      */
41 |     @CrossOrigin(origins = "*") // Allow requests from any origin
42 |     @PostMapping("/api/rtc-connect")
43 |     public ResponseEntity<String> connectRTC(@RequestBody String body) {
44 |         // Hand RestTemplate a java.net.URI: toUriString() already percent-encodes,
45 |         // and exchange(String, ...) would encode that string a second time
46 |         // (the spaces in the instructions would arrive as %2520).
47 |         java.net.URI uri = UriComponentsBuilder.fromHttpUrl(OPENAI_API_URL)
48 |                 .queryParam("model", "gpt-4o-realtime-preview-2024-12-17")
49 |                 .queryParam("instructions", DEFAULT_INSTRUCTIONS)
50 |                 .queryParam("voice", "ash")
51 |                 .build()
52 |                 .encode()
53 |                 .toUri();
54 | 
55 |         // Set up the headers
56 |         HttpHeaders headers = new HttpHeaders();
57 |         headers.set("Authorization", "Bearer " + openaiApiKey);
58 |         headers.setContentType(APPLICATION_SDP);
59 | 
60 |         HttpEntity<String> requestEntity = new HttpEntity<>(body, headers);
61 | 
62 |         // With the default error handler, 4xx/5xx responses surface as exceptions
63 |         // from exchange(); the explicit check covers any remaining non-2xx status.
64 |         ResponseEntity<String> response = restTemplate.exchange(uri, HttpMethod.POST, requestEntity, String.class);
65 |         if (!response.getStatusCode().is2xxSuccessful()) {
66 |             throw new RuntimeException("OpenAI API error: " + response.getStatusCode());
67 |         }
68 | 
69 |         // Return the SDP response with the correct content type
70 |         return ResponseEntity.ok()
71 |                 .contentType(APPLICATION_SDP)
72 |                 .body(response.getBody());
73 |     }
74 | }
--------------------------------------------------------------------------------
/Readme.md:
--------------------------------------------------------------------------------
1 | YouTube Video
2 | https://www.youtube.com/watch?v=Wnp0TnR46yc
3 |
4 | # 1. WebRTC
5 | The main purpose of WebRTC is to enable audio stream transmission and data channel communication between browsers. WebRTC allows two browsers to communicate in real-time without the need for a traditional server intermediary.
6 |
7 | ## 1. Audio Stream Transmission:
8 | Through WebRTC, code can capture the audio stream of the local browser (such as sound input from a microphone) and transmit it to a remote browser. This means that when a WebRTC connection is established, if the user speaks in the browser, the audio data can be transmitted in real-time to the remote end and played in the remote browser. In the code, `navigator.mediaDevices.getUserMedia({ audio: true })` captures the microphone's audio stream, and the audio stream is transmitted to the remote end through `peerConnection.addTransceiver(track)`.
9 |
10 | ## 2. Data Channel Communication:
11 | In addition to supporting audio and video stream transmission, WebRTC also supports Data Channel communication. The Data Channel is a reliable, low-latency transmission method that can be used to transmit text, binary data, and other formats between browsers. In this code, the `dataChannel` is used to exchange JSON-formatted data. Through the data channel, browsers can not only transmit audio streams but also interact with remote systems via WebRTC. For example, when a remote system requests the execution of a JavaScript function (such as changing the background color or retrieving HTML content from a page), the data channel transmits these commands, the browser performs the operation, and the result is sent back through the data channel.
12 |
13 | ---
14 |
15 | # 2. Functionality Implemented in the Demo
16 | Retrieve current HTML element content, change webpage background color, change font color, change button size and color.
17 |
18 | ---
19 |
20 | # 3. Detailed Steps:
21 |
22 | ## 1. Local Browser Initiates Request:
23 | The user's browser starts the WebRTC handshake by sending a POST request to the backend's `/api/rtc-connect` endpoint. The request body is the browser's SDP offer, which describes its audio and data-channel setup and its network settings; the media stream itself is not sent in this request.
24 |
25 | ## 2. Backend Processes the Request:
26 | The backend processes the request and calls the OpenAI API to generate the WebRTC SDP (Session Description Protocol) information. This SDP contains configuration information for audio streams, data channels, network addresses, and other parameters necessary to negotiate the WebRTC connection.
27 |
28 | ## 3. Backend Returns SDP Information:
29 | The backend returns the SDP data to the local browser. This SDP data contains all the configuration information required for the WebRTC connection.
30 |
31 | ## 4. Local Browser Processes SDP Data:
32 | The local browser uses the returned SDP data to initiate the connection through the WebRTC protocol. Specifically:
33 | - Audio Stream: The local browser begins receiving and sending audio streams through WebRTC's audio settings (rtpmap, rtcp, etc.).
34 | - Data Channel: The data channel setup is used for subsequent message transmission and control.
35 | - ICE Connection: Through configurations like ice-ufrag and ice-pwd, the browser performs NAT traversal and establishes a network connection with the remote device.
36 |
37 | ## 5. WebRTC Connection Established:
38 | The local browser establishes a WebRTC connection with the remote device (OpenAI API) and begins transmitting data through the audio stream and data channel.
39 |
40 | ## 6. Remote Device (OpenAI API):
41 | In this scenario, the OpenAI API acts as the remote WebRTC endpoint, receiving audio stream data from the local browser, processing it, and returning appropriate data or audio feedback.
42 |
43 | ---
44 |
45 | # 4. Configuration
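46 | 1. Configure the backend port and API key: for the Java backend, set `server.port` and `apiKey` in `application-test.properties`; for the Python backend, set the `OPENAI_API_KEY` environment variable (the port is passed to `app.run`).
47 | 2. Point the frontend at the backend's base URL (for example `http://localhost:8813`).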
48 |
--------------------------------------------------------------------------------
/java_backend/src/main/java/org/example/wep/WebRTCWithEphemeralKeyController.java:
--------------------------------------------------------------------------------
1 | package org.example.wep;
2 |
3 | import org.springframework.beans.factory.annotation.Value;
4 | import org.springframework.http.*;
5 | import org.springframework.web.bind.annotation.*;
6 | import org.springframework.web.client.RestTemplate;
7 | import org.springframework.web.util.UriComponentsBuilder;
8 |
9 | @RestController
10 | @RequestMapping("/api")
11 | public class WebRTCWithEphemeralKeyController {
12 |
13 | @Value("${apiKey}")
14 | private String openaiApiKey;
15 |
16 | private static final String OPENAI_SESSION_URL = "https://api.openai.com/v1/realtime/sessions";
17 | private static final String OPENAI_API_URL = "https://api.openai.com/v1/realtime"; // May vary based on requirements
18 | private static final String MODEL_ID = "gpt-4o-realtime-preview-2024-12-17";
19 | private static final String VOICE = "ash"; // Or other voices
20 | private static final String DEFAULT_INSTRUCTIONS = "You are helpful and have some tools installed.\n\nIn the tools you have the ability to control a robot hand.";
21 |
22 | /**
23 | * RTC connection endpoint for handling WebRTC SDP exchange and generating/using ephemeral tokens.
24 | */
25 | @CrossOrigin(origins = "*") // Adjust allowed origins as needed to enhance security
26 | @PostMapping("/rtc-connect")
27 | public ResponseEntity connectRTC(@RequestBody String clientSdp) {
28 | RestTemplate restTemplate = new RestTemplate();
29 |
30 | // Step 1: Generate ephemeral API token
31 | HttpHeaders tokenHeaders = new HttpHeaders();
32 | tokenHeaders.set("Authorization", "Bearer " + openaiApiKey);
33 | tokenHeaders.setContentType(MediaType.APPLICATION_JSON);
34 |
35 | String tokenRequestBody = String.format("{\"model\": \"%s\", \"voice\": \"%s\"}", MODEL_ID, VOICE);
36 |
37 | HttpEntity tokenRequestEntity = new HttpEntity<>(tokenRequestBody, tokenHeaders);
38 |
39 | ResponseEntity tokenResponse = restTemplate.exchange(
40 | OPENAI_SESSION_URL,
41 | HttpMethod.POST,
42 | tokenRequestEntity,
43 | String.class
44 | );
45 |
46 | if (!tokenResponse.getStatusCode().is2xxSuccessful()) {
47 | throw new RuntimeException("Failed to obtain ephemeral token, status code: " + tokenResponse.getStatusCode());
48 | }
49 |
50 | // Assuming the returned JSON contains a `client_secret.value` field as the ephemeral token
51 | // Needs to be parsed based on the actual response structure
52 | String ephemeralToken;
53 | try {
54 | // Use Jackson or another JSON parsing library to parse the response
55 | // Here, assuming Jackson is used
56 | com.fasterxml.jackson.databind.ObjectMapper mapper = new com.fasterxml.jackson.databind.ObjectMapper();
57 | com.fasterxml.jackson.databind.JsonNode root = mapper.readTree(tokenResponse.getBody());
58 | ephemeralToken = root.path("client_secret").path("value").asText();
59 |
60 | if (ephemeralToken == null || ephemeralToken.isEmpty()) {
61 | throw new RuntimeException("Ephemeral token is empty");
62 | }
63 | } catch (Exception e) {
64 | throw new RuntimeException("Failed to parse ephemeral token: " + e.getMessage());
65 | }
66 |
67 | // Step 2: Use the ephemeral token to perform SDP exchange with OpenAI's Realtime API
68 | // Build the URL
69 | String url = UriComponentsBuilder.fromHttpUrl(OPENAI_API_URL)
70 | .queryParam("model", MODEL_ID)
71 | .queryParam("instructions", DEFAULT_INSTRUCTIONS)
72 | .queryParam("voice", VOICE)
73 | .toUriString();
74 |
75 | // Set request headers
76 | HttpHeaders sdpHeaders = new HttpHeaders();
77 | sdpHeaders.set("Authorization", "Bearer " + ephemeralToken);
78 | sdpHeaders.setContentType(MediaType.parseMediaType("application/sdp"));
79 |
80 | // Create the request entity
81 | HttpEntity sdpRequestEntity = new HttpEntity<>(clientSdp, sdpHeaders);
82 |
83 | // Send SDP to OpenAI Realtime API
84 | ResponseEntity sdpResponse = restTemplate.exchange(
85 | url,
86 | HttpMethod.POST,
87 | sdpRequestEntity,
88 | String.class
89 | );
90 |
91 | if (!sdpResponse.getStatusCode().is2xxSuccessful()) {
92 | throw new RuntimeException("OpenAI API SDP exchange error, status code: " + sdpResponse.getStatusCode());
93 | }
94 |
95 | // Return OpenAI's SDP response to the client
96 | return ResponseEntity.ok()
97 | .contentType(MediaType.parseMediaType("application/sdp"))
98 | .body(sdpResponse.getBody());
99 | }
100 |
101 | }
102 |
--------------------------------------------------------------------------------
/python_backend/WebRTCWithEphemeralKey.py:
--------------------------------------------------------------------------------
1 | from flask import Flask, request, Response
2 | from flask_cors import CORS
3 | import requests
4 | import os
5 | import json
6 | import logging
7 | from dotenv import load_dotenv
8 |
9 | # Load environment variables from .env file (optional)
10 | load_dotenv()
11 |
12 | app = Flask(__name__)
13 | 
14 | # Configure CORS to allow all origins (adjust in production for better security)
15 | CORS(app)
16 | 
17 | # Configure logging
18 | logging.basicConfig(level=logging.INFO)
19 | logger = logging.getLogger(__name__)
20 | 
21 | # Configuration: the API key is mandatory, so fail fast at import time when it is missing
22 | OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
23 | if not OPENAI_API_KEY:
24 | logger.error("OPENAI_API_KEY environment variable not set.")
25 | raise EnvironmentError("OPENAI_API_KEY environment variable not set.")
26 | 
27 | OPENAI_SESSION_URL = "https://api.openai.com/v1/realtime/sessions"  # Used to request the ephemeral token
28 | OPENAI_API_URL = "https://api.openai.com/v1/realtime" # Used for the SDP exchange; may vary based on requirements
29 | MODEL_ID = "gpt-4o-realtime-preview-2024-12-17"
30 | VOICE = "ash" # Or other voices
31 | DEFAULT_INSTRUCTIONS = "You are helpful and have some tools installed.\n\nIn the tools you have the ability to control a robot hand."
32 |
33 | @app.route('/')
34 | def home():
35 | return "Flask API is running!"  # Fixed plain-text banner; handy for checking that the server is up
36 |
# Upper bound (seconds) for each outbound OpenAI request. Without a timeout,
# `requests` waits indefinitely and a stalled upstream would pin this worker.
_OPENAI_REQUEST_TIMEOUT_SECONDS = 30


@app.route('/api/rtc-connect', methods=['POST'])
def connect_rtc():
    """
    RTC connection endpoint for handling WebRTC SDP exchange and generating/using ephemeral tokens.

    The request body is the client's SDP offer as plain text. The handler
    obtains an ephemeral token from OpenAI using the server-side API key,
    forwards the offer to the Realtime API with that token, and relays the
    SDP answer back to the caller.

    Returns:
        Response: 200 with the SDP answer (`application/sdp`) on success;
        400 when the body is empty; 504 when OpenAI does not answer within
        the timeout; 500 for any other failure.
    """
    try:
        # Step 1: Retrieve the client's SDP from the request body
        client_sdp = request.get_data(as_text=True)
        if not client_sdp:
            logger.error("No SDP provided in the request body.")
            return Response("No SDP provided in the request body.", status=400)

        logger.info("Received SDP from client.")

        # Step 2: Generate ephemeral API token
        token_headers = {
            "Authorization": f"Bearer {OPENAI_API_KEY}",
            "Content-Type": "application/json"
        }
        token_payload = {
            "model": MODEL_ID,
            "voice": VOICE
        }

        logger.info("Requesting ephemeral token from OpenAI.")

        token_response = requests.post(
            OPENAI_SESSION_URL,
            headers=token_headers,
            json=token_payload,
            timeout=_OPENAI_REQUEST_TIMEOUT_SECONDS,
        )

        if not token_response.ok:
            logger.error(
                "Failed to obtain ephemeral token, status code: %s, response: %s",
                token_response.status_code,
                token_response.text,
            )
            return Response(f"Failed to obtain ephemeral token, status code: {token_response.status_code}", status=500)

        token_data = token_response.json()
        # The ephemeral token is expected at `client_secret.value`; `or {}` keeps
        # a JSON null `client_secret` from crashing the chained lookup.
        ephemeral_token = (token_data.get('client_secret') or {}).get('value', '')

        if not ephemeral_token:
            logger.error("Ephemeral token is empty or not found in the response.")
            return Response("Ephemeral token is empty or not found in the response.", status=500)

        logger.info("Ephemeral token obtained successfully.")

        # Step 3: Perform SDP exchange with OpenAI's Realtime API using the ephemeral token
        sdp_headers = {
            "Authorization": f"Bearer {ephemeral_token}",
            "Content-Type": "application/sdp"
        }
        sdp_params = {
            "model": MODEL_ID,
            "instructions": DEFAULT_INSTRUCTIONS,
            "voice": VOICE
        }

        # Build the full URL with query parameters
        sdp_url = requests.Request('POST', OPENAI_API_URL, params=sdp_params).prepare().url

        logger.info("Sending SDP to OpenAI Realtime API at %s", sdp_url)

        sdp_response = requests.post(
            sdp_url,
            headers=sdp_headers,
            # Encode explicitly: a `str` body is otherwise encoded as latin-1
            # by the HTTP layer and fails on any non-latin-1 character.
            data=client_sdp.encode('utf-8'),
            timeout=_OPENAI_REQUEST_TIMEOUT_SECONDS,
        )

        if not sdp_response.ok:
            logger.error(
                "OpenAI API SDP exchange error, status code: %s, response: %s",
                sdp_response.status_code,
                sdp_response.text,
            )
            return Response(f"OpenAI API SDP exchange error, status code: {sdp_response.status_code}", status=500)

        logger.info("SDP exchange with OpenAI completed successfully.")

        # Step 4: Return OpenAI's SDP response to the client with the correct content type
        return Response(
            response=sdp_response.content,
            status=200,
            mimetype='application/sdp'
        )

    except requests.exceptions.Timeout:
        # Only reachable because of the explicit timeouts above
        logger.exception("Timed out while waiting for the OpenAI API.")
        return Response("Timed out while waiting for the OpenAI API.", status=504)
    except Exception as e:
        logger.exception("An error occurred during the RTC connection process.")
        return Response(f"An error occurred: {str(e)}", status=500)
114 |
if __name__ == '__main__':
    # Debug mode enables the Werkzeug interactive debugger, which lets anyone
    # who can reach the server execute arbitrary Python. Keep it off unless
    # explicitly requested through FLASK_DEBUG (1/true/yes/on).
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
    # Ensure the server runs on port 8813
    app.run(debug=debug_enabled, port=8813)
118 |
--------------------------------------------------------------------------------
/front/script.js:
--------------------------------------------------------------------------------
// Origin of the backend server that this page sends its signaling request to
const baseUrl = 'http://107.204.228.201:8813';
// True while a WebRTC session is running; flipped by startWebRTC / stopWebRTC
let isWebRTCActive = false;
// Connection objects for the current session; undefined until the first start
let peerConnection;
let dataChannel;
// Local tools, looked up by key when a function call request arrives over the
// data channel. Every entry returns a plain object describing the outcome.
const fns = {
    // Snapshot the markup of the whole document
    getPageHTML() {
        const html = document.documentElement.outerHTML;
        return { success: true, html };
    },
    // Repaint the page background with the given color
    changeBackgroundColor({ color }) {
        document.body.style.backgroundColor = color;
        return { success: true, color };
    },
    // Recolor the page text
    changeTextColor({ color }) {
        document.body.style.color = color;
        return { success: true, color };
    },
    // Restyle the first <button> on the page; either argument may be omitted
    changeButtonStyle({ size, color }) {
        const firstButton = document.querySelector('button');
        if (!firstButton) {
            return { success: false, message: 'Button element not found' };
        }
        if (size) {
            firstButton.style.fontSize = size;
        }
        if (color) {
            firstButton.style.backgroundColor = color;
        }
        return { success: true, size, color };
    },
};
45 |
/**
 * Play an incoming remote track through a single shared <audio> element.
 *
 * The element is created on first use and reused afterwards, so repeated
 * track events or reconnects do not pile up extra players on the page.
 *
 * @param {RTCTrackEvent} event - Track event delivered to `ontrack`.
 */
function handleTrack(event) {
    const playerId = 'webrtc-remote-audio';
    let el = document.getElementById(playerId);
    if (!el) {
        el = document.createElement('audio'); // Create the audio element once
        el.id = playerId;
        el.autoplay = true; // Start playback as soon as media arrives
        el.controls = true; // Show the audio controls
        document.body.appendChild(el); // Add the audio element to the page
    }
    // The remote side may not associate the track with a stream; wrap it in that case
    const stream = event.streams && event.streams.length > 0
        ? event.streams[0]
        : new MediaStream([event.track]);
    el.srcObject = stream; // Point the player at the newest stream
}
53 |
/**
 * Create the 'response' data channel on the current peer connection and wire
 * up its handlers.
 *
 * - On 'open', the session configuration is sent via configureData().
 * - On 'message', a 'response.function_call_arguments.done' event runs the
 *   matching entry of `fns` and sends its result back as a
 *   'function_call_output' conversation item.
 *
 * Malformed messages are logged and ignored. A tool that throws (or receives
 * unparsable arguments) yields a `{ success: false, message }` output, so the
 * remote side still gets an answer for its call_id.
 */
function createDataChannel() {
    // Keep a local reference: the module-level variable can be reset or replaced
    // while an async handler is still running.
    const channel = peerConnection.createDataChannel('response');
    dataChannel = channel;

    channel.addEventListener('open', () => {
        console.log('Data channel opened');
        configureData(); // Send the session configuration
    });

    channel.addEventListener('message', async (ev) => {
        let msg;
        try {
            msg = JSON.parse(ev.data); // Parse the received message
        } catch (err) {
            console.error('Ignoring malformed data channel message', err);
            return;
        }
        if (!msg || msg.type !== 'response.function_call_arguments.done') {
            return;
        }
        // Only dispatch to tools defined directly on fns; a plain lookup would also
        // match inherited members such as `constructor` or `toString`.
        const isOwnTool = Object.prototype.hasOwnProperty.call(fns, msg.name);
        if (!isOwnTool || typeof fns[msg.name] !== 'function') {
            return;
        }
        const fn = fns[msg.name];

        let result;
        try {
            console.log(`Calling local function ${msg.name}, parameters ${msg.arguments}`);
            const args = JSON.parse(msg.arguments); // Parse function parameters
            result = await fn(args); // Call the local function and wait for the result
        } catch (err) {
            console.error(`Local function ${msg.name} failed`, err);
            result = { success: false, message: err && err.message ? err.message : String(err) };
        }
        console.log('Result', result);

        // The channel may have been closed while the tool was running
        if (channel.readyState !== 'open') {
            console.warn('Data channel is no longer open; dropping function result');
            return;
        }
        const event = {
            type: 'conversation.item.create', // Create conversation item event
            item: {
                type: 'function_call_output', // Function call output
                call_id: msg.call_id, // Echo the call_id of the request
                output: JSON.stringify(result), // JSON string of the function execution result
            },
        };
        channel.send(JSON.stringify(event)); // Send the result back to the remote side
    });
}
87 |
/**
 * Send a 'session.update' event over the data channel that declares the
 * interaction modalities and the function tools available to the session.
 * Tool names must match the keys of the `fns` object so calls can be dispatched.
 */
function configureData() {
    console.log('Configuring data channel');

    // Parameter schema shared by the two page-color tools
    const colorParameters = () => ({
        type: 'object',
        properties: {
            color: {
                type: 'string',
                description: 'Hexadecimal value of the color',
            },
        },
    });

    const tools = [
        {
            type: 'function',
            name: 'changeBackgroundColor',
            description: 'Change the background color of the webpage',
            parameters: colorParameters(),
        },
        {
            type: 'function',
            name: 'changeTextColor',
            description: 'Change the text color of the webpage',
            parameters: colorParameters(),
        },
        {
            // Declared without a parameters schema
            type: 'function',
            name: 'getPageHTML',
            description: 'Get the HTML content of the current page',
        },
        {
            type: 'function',
            name: 'changeButtonStyle',
            description: 'Change the size and color of the button',
            parameters: {
                type: 'object',
                properties: {
                    size: {
                        type: 'string',
                        description: 'Font size of the button (e.g., "16px" or "1em")',
                    },
                    color: {
                        type: 'string',
                        description: 'Background color of the button (e.g., "#ff0000" or "red")',
                    },
                },
            },
        },
    ];

    const sessionUpdate = {
        type: 'session.update',
        session: {
            modalities: ['text', 'audio'], // Interaction modes: text and audio
            tools,
        },
    };
    dataChannel.send(JSON.stringify(sessionUpdate));
}
153 |
// Button that switches the WebRTC session on and off
const toggleButton = document.getElementById('toggleWebRTCButton');
// Each click flips the session state and relabels the button with the next action
toggleButton.addEventListener('click', () => {
    const wasActive = isWebRTCActive;
    if (wasActive) {
        stopWebRTC(); // Tear down the running session
    } else {
        startWebRTC(); // Begin a new session
    }
    toggleButton.textContent = wasActive ? 'start' : 'stop';
});
167 |
/**
 * Capture the microphone, create an SDP offer and exchange it with the
 * backend for the given peer connection.
 *
 * If the session is stopped or replaced while a step is pending, the
 * microphone is released and the negotiation ends quietly.
 *
 * @param {RTCPeerConnection} pc - Connection created by startWebRTC.
 * @returns {Promise<void>} Rejects when capture, signaling or SDP handling fails.
 */
async function negotiateSession(pc) {
    const micStream = await navigator.mediaDevices.getUserMedia({ audio: true });
    const releaseMic = () => micStream.getTracks().forEach((track) => track.stop());

    // The user may have pressed stop while the permission prompt was open
    if (peerConnection !== pc) {
        releaseMic();
        return;
    }

    try {
        // Add each track from the audio stream to the connection
        micStream.getTracks().forEach((track) => pc.addTransceiver(track, { direction: 'sendrecv' }));

        const offer = await pc.createOffer();
        await pc.setLocalDescription(offer);

        // Send the offer to the backend for signaling exchange
        const response = await fetch(`${baseUrl}/api/rtc-connect`, {
            method: 'POST',
            body: offer.sdp,
            headers: {
                'Content-Type': 'application/sdp',
            },
        });
        // An error body is not an SDP answer; do not hand it to the connection
        if (!response.ok) {
            throw new Error(`Signaling request failed with status ${response.status}`);
        }
        const answer = await response.text();

        if (peerConnection !== pc) {
            releaseMic();
            return;
        }
        await pc.setRemoteDescription({ sdp: answer, type: 'answer' });
    } catch (err) {
        releaseMic(); // Do not leave the microphone captured after a failure
        throw err;
    }
}

/**
 * Capture microphone input and initiate the WebRTC connection.
 *
 * The session is marked active synchronously so it can be stopped while
 * negotiation is still in flight. If negotiation fails, the error is logged
 * and the session is torn down through stopWebRTC().
 *
 * @returns {Promise<void>} Settles when negotiation has finished; never rejects.
 */
function startWebRTC() {
    // If WebRTC is already active, there is nothing to do
    if (isWebRTCActive) return Promise.resolve();

    const pc = new RTCPeerConnection();
    peerConnection = pc;
    pc.ontrack = handleTrack; // Play remote audio as it arrives
    createDataChannel(); // Must exist before the offer so it is part of the SDP
    isWebRTCActive = true;

    return negotiateSession(pc).catch((err) => {
        // A failure after the session was stopped or replaced is expected noise
        if (peerConnection !== pc) return;
        console.error('Failed to start WebRTC session', err);
        stopWebRTC();
    });
}
201 |
/**
 * Stop the WebRTC connection and clean up all resources.
 *
 * Stops both the local (sender) and remote (receiver) tracks, so the
 * microphone is released, then closes the data channel and the peer
 * connection and resets the module state. Does nothing when no session is
 * active.
 */
function stopWebRTC() {
    // If WebRTC is not active, return directly
    if (!isWebRTCActive) return;

    if (peerConnection) {
        // Senders carry the microphone tracks; a sender's track may be null
        const endpoints = [...peerConnection.getSenders(), ...peerConnection.getReceivers()];
        endpoints.forEach(({ track }) => {
            if (track) track.stop();
        });
    }

    // Close the data channel and WebRTC connection
    if (dataChannel) dataChannel.close();
    if (peerConnection) peerConnection.close();

    // Reset connection and channel objects
    peerConnection = null;
    dataChannel = null;
    // Mark WebRTC as not active
    isWebRTCActive = false;
}
218 |
--------------------------------------------------------------------------------