├── .gitignore
├── 0001-feat-disable-volc-esp-libs.patch
├── LICENSE.txt
├── README.md
├── client
    └── espressif
    │   └── esp32s3_demo
    │       ├── CMakeLists.txt
    │       ├── components
    │           ├── README.md
    │           └── VolcEngineRTCLite
    │           │   ├── CMakeLists.txt
    │           │   ├── include
    │           │       ├── PLACEHOLDER
    │           │       └── VolcEngineRTCLite.h
    │           │   └── libs
    │           │       └── esp32s3
    │           │           ├── LICENSE
    │           │           ├── PLACEHOLDER
    │           │           └── libVolcEngineRTCLite.a
    │       ├── main
    │           ├── AudioPipeline.c
    │           ├── AudioPipeline.h
    │           ├── CMakeLists.txt
    │           ├── Kconfig.projbuild
    │           ├── RtcBotUtils.c
    │           ├── RtcBotUtils.h
    │           ├── RtcHttpUtils.c
    │           ├── RtcHttpUtils.h
    │           └── VolcRTCDemo.c
    │       ├── partitions.csv
    │       ├── sdkconfig
    │       ├── sdkconfig.defaults
    │       └── sdkconfig.defaults.esp32s3
├── resource
    └── image
    │   └── tech_support.png
└── server
    └── src
        ├── AccessToken.py
        ├── README.md
        ├── RtcAigcConfig.py
        ├── RtcAigcService.py
        └── RtcApiRequester.py


/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__


--------------------------------------------------------------------------------
/0001-feat-disable-volc-esp-libs.patch:
--------------------------------------------------------------------------------
 1 | From 6c66cfb0606ed9efe7fc72495a8293aec86e6bf5 Mon Sep 17 00:00:00 2001
 2 | From: yuhuawei <yuhuawei.123@bytedance.com>
 3 | Date: Sat, 24 May 2025 22:53:54 +0800
 4 | Subject: [PATCH] feat: disable volc esp libs
 5 | 
 6 | ---
 7 |  components/clouds/CMakeLists.txt | 9 +++++----
 8 |  1 file changed, 5 insertions(+), 4 deletions(-)
 9 | 
10 | diff --git a/components/clouds/CMakeLists.txt b/components/clouds/CMakeLists.txt
11 | index ad734a1c..30efc462 100644
12 | --- a/components/clouds/CMakeLists.txt
13 | +++ b/components/clouds/CMakeLists.txt
14 | @@ -3,7 +3,8 @@ if (DEFINED ENV{DUER_PATH})
15 |      return()
16 |  endif()
17 |  
18 | -set(COMPONENT_ADD_INCLUDEDIRS ./dueros/lightduer/include ./volc_engine_rtc/include)
19 | +# set(COMPONENT_ADD_INCLUDEDIRS ./dueros/lightduer/include ./volc_engine_rtc/include)
20 | +set(COMPONENT_ADD_INCLUDEDIRS ./dueros/lightduer/include)
21 |  
22 |  # Edit following two lines to set component requirements (see docs)
23 |  set(COMPONENT_SRCS)
24 | @@ -14,9 +15,9 @@ target_link_libraries(${COMPONENT_TARGET} INTERFACE "-L${CMAKE_CURRENT_LIST_DIR}
25 |  
26 |  IF ((CONFIG_IDF_TARGET STREQUAL "esp32s3") OR (CONFIG_IDF_TARGET STREQUAL "esp32") OR (CONFIG_IDF_TARGET STREQUAL "esp32c5"))
27 |  IF (IDF_VERSION_MAJOR EQUAL 5)
28 | -add_prebuilt_library(VolcEngineRTCLite "${CMAKE_CURRENT_LIST_DIR}/volc_engine_rtc/libs/${CONFIG_IDF_TARGET}/libVolcEngineRTCLite.a"
29 | -                    REQUIRES mbedtls espressif__zlib json lwip)
30 | -target_link_libraries(${COMPONENT_LIB} INTERFACE VolcEngineRTCLite)
31 | +# add_prebuilt_library(VolcEngineRTCLite "${CMAKE_CURRENT_LIST_DIR}/volc_engine_rtc/libs/${CONFIG_IDF_TARGET}/libVolcEngineRTCLite.a"
32 | +#                     REQUIRES mbedtls espressif__zlib json lwip)
33 | +# target_link_libraries(${COMPONENT_LIB} INTERFACE VolcEngineRTCLite)
34 |  ENDIF()
35 |  ENDIF()
36 |   
37 | -- 
38 | 2.44.0
39 | 
40 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (2025) Beijing Volcano Engine Technology Ltd.
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | <h1 align="center"><img src="https://iam.volccdn.com/obj/volcengine-public/pic/volcengine-icon.png"></h1>
  2 | <h1 align="center">IoT RTC AIGC Demo</h1>
  3 | 欢迎使用IoT RTC AIGC Demo，本文档为您介绍如何使用本Demo。
  4 | 
  5 | 
  6 | ## 快速入门
  7 | 
  8 | ### 前置准备
  9 | - Linux服务器，且开发环境满足Python 3.8及以上版本。
 10 | - 乐鑫 ESP32-S3-Korvo-2 或 AtomS3R 开发板。
 11 | - 参考如下流程开通硬件服务。
 12 |   - 开通火山引擎实时音视频、语音识别、音频合成、火山方舟大模型服务。参看[开通服务](https://www.volcengine.com/docs/6348/1315561)开通相关产品、配置角色策略并获取以下参数值：
 13 |     - 火山引擎 AK
 14 |     - 火山引擎 SK
 15 |     - 实时音视频应用 APPID
 16 |     - 实时音视频应用 APPKEY
 17 |     - 语音技术-语音识别-流式语音识别 APPID
 18 |     - 语音技术-音频生成-语音合成 APPID
 19 |     - 语音技术-音频生成-语音合成 Voice_type
 20 |     - 火山方舟大模型 EndPointId
 21 |   - 配置不同权限账号调用智能体, [创建角色](https://www.volcengine.com/docs/6348/1315561)
 22 |   - [启用硬件场景配置](https://console.volcengine.com/rtc/aigc/cloudRTC)，并使用相应的房间规则
 23 | 
 24 | ### 运行服务端
 25 | 
 26 | > 服务端示例仅供开发者快速体验和演示，请勿在生产环境中使用。生产环境的服务端需要你自行开发。
 27 | 
 28 | 
 29 | #### 硬件要求
 30 | 
 31 | - PC服务器（Linux 建议使用 ubuntu18.04 及以上版本， 服务端示例程序在 Windows 11 python 3.12, MacOs python 3.9, Ubuntu 24.04 python 3.12实测可以正常运行）
 32 | 
 33 | #### 安装服务依赖
 34 | 
 35 | 
 36 | ```shell
 37 | pip install requests
 38 | ```
 39 | 
 40 | #### 下载并配置工程
 41 | 
 42 | 1. 克隆实时对话式 AI 硬件 Demo 示例
 43 | 
 44 | 
 45 |     ```shell
 46 |     git clone https://github.com/volcengine/rtc-aigc-embedded-demo.git
 47 |     ```
 48 | 
 49 | 2. 进入服务端 Demo 目录
 50 | 
 51 | 
 52 |     ```shell
 53 |     cd rtc-aigc-embedded-demo/server/src
 54 |     ```
 55 | 
 56 | 3. 设置配置文件
 57 | 
 58 |     进入服务端配置文件 `rtc-aigc-embedded-demo/server/src/RtcAigcConfig.py`，设置如下参数
 59 | 
 60 | 
 61 |     ```python
 62 |     # 鉴权 AK/SK。前往 https://console.volcengine.com/iam/keymanage 获取
 63 |     SK = "WmpCbVl6Y3hOR1JrT************1tTTRZalF4WW1FeE56WQ=="
 64 |     AK = "AKLTNWQyODQ1MDM5Y***********WRmM2Y2NTJlMTQyZjI"
 65 | 
 66 |     # 实时音视频 App ID。前往 https://console.volcengine.com/rtc/listRTC 获取或创建
 67 |     CONFIG_RTC_APPID = "67582ac8******0174410bd1"
 68 |     # 实时音视频 APP KEY。前往 https://console.volcengine.com/rtc/listRTC 获取
 69 |     RTC_APP_KEY = "1a6a03723c******222ada877ee13b"
 70 | 
 71 |     # 大模型推理接入点 EndPointId 前往 https://console.volcengine.com/ark/region:ark+cn-beijing/endpoint?config=%7B%7D 创建
 72 |     DEFAULT_END_POINT_ID = "ep-2025******160517-hlnzt"
 73 |     # 音频生成-语音合成 Voice_type，前往 https://console.volcengine.com/speech/service/8 获取
 74 |     DEFAULT_VOICE_TYPE = "BV007_******ming"
 75 | 
 76 |     # 语音识别-流式语音识别 APPID 前往 https://console.volcengine.com/speech/service/16 获取
 77 |     ASR_APP_ID = "884***621"
 78 |     # 语音识别-流式语音识别 ACCESS TOKEN 前往 https://console.volcengine.com/speech/service/16 获取
 79 |     ASR_ACCESS_TOKEN = "M_X6X***BeXa1"
 80 | 
 81 |     # 音频生成-语音合成 APPID，前往 https://console.volcengine.com/speech/service/8 获取
 82 |     TTS_APP_ID = "884***9621"
 83 |     # 音频生成-语音合成 ACCESS TOKEN，前往 https://console.volcengine.com/speech/service/8 获取
 84 |     TTS_ACCESS_TOKEN = "M_X6X***BeXa1"
 85 | 
 86 |     # 服务端监听端口号,你可以根据实际业务需求设置端口号
 87 |     PORT = 8080
 88 |     ```
 89 | 
 90 | #### 运行服务
 91 | 
 92 | 在 `rtc-aigc-embedded-demo/server/src`目录下运行服务
 93 | 
 94 | 
 95 | ```shell
 96 | python3 RtcAigcService.py
 97 | ```
 98 | 
 99 | ### 运行设备端
100 | 
101 | 本文以 Mac 操作系统为例。
102 | 
103 | #### 硬件要求
104 | 
105 | - 乐鑫 ESP32-S3-Korvo-2 开发板。
106 | 
107 | - USB数据线（两条 A 转Micro-B 数据线，一条作为电源线，一条作为串口线）。
108 | 
109 | - PC（Windows、Linux 或者 macOS）。
110 | 
111 | #### 乐鑫环境配置
112 | 详见[开发环境配置文档](https://docs.espressif.com/projects/esp-idf/zh_CN/stable/esp32s3/get-started/index.html)
113 | 1. 安装 CMake 和 Ninja 编译工具
114 | 
115 |     ```shell
116 |     brew install cmake ninja dfu-util
117 |     ```
118 | 
119 | 2. 将 乐鑫 ADF 框架克隆到本地，并同步各子仓（submodule）代码
120 | > **注意：** demo 中使用的 ADF 版本为 [eca11f20e56f9b5321b714da4305e123672d92a9], 对应 IDF 版本为 [v5.4], 请确保 ADF 版本与 IDF 版本匹配。
121 |     1. clone 乐鑫ADF 框架
122 | 
123 |     ```shell
124 |     git clone https://github.com/espressif/esp-adf.git
125 |     ```
126 |     2. 进入esp-adf目录
127 | 
128 |     ```shell
129 |     cd esp-adf
130 |     ```
131 |     3. 切换到乐鑫ADF指定版本
132 |     ```shell
133 |     git reset --hard eca11f20e56f9b5321b714da4305e123672d92a9
134 |     ```
135 |     4. 同步各子仓代码
136 | 
137 |     ```shell
138 |     git submodule update --init --recursive
139 |     ```
140 | 
141 | 3. 安装乐鑫 esp32s3 开发环境相关依赖
142 | 
143 |     ```shell
144 |     ./install.sh esp32s3
145 |     ```
146 | 
147 |     成功安装所有依赖后，命令行会出现如下提示
148 | 
149 |     ```shell
150 |     All done! You can now run:
151 |     . ./export.sh
152 |     ```
153 | 
154 |     > 对于 macOS 用户，如在上述任何步骤中遇到以下错误:
155 |     >
156 |     > `<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:xxx)`
157 |     >
158 |     > 可前往访达->应用程序->Python 文件夹，点击`Install Certificates.command` 安装证书。了解更多信息，请参考 <a target="_blank" href="https://github.com/espressif/esp-idf/issues/4775">安装 ESP-IDF 工具时出现的下载错误</a>。
159 | 
160 | 
161 | 4. 设置环境变量
162 | 
163 |     > **每次打开命令行窗口均需要运行该命令进行设置**
164 | 
165 |     ```shell
166 |     . ./export.sh
167 |     ```
168 | 
169 | #### 下载并配置工程
170 | 
171 | 1. 将实时对话式 AI 硬件示例工程 clone 到 乐鑫 ADF examples 目录下
172 |     1. 进入 esp-adf/examples 目录
173 | 
174 |     ```shell
175 |     cd $ADF_PATH/examples
176 |     ```
177 |     2. clone 实时对话式 AI 硬件示例工程
178 | 
179 |     ```shell
180 |     git clone https://github.com/volcengine/rtc-aigc-embedded-demo.git 
181 |     ```
182 | 
183 | 2. 禁用乐鑫工程中的火山组件
184 |     1. 进入 esp-adf 目录
185 | 
186 |     ```shell
187 |     cd $ADF_PATH
188 |     ```
189 |     2. 禁用乐鑫工程中的火山组件
190 | 
191 |     ```shell
192 |     git apply $ADF_PATH/examples/rtc-aigc-embedded-demo/0001-feat-disable-volc-esp-libs.patch
193 |     ```
194 | 
195 | #### 编译固件
196 | 
197 | 1. 进入`esp-adf/examples/rtc-aigc-embedded-demo/client/espressif/esp32s3_demo` 目录下编译固件
198 |     1. 进入 esp32s3_demo 目录
199 | 
200 |     ```shell
201 |     cd $ADF_PATH/examples/rtc-aigc-embedded-demo/client/espressif/esp32s3_demo
202 |     ```
203 |     2. 设置编译目标平台
204 | 
205 |     ```shell
206 |     idf.py set-target esp32s3
207 |     ```
208 |     3. 设置WIFI账号密码、RTC APPID、服务端地址和端口号
209 |     ```shell
210 |     idf.py menuconfig
211 |     ```
212 |     进入 `Example Configuration` 菜单，在 `WiFi SSID` 及 `WiFi Password` 中填入你的 WIFI 账号和密码，在 `RTC APPID` 中填入你的 RTC APPID (前往 https://console.volcengine.com/rtc/listRTC 获取)，在 `AIGENT Server Host` 中填入你的服务端地址和端口号，并保存。
213 | 
214 |     4. 设置开发板型号
215 |     ```shell
216 |     idf.py menuconfig
217 |     ```
218 |     进入 `Audio HAL` 菜单，在 `Audio board` 中选择你的开发板型号。(例如: 方舟开发板选择 `M5STACK-ATOMS3R`)，并保存。
219 | 
220 |     5. 编译固件
221 | 
222 |     ```shell
223 |     idf.py build
224 |     ```
225 | 
226 | #### 烧录并运行示例 Demo
227 | 
228 | 1. 打开乐鑫开发板电源开关
229 | 
230 | 2. 烧录固件
231 | 
232 | 
233 |     ```shell
234 |     idf.py flash
235 |     ```
236 | 
237 | 3. 运行示例 Demo 并查看串口日志输出
238 | 
239 | 
240 |     ```shell
241 |     idf.py monitor
242 |     ```
243 | ## 进阶阅读
244 | [服务端示例接口说明](server/src/README.md)
245 | 
246 | ## 技术交流
247 |  欢迎加入我们的技术交流群或提出Issue，一起探讨技术，一起学习进步。
248 | <div align=center><img src="resource/image/tech_support.png" width="200"></div>


--------------------------------------------------------------------------------
/client/espressif/esp32s3_demo/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # For more information about the build system see
 2 | # https://docs.espressif.com/projects/esp-idf/en/latest/api-guides/build-system.html
 3 | #
 4 | # ESP-IDF v5.x itself requires CMake 3.16 or newer, so request that up front
 5 | # instead of the deprecated 3.5 baseline.
 6 | cmake_minimum_required(VERSION 3.16)
 7 | 
 8 | # Both paths come from the environment; fail early with a readable message
 9 | # instead of trying to include "/CMakeLists.txt" when one of them is unset.
10 | if(NOT DEFINED ENV{ADF_PATH} OR NOT DEFINED ENV{IDF_PATH})
11 |     message(FATAL_ERROR "ADF_PATH and IDF_PATH must be set; run `. ./export.sh` in the esp-adf directory first.")
12 | endif()
13 | 
14 | # Keep this order: the ADF CMakeLists.txt first, then the IDF project machinery.
15 | include("$ENV{ADF_PATH}/CMakeLists.txt")
16 | include("$ENV{IDF_PATH}/tools/cmake/project.cmake")
17 | project(VolcRTCDemo)
18 | 


--------------------------------------------------------------------------------
/client/espressif/esp32s3_demo/components/README.md:
--------------------------------------------------------------------------------
1 | 此文件夹中需要放置VolcEngineRTCLite库
2 | 


--------------------------------------------------------------------------------
/client/espressif/esp32s3_demo/components/VolcEngineRTCLite/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | idf_component_register(
2 |     INCLUDE_DIRS "include"
3 | )
4 | 
5 | add_prebuilt_library(VolcEngineRTCLite "${CMAKE_CURRENT_SOURCE_DIR}/libs/${CONFIG_IDF_TARGET}/libVolcEngineRTCLite.a"
6 |                     REQUIRES mbedtls espressif__zlib json lwip)
7 | target_link_libraries(${COMPONENT_LIB} INTERFACE VolcEngineRTCLite)
8 | 


--------------------------------------------------------------------------------
/client/espressif/esp32s3_demo/components/VolcEngineRTCLite/include/PLACEHOLDER:
--------------------------------------------------------------------------------
1 | 放置VolcEngineRTCLite.h
2 | 


--------------------------------------------------------------------------------
/client/espressif/esp32s3_demo/components/VolcEngineRTCLite/include/VolcEngineRTCLite.h:
--------------------------------------------------------------------------------
   1 | /*
   2 |  * Copyright (c) 2024 The VolcEngineRTCLite project authors. All Rights Reserved.
   3 |  * @brief VolcEngineRTCLite Interface Lite
   4 |  */
   5 | 
   6 | #ifndef __BYTE_RTC_API_H__
   7 | #define __BYTE_RTC_API_H__
   8 | 
   9 | #include <stdint.h>
  10 | #include <stddef.h>
  11 | #include <stdbool.h>
  12 | 
  13 | #ifdef __cplusplus
  14 | extern "C" {
  15 | #endif
  16 | 
  17 | #if defined(_MSC_VER)
  18 | #if defined(__BUILDING_BYTE_RTC_SDK__)
  19 | #define __byte_rtc_api__ __declspec(dllexport)
  20 | #else
  21 | #define __byte_rtc_api__ __declspec(dllimport)
  22 | #endif
  23 | #else
  24 | #define __byte_rtc_api__ __attribute__((visibility("default")))
  25 | #endif
  26 | 
  27 | #define BYTE_RTC_API_VERSION "1.0.3"
  28 | #define BYTE_RTC_API_VERSION_NUM 0x1003
  29 | 
  30 | 
  31 | /**
  32 |  * @locale en
  33 |  * @type errorcode
  34 |  * @brief Callback error codes.  <br>
  35 |  *        When the SDK hits an unrecoverable internal error, it notifies the user through the `on_global_error` or `on_room_error` callback.
  36 |  */
  37 | 
  38 | typedef enum {
  39 |     /**
  40 |      * @locale en
  41 |      * @brief Invalid token.
  42 |      *        The token used to join the room is invalid or has expired. Obtain a new token and call `byte_rtc_renew_token` to update it.
  43 |      */
  44 |     ERR_INVALID_TOKEN = -1000,
  45 |     /**
  46 |      * @locale en
  47 |      * @brief Join-room error.
  48 |      *        An unknown error occurred while joining the room and the join failed. The user needs to join the room again.
  49 |      */
  50 |     ERR_JOIN_ROOM = -1001,
  51 |     /**
  52 |      * @locale en
  53 |      * @brief No permission to publish audio/video streams.
  54 |      *        Publishing a stream in the current room failed because the user does not have publish permission.
  55 |      */
  56 |     ERR_NO_PUBLISH_PERMISSION = -1002,
  57 |     /**
  58 |      * @locale en
  59 |      * @brief No permission to subscribe to audio/video streams.
  60 |      *        Subscribing to a stream in the current room failed because the user does not have subscribe permission.
  61 |      */
  62 |     ERR_NO_SUBSCRIBE_PERMISSION = -1003,
  63 |     /**
  64 |      * @locale en
  65 |      * @brief A user with the same user ID joined this room, so the current user was kicked out of the room.
  66 |      */
  67 |     ERR_NO_DUPLICATE_LOGIN = -1004,
  68 |     /**
  69 |      * @locale en
  70 |      * @brief The server called the OpenAPI to kick the current user out of the room.
  71 |      */
  72 |     ERR_KICKED_OUT = -1006,
  73 |     /**
  74 |      * @locale en
  75 |      * @brief When `byte_rtc_join_room` is called with an illegal room, it returns null and raises this error.
  76 |      */
  77 |     ERR_ROOMID_ILLEGAL = -1007,
  78 |     /**
  79 |      * @locale en
  80 |      * @brief Token expired. Call `byte_rtc_join_room` with a new token to join the room again.
  81 |      */
  82 |     ERR_ROOM_TOKEN_EXPIRED = -1009,
  83 |     /**
  84 |      * @locale en
  85 |      * @brief The token passed to `updateToken` is invalid. NOTE(review): `updateToken` may refer to `byte_rtc_renew_token` - confirm.
  86 |      */
  87 |     ERR_UPDATE_TOKEN_WITH_INVALID_TOKEN = -1010,
  88 |     /**
  89 |      * @locale en
  90 |      * @brief The server called the OpenAPI to dismiss the room; all users are removed from the room.
  91 |      */
  92 |     ERR_ROOM_DISMISS = -1011,
  93 |     /**
  94 |      * @locale en
  95 |      * @brief Join-room error. <br>
  96 |      *        The join failed because a LICENSE-billed account joined without using the LICENSE_AUTHENTICATE SDK.
  97 |      */
  98 |     ERR_JOIN_ROOM_WITHOUT_LICENSE_AUTHENTICATE_SDK = -1012,
  99 |     /**
 100 |      * @locale en
 101 |      * @brief Call loop detection found that a room with the same room name already exists.
 102 |      */
 103 |     ERR_ROOM_ALREADY_EXIST = -1013,
 104 |     /**
 105 |      * @locale en
 106 |      * @brief Different uids were used when joining multiple rooms.<br>
 107 |      *        Within one engine instance, a user must use the same uid to join different rooms.
 108 |      */
 109 |     ERR_USERID_DIFFERENT = -1014,
 110 |     /**
 111 |      * @locale en
 112 |      * @brief The server-side license has expired; joining the room is refused. <br>
 113 |      */
 114 |     ERR_JOIN_ROOM_SERVER_LICENSE_EXPIRED = -1017,
 115 |     /**
 116 |      * @locale en
 117 |      * @brief The concurrency limit allowed by the server-side license is exceeded; joining the room is refused. <br>
 118 |      */
 119 |     ERR_JOIN_ROOM_EXCEEDS_THE_UPPER_LIMIT = -1018,
 120 |     /**
 121 |      * @locale en
 122 |      * @brief Wrong license parameter; joining the room is refused. <br>
 123 |      */
 124 |     ERR_JOIN_ROOM_LICENSE_PARAMETER_ERROR = -1019,
 125 |     /**
 126 |      * @locale en
 127 |      * @brief Wrong license certificate path. <br>
 128 |      */
 129 |     ERR_JOIN_ROOM_LICENSE_FILE_PATH = -1020,
 130 |     /**
 131 |      * @locale en
 132 |      * @brief The license certificate is illegal. <br>
 133 |      */
 134 |     ERR_JOIN_ROOM_LICENSE_ILLEGAL = -1021,
 135 |     /**
 136 |      * @locale en
 137 |      * @brief The license certificate has expired; joining the room is refused. <br>
 138 |      */
 139 |     ERR_JOIN_ROOM_LICENSE_EXPIRED = -1022,
 140 |     /**
 141 |      * @locale en
 142 |      * @brief The license certificate content does not match. <br>
 143 |      */
 144 |     ERR_JOIN_ROOM_LICENSE_INFORMATION_NOT_MATCH = -1023,
 145 |     /**
 146 |      * @locale en
 147 |      * @brief The current license certificate does not match the cached certificate. <br>
 148 |      */
 149 |     ERR_JOIN_ROOM_LICENSE_NOT_MATCH_WITH_CACHE = -1024,
 150 |     /**
 151 |      * @locale en
 152 |      * @brief The room is banned. <br>
 153 |      */
 154 |     ERR_JOIN_ROOM_FORBIDDEN = -1025,
 155 |     /**
 156 |      * @locale en
 157 |      * @brief The user is banned. <br>
 158 |      */
 159 |     ERR_JOIN_ROOM_USER_FORBIDDEN = -1026,
 160 |     /**
 161 |      * @locale en
 162 |      * @brief Subscribing failed: the total number of subscribed audio/video streams exceeds the limit.
 163 |      */
 164 |     ERR_OVER_STREAM_SUBSCRIBE_LIMIT = -1070,
 165 |     /**
 166 |      * @locale en
 167 |      * @brief Publishing failed: the total number of published streams exceeds the limit.
 168 |      *        The RTC system limits the total number of streams published in a single room, counting video, audio and screen streams. Once the room has reached the limit, a further publish by the local user fails and this error is reported.
 169 |      */
 170 |     ERR_OVER_STREAM_PUBLISH_LIMIT = -1080,
 171 |     /**
 172 |      * @locale en
 173 |      * @brief Left the room because of an abnormal server status.  <br>
 174 |      *        The SDK is disconnected from the signaling server and will not reconnect automatically; contact technical support.  <br>
 175 |      */
 176 |     ERR_ABNORMAL_SERVER_STATUS = -1084,
 177 | 
 178 | } error_code_e;
 179 | 
 180 | /** Error code. */
 181 | 
 182 | /**
 183 |  * @locale en
 184 |  * @type keytype
 185 |  * @brief Reason a user went offline.
 186 |  */
 187 | typedef enum {
 188 |     /**
 189 |      * @locale en
 190 |      * @brief The user went offline voluntarily
 191 |      */
 192 |     USER_OFFLINE_QUIT = 0,
 193 |     /**
 194 |      * @locale en
 195 |      * @brief The user was dropped after a timeout
 196 |      */
 197 |     USER_OFFLINE_DROPPED = 1,
 198 | 
 199 | } user_offline_reason_e;
 200 | 
 201 | /**
 202 |  * @locale en
 203 |  * @type keytype
 204 |  * @brief Video data type.
 205 |  */
 206 | typedef enum {
 207 |     /**
 208 |      * @locale en
 209 |      *
 210 |      * @brief Unknown video data type
 211 |      */
 212 |      VIDEO_DATA_TYPE_UNKNOWN = 0,
 213 | 
 214 |     /**
 215 |      * @locale en
 216 |      *
 217 |      * @brief H264
 218 |      */
 219 |     VIDEO_DATA_TYPE_H264    = 1,
 220 |     /**
 221 |      * @locale en
 222 |      * @brief BYTEVC1
 223 |      */
 224 |     VIDEO_DATA_TYPE_BYTEVC1 = 2,
 225 | 
 226 | 
 227 | } video_data_type_e;
 228 | 
 229 | /**
 230 |  * @locale en
 231 |  * @type keytype
 232 |  * @brief Video frame type.
 233 |  */
 234 | typedef enum {
 235 |     /**
 236 |      * @locale en
 237 |      * @brief Unknown type   <br>
 238 |      *        When set to `VIDEO_FRAME_AUTO_DETECT`, the SDK determines the video frame type by itself.
 239 |      */
 240 |     VIDEO_FRAME_AUTO_DETECT = 0,
 241 |     /**
 242 |      * @locale en
 243 |      * @brief Key frame
 244 |      */
 245 |     VIDEO_FRAME_KEY = 1,
 246 |     /**
 247 |      * @locale en
 248 |      * @brief P frame
 249 |      */
 250 |     VIDEO_FRAME_DELTA = 2,
 251 | } video_frame_type_e;
 252 | 
 253 | 
 254 | /**
 255 |  * @locale en
 256 |  * @type keytype
 257 |  * @brief Video stream type.
 258 |  */
 259 | typedef enum {
 260 |     /**
 261 |      * @locale en
 262 |      * @brief Main stream
 263 |      */
 264 |     VIDEO_STREAM_HIGH = 0,
 265 |     /**
 266 |      * @locale en
 267 |      * @brief Secondary stream
 268 |      */
 269 |     VIDEO_STREAM_LOW = 1,
 270 | 
 271 | } video_stream_type_e;
 272 | 
 273 | /**
 274 |  * @locale en
 275 |  * @type keytype
 276 |  * @brief Video frame information.
 277 |  */
 278 | typedef struct {
 279 |     /**
 280 |      * @locale en
 281 |      * @brief Video data type, see video_data_type_e{@link #video_data_type_e}.
 282 |      */
 283 |     video_data_type_e data_type;
 284 |     /**
 285 |      * @locale en
 286 |      * @brief Video stream type, see video_stream_type_e{@link #video_stream_type_e}.
 287 |      */
 288 |     video_stream_type_e stream_type;
 289 |     /**
 290 |      * @locale en
 291 |      * @brief Video frame type, see video_frame_type_e{@link #video_frame_type_e}.
 292 |      */
 293 |     video_frame_type_e frame_type;
 294 |     /**
 295 |      * @locale en
 296 |      * @brief Video frame rate
 297 |      */
 298 |     int frame_rate;
 299 |     
 300 | } video_frame_info_t;
 301 | 
 302 | /**
 303 |  * @locale en
 304 |  * @type keytype
 305 |  * @brief Audio codec type.
 306 |  */
 307 | typedef enum {
 308 |     /**
 309 |      * @locale en
 310 |      * @brief OPUS
 311 |      */
 312 |     AUDIO_CODEC_TYPE_OPUS   = 1,
 313 |     /**
 314 |      * @locale en
 315 |      * @brief G722
 316 |      */
 317 |     AUDIO_CODEC_TYPE_G722   = 2,
 318 |     /**
 319 |      * @locale en
 320 |      * @brief AACLC
 321 |      */
 322 |     AUDIO_CODEC_TYPE_AACLC  = 3,
 323 | 
 324 |     /**
 325 |      * @locale en
 326 |      * @brief G711A
 327 |      */
 328 |     AUDIO_CODEC_TYPE_G711A  = 4,
 329 |     
 330 | } audio_codec_type_e;
 331 | 
 332 | 
 333 | /*
 334 |  * Video codec type.
 335 |  * (Plain-comment English rendering of the former zh-locale block;
 336 |  * the en-locale doc block follows.)
 337 |  */
 338 | /**
 339 |  * @locale en
 340 |  * @type keytype
 341 |  * @brief video codec type list.
 342 |  */
 343 | typedef enum {
 344 |     /*
 345 |      * Codec type H264 (former zh-locale block).
 346 |      *
 347 |      */
 348 |     /**
 349 |      * @locale en
 350 |      * @brief codec type H264
 351 |      */
 352 |     VIDEO_CODEC_TYPE_H264 = 0,
 353 |     
 354 |     /*
 355 |      * Codec type ByteVC1 (former zh-locale block).
 356 |      *
 357 |      */
 358 |     /**
 359 |      * @locale en
 360 |      * @brief codec type ByteVC1
 361 |      */
 362 |     VIDEO_CODEC_TYPE_BYTEVC1 = 1,
 363 |     
 364 | } video_codec_type_e;
 365 | 
 366 | /**
 367 |  * @locale en
 368 |  * @type keytype
 369 |  * @brief Audio data type.
 370 |  */
 371 | typedef enum {
 372 |     /**
 373 |      * @locale en
 374 |      * @brief Unknown audio data type
 375 |      */
 376 |     AUDIO_DATA_TYPE_UNKNOWN = 0,
 377 |     /**
 378 |      * @locale en
 379 |      * @brief OPUS
 380 |      */
 381 |     AUDIO_DATA_TYPE_OPUS    = 1,
 382 |     /**
 383 |      * @locale en
 384 |      * @brief G722
 385 |      */
 386 |     AUDIO_DATA_TYPE_G722    = 2,
 387 |     /**
 388 |      * @locale en
 389 |      * @brief AACLC
 390 |      */
 391 |     AUDIO_DATA_TYPE_AACLC   = 3,
 392 |    
 393 |     /**
 394 |      * @locale en
 395 |      * @brief PCMA
 396 |      */
 397 |     AUDIO_DATA_TYPE_PCMA    = 4,
 398 | 
 399 |     /**
 400 |      * @locale en
 401 |      * @brief PCM
 402 |      */
 403 |     AUDIO_DATA_TYPE_PCM = 5,
 404 | } audio_data_type_e;
 405 | 
 406 | /**
 407 |  * @locale en
 408 |  * @type keytype
 409 |  * @brief Audio frame information.
 410 |  */
 411 | typedef struct {
 412 |     /**
 413 |      * @locale en
 414 |      * @brief Audio data type, see audio_data_type_e{@link #audio_data_type_e}.
 415 |      */
 416 |     audio_data_type_e data_type;
 417 | 
 418 | } audio_frame_info_t;
 419 | 
 420 | /**
 421 |  * @locale en
 422 |  * @type keytype
 423 |  * @brief SDK log level.
 424 |  */
 425 | typedef enum {
 426 |     /**
 427 |      * @locale en
 428 |      * @brief Info level.
 429 |      */
 430 |     BYTE_RTC_LOG_LEVEL_INFO    = 1,
 431 |     /**
 432 |      * @locale en
 433 |      * @brief Warning level.
 434 |      */
 435 |     BYTE_RTC_LOG_LEVEL_WARN    = 2,
 436 |     /**
 437 |      * @locale en
 438 |      * @brief Error level.
 439 |      */
 440 |     BYTE_RTC_LOG_LEVEL_ERROR   = 3,
 441 |     /**
 442 |      * @locale en
 443 |      * @brief Fatal error level.
 444 |      */
 445 |     BYTE_RTC_LOG_LEVEL_FATAL   = 4
 446 | 
 447 | } byte_rtc_log_level_e;
 448 | 
 449 | /**
 450 |  * @locale en
 451 |  * @type keytype
 452 |  * @brief Automatic audio/video subscribe and publish options for a room.
 453 |  */
 454 | typedef struct {
 455 |     /**
 456 |      * @locale en
 457 |      * @brief Whether to automatically subscribe to remote users' audio streams.<br>
 458 |      *        - true: yes
 459 |      *        - false: no
 460 |      */
 461 |     bool auto_subscribe_audio;
 462 |     /**
 463 |      * @locale en
 464 |      * @brief Whether to automatically subscribe to remote users' video streams.<br>
 465 |      *        - true: yes
 466 |      *        - false: no
 467 |      */
 468 |     bool auto_subscribe_video;
 469 | 
 470 |     /**
 471 |      * @locale en
 472 |      * @brief Whether to automatically publish the local user's audio stream.<br>
 473 |      *        - true: yes
 474 |      *        - false: no
 475 |      */
 476 | 
 477 |     bool auto_publish_audio;
 478 | 
 479 |     /**
 480 |      * @locale en
 481 |      * @brief Whether to automatically publish the local user's video stream.<br>
 482 |      *        - true: yes
 483 |      *        - false: no
 484 |      */
 485 |     bool auto_publish_video;
 486 | 
 487 | } byte_rtc_room_options_t;
 488 | 
 489 | /**
 490 |  * @locale en
 491 |  * @type keytype
 492 |  * @brief Network event type.
 493 |  * @hidden
 494 |  */
 495 | typedef enum {
 496 |     NETWORK_EVENT_DOWN = 0,
 497 |     NETWORK_EVENT_UP,
 498 |     NETWORK_EVENT_CHANGE,
 499 | } network_event_type_e;
 500 | 
 501 | /**
 502 |  * @locale en
 503 |  * @type keytype
 504 |  * @brief Real-time signaling message type.
 505 |  */
 506 | typedef enum {
 507 |     /**
 508 |      * @locale en
 509 |      * @brief Reliable message
 510 |      */
 511 |     RTS_MESSAGE_RELIABLE = 0,
 512 |     
 513 | } rts_message_type;
 514 | 
 515 | typedef void * byte_rtc_engine_t; /* Engine handle type; passed as the `engine` argument of the SDK callbacks. */
 516 | 
 517 | 
 518 | /**
 519 |  * @locale zh
 520 |  * @type callback
 521 |  * @brief SDK 事件回调结构体
 522 |  * @note 回调函数是在 SDK 内部线程同步抛出来的，请不要做耗时操作，否则可能导致 SDK 运行异常。
 523 |  */
 524 | typedef struct {
 525 | 
 526 | /**
 527 |  * @locale zh
 528 |  * @type callback
 529 |  * @list 回调
 530 |  * @order 0
 531 |  * @brief SDK 错误信息回调 <br>
 532 |  * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例
 533 |  * @param code 错误码，参看 error_code_e{@link #error_code_e}
 534 |  * @param msg 错误信息
 535 |  */
 536 | void (*on_global_error)(byte_rtc_engine_t engine,int code, const char * msg);
 537 | 
 538 | /**
 539 |  * @locale zh
 540 |  * @type callback
 541 |  * @list 回调
 542 |  * @order 2
 543 |  * @brief 加入房间成功回调。
 544 |  * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例
 545 |  * @param room 房间名
 546 |  * @param elapsed_ms 从开始加入房间到加入房间成功的耗时，单位：毫秒
 547 |  * @param rejoin 当网络断开时，重连后自动触发重新加入房间
 548 |  *               - True：重新加入房间
 549 |  *               - False：首次加入房间
 550 |  */
 551 | void (*on_join_room_success)(byte_rtc_engine_t engine,const char * room, int elapsed_ms, bool rejoin);
 552 | 
 553 | 
 554 | /**
 555 |  * @locale zh
 556 |  * @type callback
 557 |  * @list 回调
 558 |  * @order 1
 559 |  * @brief 加入房间失败或异常退出房间回调
 560 |  * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例
 561 |  * @param room 房间名
 562 |  * @param code 错误码，参看 error_code_e{@link #error_code_e}
 563 |  * @param msg 错误信息
 564 |  */
 565 | void (*on_room_error)(byte_rtc_engine_t engine,const char * room, int code, const char * msg);
 566 | 
 567 | 
 568 | /**
 569 |  * @locale zh
 570 |  * @type callback
 571 |  * @list 回调
 572 |  * @order 3
 573 |  * @brief 远端用户加入房间回调 <br>
 574 |           远端用户断网后重新连入房间时，房间内其他用户将收到该事件  <br>
 575 |           新加入房间用户也会收到加入房间前已在房间内的用户的入房间回调通知
 576 |  * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例
 577 |  * @param room 房间名
 578 |  * @param uid 远端用户名
 579 |  * @param elapsed_ms 加入房间耗时（保留字段）
 580 |  */
 581 | void (*on_user_joined)(byte_rtc_engine_t engine,const char * room, const char * uid,int elapsed_ms);
 582 | 
 583 | /**
 584 |  * @locale zh
 585 |  * @type callback
 586 |  * @list 回调
 587 |  * @order 4
 588 |  * @brief 远端用户离开房间<br>
 589 |  *        房间内其他用户会收到此事件
 590 |  * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例
 591 |  * @param room 房间名
 592 |  * @param uid 远端用户名
 593 |  * @param reason 用户离开房间的原因, 参看 user_offline_reason_e{@link #user_offline_reason_e}
 594 |  */
 595 | void (*on_user_offline)(byte_rtc_engine_t engine,const char * room, const char * uid , int reason);
 596 | 
 597 | /**
 598 |  * @locale zh
 599 |  * @type callback
 600 |  * @list 回调
 601 |  * @order 8
 602 |  * @brief 房间内用户暂停/恢复发送音频流时，房间内其他用户会收到此回调
 603 |  * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例
 604 |  * @param room 房间名
 605 |  * @param uid 远端用户名
 606 |  * @param muted 发送状态 <br>
 607 | *         - true(1)：不发送 <br>
 608 |  *        - false(0)：发送
 609 |  */
 610 | void (*on_user_mute_audio)(byte_rtc_engine_t engine,const char * room, const char * uid ,int muted);
 611 | 
 612 | /**
 613 |  * @locale zh
 614 |  * @type callback
 615 |  * @list 回调
 616 |  * @order 7
 617 |  * @brief 房间内用户暂停/恢复发送视频流时，房间内其他用户会收到此回调
 618 |  * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例
 619 |  * @param room 房间名
 620 |  * @param uid 远端用户名
 621 |  * @param muted 发送状态 <br>
 622 | *         - true(1)：不发送 <br>
 623 |  *        - false(0)：发送
 624 |  */
 625 | void (*on_user_mute_video)(byte_rtc_engine_t engine,const char * room, const char * uid ,int muted);
 626 | 
 627 | /**
 628 |  * @locale zh
 629 |  * @type callback 
 630 |  * @list 回调
 631 |  * @order 9
 632 |  * @brief 提示流发布端需重新生成关键帧的回调
 633 |  * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例
 634 |  * @param room 房间名
 635 |  * @param uid 远端用户名
 636 |  */
 637 | void (*on_key_frame_gen_req)(byte_rtc_engine_t engine,const char * room, const char * uid);
 638 | 
 639 | /**
 640 |  * @locale zh
 641 |  * @type callback
 642 |  * @list 回调
 643 |  * @order 6
 644 |  * @brief 返回远端单个用户的音频数据
 645 |  * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例
 646 |  * @param room 房间名
 647 |  * @param uid 远端用户名
 648 |  * @param sent_ts 发送时间 （暂不支持）
 649 |  * @param codec 音频数据类型，参看 audio_data_type_e{@link #audio_data_type_e}
 650 |  * @param data_ptr 音频数据
 651 |  * @param data_len 音频数据长度，单位字节
 652 |  */
 653 | void (*on_audio_data)(byte_rtc_engine_t engine,const char * room, const char * uid ,uint16_t sent_ts,
 654 |     audio_data_type_e codec, const void * data_ptr, size_t data_len);
 655 | 
 656 | /**
 657 |  * @locale zh
 658 |  * @type callback
 659 |  * @list 回调
 660 |  * @order 5
 661 |  * @brief 返回远端单个用户的视频数据
 662 |  * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例
 663 |  * @param room 房间名
 664 |  * @param uid  远端用户名
 665 |  * @param sent_ts 发送时间（暂不支持）
 666 |  * @param codec 视频编码类型，参看 video_data_type_e{@link #video_data_type_e}
 667 |  * @param is_key_frame 帧类型
 668 |  *                     - 0: 非关键帧
 669 |  *                     - !0: 关键帧
 670 |  * @param data_ptr 视频数据
 671 |  * @param data_len 视频数据长度，单位字节
 672 |  */
 673 | void (*on_video_data)(byte_rtc_engine_t engine,const char * room, const char * uid ,uint16_t sent_ts,
 674 |                       video_data_type_e codec, int is_key_frame,
 675 |                       const void * data_ptr, size_t data_len);
 676 | 
 677 | /**
 678 |  * @locale zh
 679 |  * @type callback
 680 |  * @list 回调
 681 |  * @order 10
 682 |  * @brief 当带宽估计码率发生变化时，触发该回调。<br>
 683 |  *        此时你需要将编码器码率调至目标码率
 684 |  * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例
 685 |  * @param room 房间名
 686 |  * @param target_bps 目标码率，单位 bps
 687 |  */
 688 | void (*on_target_bitrate_changed)(byte_rtc_engine_t engine,const char * room, uint32_t target_bps);
 689 | 
 690 | 
 691 | /**
 692 |  * @locale zh
 693 |  * @type callback
 694 |  * @list 回调
 695 |  * @order 12
 696 |  * @brief 返回远端用户发送的实时信令消息
 697 |  * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例
 698 |  * @param room 房间名
 699 |  * @param src  远端用户名
 700 |  * @param message 实时信令消息
 701 |  * @param size 实时信令消息长度
 702 |  * @param binary 是否为二进制消息
 703 |  */
 704 | 
 705 | void (*on_message_received)(byte_rtc_engine_t engine,const char * room,const char * src, const uint8_t * message,int size,bool binary);
 706 | 
 707 | 
 708 | /**
 709 |  * @locale zh
 710 |  * @type callback
 711 |  * @list 回调
 712 |  * @order 11
 713 |  * @brief 实时信令消息发送结果通知
 714 |  * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例
 715 |  * @param room 房间名
 716 |  * @param msgid  发送消息的id，用来和发送的消息匹配
 717 |  * @param error 发送消息错误码，0表示发送成功
 718 |  * @param extencontent 扩展信息，暂时未使用
 719 |  */
 720 | void (*on_message_send_result)(byte_rtc_engine_t engine,const char * room,int64_t msgid, int error,const char * extencontent);
 721 | 
 722 | /**
 723 |  * @locale zh
 724 |  * @type callback
 725 |  * @list 回调
 726 |  * @order 13
 727 |  * @brief Token 加入房间权限过期前 30 秒，触发该回调。<br>
 728 |  *        收到该回调后，你需调用 `byte_rtc_renew_token` 更新 Token 加入房间权限
 729 |  * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例
 730 |  * @param room 房间名
 731 |  */
 732 | void (*on_token_privilege_will_expire)(byte_rtc_engine_t engine,const char * room);
 733 | 
 734 | /**
 735 |  * @locale zh
 736 |  * @type callback
 737 |  * @list 回调
 738 |  * @order 14
 739 |  * @brief license 过期提醒。在剩余天数低于 30 天时，收到此回调
 740 |  * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例
 741 |  * @param daysleft license 剩余有效天数
 742 |  */
 743 | 
 744 | void (*on_license_expire_warning)(byte_rtc_engine_t engine,int daysleft);
 745 | 
 746 | /**
 747 |  * @locale zh
 748 |  * @type callback
 749 |  * @list 回调
 750 |  * @order 15
 751 |  * @brief engine 实例清理(byte_rtc_fini)结束通知，只有收到该通知之后，重新创建实例(byte_rtc_init)才是安全的
 752 |  * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例
 753 |  */
 754 | 
 755 | void (*on_fini_notify)(byte_rtc_engine_t engine);
 756 | 
 757 | } byte_rtc_event_handler_t;
 758 | 
 759 | 
 760 | /**
 761 |  * @locale zh
 762 |  * @type api
 763 |  * @list 方法
 764 |  * @order 1
 765 |  * @brief 获取 SDK 版本号
 766 |  * @return SDK 版本号
 767 |  */
 768 | extern __byte_rtc_api__ const char * byte_rtc_get_version(void);
 769 | 
 770 | /**
 771 |  * @locale zh
 772 |  * @type api
 773 |  * @hidden
 774 |  * @brief 错误码转对应字符串
 775 |  * @note 不必释放此字符串
 776 |  * @param err 错误码
 777 |  * @return 错误信息
 778 |  */
 779 | extern __byte_rtc_api__ const char * byte_rtc_err_2_str(int err);
 780 | 
 781 | /**
 782 |  * @locale zh
 783 |  * @type api
 784 |  * @list 方法
 785 |  * @order 7
 786 |  * @brief 设置 SDK 日志等级
 787 |  * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例
 788 |  * @param level 日志等级，参看 byte_rtc_log_level_e{@link #byte_rtc_log_level_e}
 789 |  */
 790 | extern __byte_rtc_api__ void byte_rtc_set_log_level(byte_rtc_engine_t engine,int level);
 791 | 
 792 | /**
 793 |  * @locale zh
 794 |  * @type api
 795 |  * @list 方法
 796 |  * @order 5
 797 |  * @brief 设置 SDK 日志文件路径、大小和数目
 798 |  * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例
 799 |  * @param log_path 日志文件存放路径
 800 |  * @param size_per_file 单个日志文件大小，单位字节
 801 |  * @param max_file_count 日志文件最大个数
 802 |  * @return 方法调用结果：<br>
 803 |  *          - 0：成功
 804 |  *          - -1：失败，路径参数无效
 805 |  */
 806 | extern __byte_rtc_api__ int byte_rtc_config_log(byte_rtc_engine_t engine,const char * log_path,int size_per_file, int max_file_count);
 807 | 
 808 | /**
 809 |  * @locale zh
 810 |  * @type api
 811 |  * @list 方法
 812 |  * @order 3
 813 |  * @brief 创建引擎实例,该方法是整个SDK调用的第一个方法
 814 |  * @param app_id 应用 ID
 815 |  * @param event_handler 回调方法，参看 byte_rtc_event_handler_t{@link #byte_rtc_event_handler_t}
 816 |  * @return 引擎实例
 817 |  */
 818 | extern __byte_rtc_api__ byte_rtc_engine_t byte_rtc_create(const char * app_id, const byte_rtc_event_handler_t * event_handler);
 819 | 
 820 | /**
 821 |  * @locale zh
 822 |  * @type api
 823 |  * @list 方法
 824 |  * @order 13
 825 |  * @brief 初始化引擎实例
 826 |  * @note 仅能被初始化一次
 827 |  * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例
 828 |  * @return 方法调用结果：<br>
 829 |  *         - 0：成功 <br>
 830 |  *         - -1: appid 或 event_handler 为空 <br>
 831 |  *         - -2：引擎实例已被初始化 <br>
 832 |  *         - -3：引擎实例创建失败，请检查是否有可用内存
 833 |  */
 834 | extern __byte_rtc_api__ int byte_rtc_init(byte_rtc_engine_t engine);
 835 | 
 836 | /**
 837 |  * @locale zh
 838 |  * @type api
 839 |  * @list 方法
 840 |  * @order 35
 841 |  * @brief 销毁引擎实例,VolcEngineRTCLite内部完成销毁操作之后，通过回调（on_fini_notify）通知上层 参看 byte_rtc_event_handler_t{@link #byte_rtc_event_handler_t}
 842 |  * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例
 843 |  * @return 方法调用结果：
 844 |  *         - 0：成功 <br>
 845 |  *         - -1: 引擎实例不存在
 846 |  */
 847 | extern __byte_rtc_api__ int byte_rtc_fini(byte_rtc_engine_t engine);
 848 | 
 849 | /**
 850 |  * @locale zh
 851 |  * @type api
 852 |  * @list 方法
 853 |  * @order 37
 854 |  * @brief 销毁引擎实例,只有在收到on_fini_notify的回调之后，调用此方法才是安全的
 855 |  * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例
 856 |  */
 857 | extern __byte_rtc_api__ void byte_rtc_destory(byte_rtc_engine_t engine);
 858 | 
 859 | 
 860 | /**
 861 |  * @locale zh
 862 |  * @type api
 863 |  * @list 方法
 864 |  * @order 9
 865 |  * @brief 将自定义的数据与引擎实例关联起来
 866 |  * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例
 867 |  * @param user_data 设置用户自定义数据
 868 |  */
 869 | extern __byte_rtc_api__ void byte_rtc_set_user_data(byte_rtc_engine_t engine,void * user_data);
 870 | 
 871 | 
 872 | /**
 873 |  * @locale zh
 874 |  * @type api
 875 |  * @list 方法
 876 |  * @order 11
 877 |  * @brief 获取与引擎实例相关联的自定义数据
 878 |  * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例
 879 |  */
 880 | extern __byte_rtc_api__ void * byte_rtc_get_user_data(byte_rtc_engine_t engine);
 881 | 
 882 | /**
 883 |  * @locale zh
 884 |  * @type api
 885 |  * @list 方法
 886 |  * @order 15
 887 |  * @brief 设置音频编码格式
 888 |  * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例
 889 |  * @param audio_codec_type 音频编码类型，参看 audio_codec_type_e{@link #audio_codec_type_e}
 890 |  * @return 方法调用结果：<br>
 891 |  *         - 0：成功 <br>
 892 |  *         - -1: 引擎实例不存在 <br>
 893 |  *         - -2：编码格式暂不被支持
 894 |  */
 895 | extern __byte_rtc_api__ int byte_rtc_set_audio_codec(byte_rtc_engine_t engine,audio_codec_type_e audio_codec_type);
 896 | 
 897 | /**
 898 |  * @locale zh
 899 |  * @type api
 900 |  * @list 方法
 901 |  * @order 17
 902 |  * @brief 设置视频编码格式，暂仅支持 `VIDEO_CODEC_TYPE_H264`{@link #VIDEO_CODEC_TYPE_H264} 和 `VIDEO_CODEC_TYPE_BYTEVC1`{@link #VIDEO_CODEC_TYPE_BYTEVC1}
 903 |  * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例 
 904 |  * @param video_codec_type 视频编码类型，参看 video_codec_type_e{@link #video_codec_type_e}
 905 |  * @return 方法调用结果：<br>
 906 |  *         - 0：成功。 <br>
 907 |  *         - -1: 引擎不存在。 <br>
 908 |  *         - -2：编码格式暂不被支持。
 909 |  */
 910 | extern __byte_rtc_api__ int byte_rtc_set_video_codec(byte_rtc_engine_t engine,video_codec_type_e video_codec_type);
 911 | 
 912 | /**
 913 |  * @locale zh
 914 |  * @type api
 915 |  * @list 方法
 916 |  * @order 19
 917 |  * @brief 加入房间
 918 |  * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例
 919 |  * @param room 房间名
 920 |  * @param uid 用户名
 921 |  * @param token 动态密钥，用于对加入房间用户进行鉴权验证  <br>
 922 |  * @param options 房间音视频自动订阅设置，参看 byte_rtc_room_options_t{@link #byte_rtc_room_options_t} <br>
 923 |  *                此版本无效，默认使用自动订阅
 924 |  * @return 方法调用结果：<br>
 925 |  *         - 0：成功 <br>
 926 |  *         - -1：引擎实例不存在 <br>
 927 |  *         - -2：输入参数为空 <br>
 928 |  *         - -3：已加入过房间
 929 |  */
 930 | extern __byte_rtc_api__ int byte_rtc_join_room(byte_rtc_engine_t engine,const char * room, const char * uid,
 931 |                                                 const char * token, byte_rtc_room_options_t * options);
 932 | 
 933 | 
 934 | /**
 935 |  * @locale zh
 936 |  * @type api
 937 |  * @order 33
 938 |  * @list 方法
 939 |  * @brief  退出房间
 940 |  * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例
 941 |  * @param room 房间名
 942 |  * @return 方法调用结果：<br>
 943 |  *         - 0：成功 <br>
 944 |  *         - -1：引擎实例不存在 <br>
 945 |  *         - -2：输入参数为空
 946 |  */
 947 | extern __byte_rtc_api__ int byte_rtc_leave_room(byte_rtc_engine_t engine,const char * room);
 948 | 
 949 | /**
 950 |  * @locale zh
 951 |  * @type api
 952 |  * @list 方法
 953 |  * @order 31
 954 |  * @brief 更新 Token <br>
 955 |  *        收到 on_token_privilege_will_expire{@link #byte_rtc_event_handler_t#on_token_privilege_will_expire} 时，必须重新获取 Token，调用此方法更新 Token，以保证通话的正常进行
 956 |  * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例
 957 |  * @param room 房间名
 958 |  * @param token 动态密钥。用于对加入房间用户进行鉴权验证  <br>
 959 |  * @return 方法调用结果：<br>
 960 |  *         - 0：成功 <br>
 961 |  *         - -1：引擎实例不存在 <br>
 962 |  *         - -2：输入参数为空
 963 |  */
 964 | extern __byte_rtc_api__ int byte_rtc_renew_token(byte_rtc_engine_t engine,const char * room,const char * token);
 965 | 
 966 | /**
 967 |  * @locale zh
 968 |  * @type api
 969 |  * @list 方法
 970 |  * @order 27
 971 |  * @brief 控制媒体流(本端 &远端)(音频 & 视频)流状态
 972 |  * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例
 973 |  * @param room 房间名
 974 |  * @param uid 用户Id，非空控制的是远端的用户，空值控制的是本端用户
 975 |  * @param video 媒体类型，true:视频流，false:音频流
 976 |  * @param mute 接收状态
 977 |  * @return 方法调用结果：<br>
 978 |  *         - 0：成功 <br>
 979 |  *         - -1：引擎实例不存在 <br>
 980 |  *         - -2：输入参数为空
 981 |  */
 982 | extern __byte_rtc_api__ int byte_rtc_mute(byte_rtc_engine_t engine,const char * room, const char * uid, bool video, bool mute);
 983 | 
 984 | /**
 985 |  * @locale zh
 986 |  * @type api
 987 |  * @list 方法
 988 |  * @order 29
 989 |  * @brief 请求远端用户重编关键帧
 990 |  * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例
 991 |  * @param room 房间名
 992 |  * @param remote_uid 远端用户名
 993 |  * @return 方法调用结果：<br>
 994 |  *         - 0：成功 <br>
 995 |  *         - -1：引擎实例不存在 <br>
 996 |  *         - -2：输入参数为空
 997 |  */
 998 | extern __byte_rtc_api__ int byte_rtc_request_video_key_frame(byte_rtc_engine_t engine,const char * room, const char * remote_uid);
 999 | 
1000 | /**
1001 |  * @locale zh
1002 |  * @type api
1003 |  * @list 方法
1004 |  * @order 21
1005 |  * @brief 发送音频帧
1006 |  * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例
1007 |  * @param room 房间名
1008 |  * @param data_ptr 音频数据
1009 |  * @param data_len 数据长度，单位字节
1010 |  * @param info_ptr 音频帧信息，参看 audio_frame_info_t{@link #audio_frame_info_t}
1011 |  * @return 方法调用结果：<br>
1012 |  *         - 0：成功 <br>
1013 |  *         - -1：引擎实例不存在 <br>
1014 |  *         - -2：输入参数为空
1015 |  */
1016 | extern __byte_rtc_api__ int byte_rtc_send_audio_data(byte_rtc_engine_t engine,const char * room,const void * data_ptr, size_t data_len,
1017 |                                                    audio_frame_info_t * info_ptr);
1018 | 
1019 | /**
1020 |  * @locale zh
1021 |  * @type api
1022 |  * @list 方法
1023 |  * @order 23
1024 |  * @brief 发送视频帧
1025 |  * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例
1026 |  * @param room 房间名
1027 |  * @param data_ptr 视频数据
1028 |  * @param data_len 数据长度
1029 |  * @param info_ptr 视频帧信息，参看 video_frame_info_t{@link #video_frame_info_t}
1030 |  * @note - 仅支持 `VIDEO_DATA_TYPE_H264` <br>
1031 |  *       - 每个用户仅支持一路流，仅使用 `VIDEO_STREAM_HIGH`
1032 |  * @return 方法调用结果：<br>
1033 |  *         - 0：成功 <br>
1034 |  *         - -1：引擎实例不存在 <br>
1035 |  *         - -2：输入参数为空
1036 |  */
1037 | extern __byte_rtc_api__ int byte_rtc_send_video_data(byte_rtc_engine_t engine,const char * room,const void *data_ptr, size_t data_len,
1038 |                                                    video_frame_info_t * info_ptr);
1039 | 
1040 | 
1041 | /**
1042 |  * @locale zh
1043 |  * @type api
1044 |  * @list 方法
1045 |  * @order 25
1046 |  * @brief 发送实时信令消息
1047 |  * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例
1048 |  * @param room 房间名
1049 |  * @param target 接收消息的目标用户，如果target传NULL表示发送的是房间内广播消息
1050 |  * @param data_ptr 实时信令消息数据
1051 |  * @param data_len 实时信令消息长度
1052 |  * @param binary 指定消息是否是二进制消息
1053 |  * @param type 用于指定实时信令消息类型，目前仅支持RTS_MESSAGE_RELIABLE
1054 | 
1055 |  * @return 方法调用结果：<br>
1056 |  *         - < 0：失败 <br>
1057 |  *         - >= 0：消息id<br>
1058 |  */
1059 | 
1060 | extern __byte_rtc_api__ int64_t byte_rtc_rts_send_message(byte_rtc_engine_t engine,const char * room,const char * target,const void * data_ptr, 
1061 |                                                     size_t data_len,bool binary,rts_message_type type);
1062 | 
1063 | /**
1064 |  * @locale zh
1065 |  * @type api
1066 |  * @hidden
1067 |  * @brief 设置 SDK 参数
1068 |  * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例
1069 |  * @param params json 格式参数
1070 |  * @return 方法调用结果：<br>
1071 |  *         - 0：成功 <br>
1072 |  *         - <0：失败
1073 |  */
1074 | extern __byte_rtc_api__ int byte_rtc_set_params(byte_rtc_engine_t engine,const char * params);
1075 | 
1076 | #ifdef __cplusplus
1077 | }
1078 | #endif
1079 | #endif /* __BYTE_RTC_API_H__ */
1080 | 


--------------------------------------------------------------------------------
/client/espressif/esp32s3_demo/components/VolcEngineRTCLite/libs/esp32s3/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright 2025 Beijing Volcano Engine Technology Co., Ltd. All Rights Reserved.
 2 | 
 3 | The VolcEngineRTCLite was developed by Beijing Volcano Engine Technology Co., Ltd. (hereinafter “Volcano Engine”). Any rights, including but not limited to any copyrights or patent rights, are owned by and proprietary material of the Volcano Engine. 
 4 | 
 5 | VolcEngineRTCLite is licensed solely for integration within Volcengine Real Time Communication which shall be used under a valid commercial license.  Customers can contact conversational_ai@bytedance.com for such commercial license.  Here is also a link to Volcengine Real Time Communication: https://www.volcengine.com/docs/6348/1310537.
 6 | 
 7 | Without Volcano Engine's prior written permission, any use of VolcEngineRTCLite beyond its incorporation into Volcengine Real Time Communication, in particular any use for commercial purposes, is strictly prohibited. This includes, without limitation, incorporation in a commercial product, use in a commercial service, or production of other artefacts for commercial purposes. 
 8 | 
 9 | Without Volcano Engine's prior written permission, you shall not reproduce, modify, decompile, disassemble, reverse-engineer, or create derivative works of VolcEngineRTCLite, nor access, extract, or obtain its source code by any means, or make it available in any form to any third party in any form. Any unauthorized modifications, derivative works, or adaptations of VolcEngineRTCLite, along with all associated intellectual property rights, shall automatically and exclusively vest in Volcano Engine.
10 | 
11 | Except otherwise provided by Volcano Engine, the VolcEngineRTCLite is provided “AS IS” without any warranties. Volcano Engine disclaims all liability for damages arising from its use. 


--------------------------------------------------------------------------------
/client/espressif/esp32s3_demo/components/VolcEngineRTCLite/libs/esp32s3/PLACEHOLDER:
--------------------------------------------------------------------------------
1 | 放置libVolcEngineRTCLite.a
2 | 


--------------------------------------------------------------------------------
/client/espressif/esp32s3_demo/components/VolcEngineRTCLite/libs/esp32s3/libVolcEngineRTCLite.a:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/volcengine/rtc-aigc-embedded-demo/9076c6c76592bef14f33b019f9441875ffc5b69c/client/espressif/esp32s3_demo/components/VolcEngineRTCLite/libs/esp32s3/libVolcEngineRTCLite.a


--------------------------------------------------------------------------------
/client/espressif/esp32s3_demo/main/AudioPipeline.c:
--------------------------------------------------------------------------------
  1 | // Copyright (2025) Beijing Volcano Engine Technology Ltd.
  2 | // SPDX-License-Identifier: MIT
  3 | 
  4 | #include "AudioPipeline.h"
  5 | #include <string.h>
  6 | #include "esp_log.h"
  7 | #include "sdkconfig.h"
  8 | #include "audio_element.h"
  9 | #include "audio_pipeline.h"
 10 | #include "audio_event_iface.h"
 11 | #include "audio_common.h"
 12 | #include "audio_sys.h"
 13 | #include "board.h"
 14 | #include "algorithm_stream.h"
 15 | #include "filter_resample.h"
 16 | #include "i2s_stream.h"
 17 | #include "pthread.h"
 18 | #ifdef CONFIG_ESP32_S3_KORVO2_V3_BOARD
 19 | #include "es7210.h"
 20 | #elif CONFIG_M5STACK_ATOMS3R_BOARD
 21 | #include "es8311.h"
 22 | #endif
 23 | 
 24 | #include "esp_timer.h"
 25 | 
 26 | 
 27 | #if defined (RTC_DEMO_AUDIO_PIPELINE_CODEC_OPUS)
 28 | #include "raw_opus_encoder.h"
 29 | #include "raw_opus_decoder.h"
 30 | #elif defined (RTC_DEMO_AUDIO_PIPELINE_CODEC_AAC)
 31 | #include "aac_encoder.h"
 32 | #include "aac_decoder.h"
 33 | #elif defined (RTC_DEMO_AUDIO_PIPELINE_CODEC_G711A)
 34 | #include "g711_encoder.h"
 35 | #include "g711_decoder.h"
 36 | #endif
 37 | #include "audio_idf_version.h"
 38 | #include "raw_stream.h"
 39 | 
 40 | 
 41 | #define CHANNEL                     1 /* channel count handed to the Opus/AAC encoder configs */
 42 | static const char *TAG = "AUDIO_PIPELINE"; /* log tag used by this module */
 43 | #define I2S_SAMPLE_RATE             16000 /* sample rate used for the I2S reader and writer streams */
 44 | #define ALGO_SAMPLE_RATE            16000 /* sample rate given to the algorithm stream */
 45 | #ifdef CONFIG_ESP32_S3_KORVO2_V3_BOARD
 46 | #define ALGORITHM_STREAM_SAMPLE_BIT 32 /* bit width passed to the I2S stream configs */
 47 | #define CHANNEL_FORMAT              I2S_CHANNEL_TYPE_ONLY_LEFT /* argument for i2s_stream_set_channel_type() */
 48 | #define ALGORITHM_INPUT_FORMAT      "RM" /* value assigned to the algorithm stream's input_format */
 49 | #define CHANNEL_NUM                 1 /* playback channel count (i2s_stream_set_clk, player resampler output) */
 50 | #elif CONFIG_M5STACK_ATOMS3R_BOARD
 51 | #define ALGORITHM_STREAM_SAMPLE_BIT 16
 52 | #define CHANNEL_FORMAT              I2S_CHANNEL_TYPE_RIGHT_LEFT
 53 | #define ALGORITHM_INPUT_FORMAT      "MR"
 54 | #define CHANNEL_NUM                 2
 55 | #endif
 56 | /* Per-codec settings. CODEC_NAME is the tag the encoder element is registered under in the recorder pipeline. */
 57 | #if (RTC_DEMO_AUDIO_PIPELINE_CODEC_OPUS)
 58 | #define CODEC_NAME          "opus"
 59 | #define CODEC_SAMPLE_RATE   16000
 60 | #define BIT_RATE            32000
 61 | #define COMPLEXITY          10 /* not applied: the Opus encoder config below hard-codes complexity 0 */
 62 | #define FRAME_TIME_MS       20 
 63 | /* Opus decoder settings */
 64 | #define DEC_SAMPLE_RATE     16000
 65 | #define DEC_BIT_RATE        16000
 66 | #elif (RTC_DEMO_AUDIO_PIPELINE_CODEC_AAC)
 67 | #define CODEC_NAME          "aac"
 68 | #define CODEC_SAMPLE_RATE   16000
 69 | #define BIT_RATE            80000
 70 | #elif (RTC_DEMO_AUDIO_PIPELINE_CODEC_G711A)
 71 | #define CODEC_NAME          "g711a"
 72 | #define CODEC_SAMPLE_RATE   8000
 73 | #elif (RTC_DEMO_AUDIO_PIPELINE_CODEC_PCM)
 74 | #define CODEC_NAME          "g711a" /* NOTE(review): same tag as G711A; PCM builds create no encoder, so it is never registered - confirm intent */
 75 | #define CODEC_SAMPLE_RATE   8000
 76 | #endif
 77 | 
 78 | struct  recorder_pipeline_t { /* capture-side pipeline plus the elements registered with it */
 79 |     audio_pipeline_handle_t audio_pipeline; /* owning pipeline, created in recorder_pipeline_open() */
 80 |     audio_element_handle_t i2s_stream_reader; /* I2S reader element, registered as "i2s" */
 81 |     audio_element_handle_t audio_encoder; /* codec encoder, registered as CODEC_NAME; NULL when no encoder is compiled in */
 82 |     audio_element_handle_t raw_reader; /* raw stream read by recorder_pipeline_read(), registered as "raw" */
 83 |     audio_element_handle_t rsp; /* resample filter, registered as "rsp"; stays NULL in Opus builds */
 84 |     audio_element_handle_t algo_aec; /* algorithm stream, registered as "algo" */
 85 | };
 86 | 
 87 | 
 88 | struct  player_pipeline_t { /* playback-side pipeline plus the elements registered with it */
 89 |     audio_pipeline_handle_t audio_pipeline; /* owning pipeline, created in player_pipeline_open() */
 90 |     audio_element_handle_t raw_writer; /* raw stream element, registered as "raw"; first element of the playback link */
 91 |     audio_element_handle_t audio_decoder; /* codec decoder, registered as "dec"; NULL when no decoder is compiled in */
 92 |     audio_element_handle_t rsp; /* resample filter, registered as "rsp"; stays NULL in Opus builds */
 93 |     audio_element_handle_t i2s_stream_writer; /* I2S writer element, registered as "i2s" */
 94 | };
 95 | 
 96 | /**
 97 |  * Create a resample filter element for the requested rate/channel conversion.
 98 |  *
 99 |  * @param src_rate  Sample rate of the incoming audio.
100 |  * @param src_ch    Channel count of the incoming audio.
101 |  * @param dest_rate Sample rate to produce.
102 |  * @param dest_ch   Channel count to produce.
103 |  * @return Handle returned by rsp_filter_init().
104 |  */
105 | static audio_element_handle_t create_resample_stream(int src_rate, int src_ch, int dest_rate, int dest_ch)
106 | {
107 |     rsp_filter_cfg_t filter_cfg = DEFAULT_RESAMPLE_FILTER_CONFIG();
108 | 
109 |     filter_cfg.complexity = 5;
110 |     filter_cfg.src_rate   = src_rate;
111 |     filter_cfg.dest_rate  = dest_rate;
112 |     filter_cfg.src_ch     = src_ch;
113 |     filter_cfg.dest_ch    = dest_ch;
114 | 
115 |     return rsp_filter_init(&filter_cfg);
116 | }
107 | 
108 | /**
109 |  * Create the I2S reader element used by the recorder pipeline.
110 |  *
111 |  * Applies the board-specific microphone gain first, then builds an I2S stream
112 |  * configured as a reader with I2S_SAMPLE_RATE, ALGORITHM_STREAM_SAMPLE_BIT and
113 |  * the CHANNEL_FORMAT channel type.
114 |  *
115 |  * @return Handle returned by i2s_stream_init().
116 |  */
117 | static audio_element_handle_t create_record_i2s_stream(void)
118 | {
119 | #if CONFIG_ESP32_S3_KORVO2_V3_BOARD
120 |     es7210_adc_set_gain(ES7210_INPUT_MIC3, GAIN_30DB);
121 | #elif CONFIG_M5STACK_ATOMS3R_BOARD
122 |     es8311_set_mic_gain(ES8311_MIC_GAIN_36DB);
123 | #endif
124 |     /* NOTE(review): the original author marked these macro arguments as needing careful checking. */
125 |     i2s_stream_cfg_t capture_cfg = I2S_STREAM_CFG_DEFAULT_WITH_PARA(CODEC_ADC_I2S_PORT, I2S_SAMPLE_RATE, ALGORITHM_STREAM_SAMPLE_BIT, AUDIO_STREAM_READER);
126 | 
127 |     capture_cfg.type = AUDIO_STREAM_READER;
128 |     i2s_stream_set_channel_type(&capture_cfg, CHANNEL_FORMAT);
129 |     capture_cfg.std_cfg.clk_cfg.sample_rate_hz = I2S_SAMPLE_RATE;
130 | 
131 |     audio_element_handle_t reader = i2s_stream_init(&capture_cfg);
132 |     return reader;
133 | }
121 | 
122 | /**
123 |  * Create the encoder element for the codec selected at compile time.
124 |  *
125 |  * The element is only created here; recorder_pipeline_open() registers the
126 |  * returned handle with the pipeline under CODEC_NAME.
127 |  *
128 |  * @return Encoder element handle, or NULL when none of the Opus/AAC/G711A
129 |  *         codec macros is defined (no encoder is created in that case).
130 |  */
131 | static audio_element_handle_t create_record_encoder_stream(void)
132 | {
133 | #ifdef RTC_DEMO_AUDIO_PIPELINE_CODEC_OPUS
134 |     raw_opus_enc_config_t opus_cfg = RAW_OPUS_ENC_CONFIG_DEFAULT();
135 |     opus_cfg.sample_rate        = CODEC_SAMPLE_RATE;
136 |     opus_cfg.channel            = CHANNEL;
137 |     opus_cfg.bitrate            = BIT_RATE;
138 |     opus_cfg.complexity         = 0; // the COMPLEXITY macro is deliberately not applied here
139 |     opus_cfg.task_core          = 1;
140 |     return raw_opus_encoder_init(&opus_cfg);
141 | #elif defined (RTC_DEMO_AUDIO_PIPELINE_CODEC_AAC)
142 |     aac_encoder_cfg_t aac_cfg = DEFAULT_AAC_ENCODER_CONFIG();
143 |     aac_cfg.sample_rate        = CODEC_SAMPLE_RATE;
144 |     aac_cfg.channel            = CHANNEL;
145 |     aac_cfg.bitrate            = BIT_RATE;
146 |     // Return the element like the other branches do: this function has no
147 |     // pipeline object in scope, and the caller performs the registration.
148 |     return aac_encoder_init(&aac_cfg);
149 | #elif defined (RTC_DEMO_AUDIO_PIPELINE_CODEC_G711A)
150 |     g711_encoder_cfg_t g711_cfg = DEFAULT_G711_ENCODER_CONFIG();
151 |     return g711_encoder_init(&g711_cfg);
152 | #else
153 |     return NULL;
154 | #endif
155 | }
146 | 
147 | /**
148 |  * Create the raw stream element that ends the recorder link.
149 |  *
150 |  * The stream is configured as AUDIO_STREAM_WRITER with out_rb_size 2 * 1024,
151 |  * and its output timeout is set to portMAX_DELAY.
152 |  *
153 |  * @return Handle returned by raw_stream_init().
154 |  */
155 | static audio_element_handle_t create_record_raw_stream(void)
156 | {
157 |     raw_stream_cfg_t sink_cfg = RAW_STREAM_CFG_DEFAULT();
158 |     sink_cfg.out_rb_size = 2 * 1024;
159 |     sink_cfg.type = AUDIO_STREAM_WRITER;
160 | 
161 |     audio_element_handle_t sink = raw_stream_init(&sink_cfg);
162 |     audio_element_set_output_timeout(sink, portMAX_DELAY);
163 |     return sink;
164 | }
157 | 
158 | /**
159 |  * Create the algorithm stream element used on the capture path.
160 |  *
161 |  * Starts from ALGORITHM_STREAM_CFG_DEFAULT(), adds AGC to the default
162 |  * algorithm mask and uses the board's ALGORITHM_INPUT_FORMAT. The element's
163 |  * music info is set to (ALGO_SAMPLE_RATE, 1, 16) and its input timeout to
164 |  * portMAX_DELAY.
165 |  *
166 |  * @return Handle returned by algo_stream_init().
167 |  */
168 | static audio_element_handle_t create_record_algo_stream(void)
169 | {
170 |     ESP_LOGI(TAG, "[3.1] Create algorithm stream for aec");
171 | 
172 |     algorithm_stream_cfg_t aec_cfg = ALGORITHM_STREAM_CFG_DEFAULT();
173 |     /* swap_ch keeps its default; the original carried a disabled override for it. */
174 |     aec_cfg.input_format = ALGORITHM_INPUT_FORMAT;
175 |     aec_cfg.algo_mask    = ALGORITHM_STREAM_DEFAULT_MASK | ALGORITHM_STREAM_USE_AGC;
176 |     aec_cfg.out_rb_size  = 256;
177 |     aec_cfg.sample_rate  = ALGO_SAMPLE_RATE;
178 | 
179 |     audio_element_handle_t algo = algo_stream_init(&aec_cfg);
180 |     audio_element_set_music_info(algo, ALGO_SAMPLE_RATE, 1, 16);
181 |     audio_element_set_input_timeout(algo, portMAX_DELAY);
182 |     return algo;
183 | }
172 | 
173 | /**
174 |  * Allocate a recorder pipeline and build its capture chain.
175 |  *
176 |  * Elements are created, registered and linked as:
177 |  *   - Opus:        i2s -> algo -> encoder -> raw
178 |  *   - AAC, G711A:  i2s -> algo -> rsp -> encoder -> raw
179 |  *   - PCM:         i2s -> algo -> rsp -> raw
180 |  *
181 |  * Side effect: sets the global log level to WARN and this module's TAG to INFO.
182 |  * The pipeline is only built here; recorder_pipeline_run() starts it.
183 |  *
184 |  * @return Newly allocated handle, to be released with recorder_pipeline_close().
185 |  */
186 | recorder_pipeline_handle_t recorder_pipeline_open()
187 | {
188 |     recorder_pipeline_handle_t pipeline = heap_caps_calloc(1, sizeof(recorder_pipeline_t), MALLOC_CAP_SPIRAM | MALLOC_CAP_DEFAULT);
189 |     // Stop on allocation failure before the handle is dereferenced, as player_pipeline_open() does.
190 |     assert(pipeline != NULL);
191 |     esp_log_level_set("*", ESP_LOG_WARN);
192 |     esp_log_level_set(TAG, ESP_LOG_INFO);
193 | 
194 |     // create and register streams
195 |     audio_pipeline_cfg_t pipeline_cfg = DEFAULT_AUDIO_PIPELINE_CONFIG();
196 |     pipeline->audio_pipeline = audio_pipeline_init(&pipeline_cfg);
197 |     mem_assert(pipeline->audio_pipeline);
198 | 
199 |     pipeline->i2s_stream_reader = create_record_i2s_stream();
200 |     audio_pipeline_register(pipeline->audio_pipeline, pipeline->i2s_stream_reader, "i2s");
201 | 
202 |     pipeline->algo_aec = create_record_algo_stream();
203 |     audio_pipeline_register(pipeline->audio_pipeline, pipeline->algo_aec, "algo");
204 | 
205 | #ifndef RTC_DEMO_AUDIO_PIPELINE_CODEC_OPUS
206 |     pipeline->rsp = create_resample_stream(I2S_SAMPLE_RATE, 1, CODEC_SAMPLE_RATE, 1);
207 |     audio_pipeline_register(pipeline->audio_pipeline, pipeline->rsp, "rsp");
208 | #endif
209 | 
210 |     pipeline->audio_encoder = create_record_encoder_stream();
211 |     if (pipeline->audio_encoder) {
212 |         audio_pipeline_register(pipeline->audio_pipeline, pipeline->audio_encoder, CODEC_NAME);
213 |     }
214 | 
215 |     pipeline->raw_reader = create_record_raw_stream();
216 |     audio_pipeline_register(pipeline->audio_pipeline, pipeline->raw_reader, "raw");
217 | 
218 | #ifdef RTC_DEMO_AUDIO_PIPELINE_CODEC_OPUS
219 |     const char *link_tag[] = {"i2s", "algo", CODEC_NAME, "raw"};
220 | #elif defined (RTC_DEMO_AUDIO_PIPELINE_CODEC_AAC)
221 |     // The second stage is the "algo" element; the list previously named "aac" twice and skipped it.
222 |     const char *link_tag[] = {"i2s", "algo", "rsp", CODEC_NAME, "raw"};
223 | #elif defined (RTC_DEMO_AUDIO_PIPELINE_CODEC_G711A)
224 |     const char *link_tag[] = {"i2s", "algo", "rsp", "g711a", "raw"};
225 | #elif defined (RTC_DEMO_AUDIO_PIPELINE_CODEC_PCM)
226 |     const char *link_tag[] = {"i2s", "algo", "rsp", "raw"};
227 | #endif
228 | 
229 |     audio_pipeline_link(pipeline->audio_pipeline, &link_tag[0], sizeof(link_tag) / sizeof(link_tag[0]));
230 |     return pipeline;
231 | }
216 | 
217 | /**
218 |  * Stop the recorder pipeline and release everything recorder_pipeline_open() created.
219 |  *
220 |  * The pipeline is stopped and terminated, each element that exists is
221 |  * unregistered and deinitialized, then the pipeline and the handle are freed.
222 |  *
223 |  * @param pipeline Handle from recorder_pipeline_open(); NULL is accepted and ignored.
224 |  */
225 | void recorder_pipeline_close(recorder_pipeline_handle_t pipeline)  {
226 |     if (pipeline == NULL) {
227 |         return;
228 |     }
229 | 
230 |     audio_pipeline_stop(pipeline->audio_pipeline);
231 |     audio_pipeline_wait_for_stop(pipeline->audio_pipeline);
232 |     audio_pipeline_terminate(pipeline->audio_pipeline);
233 | 
234 |     // Teardown order is unchanged; elements that were never created are NULL and skipped.
235 |     audio_element_handle_t elements[] = {
236 |         pipeline->i2s_stream_reader,
237 |         pipeline->audio_encoder,
238 |         pipeline->raw_reader,
239 |         pipeline->rsp,
240 |         pipeline->algo_aec,
241 |     };
242 |     for (size_t i = 0; i < sizeof(elements) / sizeof(elements[0]); i++) {
243 |         if (elements[i]) {
244 |             audio_pipeline_unregister(pipeline->audio_pipeline, elements[i]);
245 |             audio_element_deinit(elements[i]);
246 |         }
247 |     }
248 | 
249 |     audio_pipeline_deinit(pipeline->audio_pipeline);
250 |     heap_caps_free(pipeline);
251 | }
246 | 
247 | /**
248 |  * Start the recorder pipeline.
249 |  *
250 |  * @param pipeline Handle from recorder_pipeline_open().
251 |  */
252 | void recorder_pipeline_run(recorder_pipeline_handle_t pipeline)
253 | {
254 |     audio_pipeline_handle_t capture_chain = pipeline->audio_pipeline;
255 |     audio_pipeline_run(capture_chain);
256 | }
250 | 
251 | /**
252 |  * Return the default read size for the codec selected at compile time.
253 |  *
254 |  * NOTE(review): presumably the buf_size to pass to recorder_pipeline_read() - confirm with callers.
255 |  *
256 |  * @param pipeline Recorder handle (not used by the lookup).
257 |  * @return 80 for Opus, 160 for G711A, 320 for PCM; -1 for AAC or when no
258 |  *         codec macro is defined.
259 |  */
260 | int recorder_pipeline_get_default_read_size(recorder_pipeline_handle_t pipeline){
261 | #if defined (RTC_DEMO_AUDIO_PIPELINE_CODEC_OPUS)
262 |     return 80;
263 | #elif defined (RTC_DEMO_AUDIO_PIPELINE_CODEC_AAC)
264 |     return -1;
265 | #elif defined (RTC_DEMO_AUDIO_PIPELINE_CODEC_G711A)
266 |     return 160;
267 | #elif defined (RTC_DEMO_AUDIO_PIPELINE_CODEC_PCM)
268 |     return 320;
269 | #else
270 |     // No codec selected: return the same "no default" value as AAC rather than
271 |     // falling off the end of a non-void function.
272 |     return -1;
273 | #endif
274 | }
262 | 
263 | /**
264 |  * Get the raw stream element of the recorder pipeline.
265 |  *
266 |  * @param pipeline Handle from recorder_pipeline_open().
267 |  * @return The handle stored in pipeline->raw_reader.
268 |  */
269 | audio_element_handle_t recorder_pipeline_get_raw_reader(recorder_pipeline_handle_t pipeline)
270 | {
271 |     audio_element_handle_t reader = pipeline->raw_reader;
272 |     return reader;
273 | }
266 | /**
267 |  * Get the underlying audio pipeline of the recorder.
268 |  *
269 |  * @param pipeline Handle from recorder_pipeline_open().
270 |  * @return The handle stored in pipeline->audio_pipeline.
271 |  */
272 | audio_pipeline_handle_t recorder_pipeline_get_pipeline(recorder_pipeline_handle_t pipeline)
273 | {
274 |     audio_pipeline_handle_t capture_chain = pipeline->audio_pipeline;
275 |     return capture_chain;
276 | }
269 | 
270 | /**
271 |  * Read data from the recorder pipeline's raw stream element.
272 |  *
273 |  * @param pipeline Handle from recorder_pipeline_open().
274 |  * @param buffer   Destination buffer.
275 |  * @param buf_size Size passed through to raw_stream_read().
276 |  * @return Whatever raw_stream_read() returns.
277 |  */
278 | int recorder_pipeline_read(recorder_pipeline_handle_t pipeline,char *buffer, int buf_size)
279 | {
280 |     audio_element_handle_t reader = pipeline->raw_reader;
281 |     int result = raw_stream_read(reader, buffer, buf_size);
282 |     return result;
283 | }
273 | 
274 | /**
275 |  * Create the raw stream element that starts the player link.
276 |  *
277 |  * The stream is configured as AUDIO_STREAM_READER with out_rb_size 8 * 1024.
278 |  *
279 |  * @return Handle returned by raw_stream_init().
280 |  */
281 | static audio_element_handle_t create_player_raw_stream(void)
282 | {
283 |     raw_stream_cfg_t source_cfg = RAW_STREAM_CFG_DEFAULT();
284 |     source_cfg.out_rb_size = 8 * 1024;
285 |     source_cfg.type = AUDIO_STREAM_READER;
286 | 
287 |     audio_element_handle_t source = raw_stream_init(&source_cfg);
288 |     return source;
289 | }
281 | 
282 | /**
283 |  * Create the I2S writer element used by the player pipeline.
284 |  *
285 |  * Builds an I2S_NUM_0 writer stream with I2S_SAMPLE_RATE and
286 |  * ALGORITHM_STREAM_SAMPLE_BIT, applies the CHANNEL_FORMAT channel type, and
287 |  * sets the stream clock to (I2S_SAMPLE_RATE, ALGORITHM_STREAM_SAMPLE_BIT,
288 |  * CHANNEL_NUM) once the element exists.
289 |  *
290 |  * @return Handle returned by i2s_stream_init().
291 |  */
292 | static audio_element_handle_t create_player_i2s_stream(void)
293 | {
294 |     i2s_stream_cfg_t playback_cfg = I2S_STREAM_CFG_DEFAULT_WITH_PARA(I2S_NUM_0, I2S_SAMPLE_RATE, ALGORITHM_STREAM_SAMPLE_BIT, AUDIO_STREAM_WRITER);
295 | 
296 |     playback_cfg.type = AUDIO_STREAM_WRITER;
297 | #ifdef CONFIG_ESP32_S3_KORVO2_V3_BOARD
298 |     /* Constant-true expression kept as written. NOTE(review): looks like a 16-bit vs 32-bit width comparison - confirm. */
299 |     playback_cfg.need_expand = (16 != 32);
300 | #endif
301 |     playback_cfg.out_rb_size = 8 * 1024;
302 |     playback_cfg.buffer_len = 1416; /* the original noted 708 as an alternative value */
303 |     i2s_stream_set_channel_type(&playback_cfg, CHANNEL_FORMAT);
304 | 
305 |     audio_element_handle_t writer = i2s_stream_init(&playback_cfg);
306 |     i2s_stream_set_clk(writer, I2S_SAMPLE_RATE, ALGORITHM_STREAM_SAMPLE_BIT, CHANNEL_NUM);
307 |     return writer;
308 | }
296 | 
297 | /**
298 |  * Create the decoder element for the codec selected at compile time.
299 |  *
300 |  * Codec selection uses the same `defined(...)` tests as
301 |  * create_record_encoder_stream(), so the encoder and decoder always agree on
302 |  * which codec is active.
303 |  *
304 |  * @return Decoder element handle, or NULL when none of the Opus/AAC/G711A
305 |  *         codec macros is defined (no decoder is created in that case).
306 |  */
307 | static audio_element_handle_t create_player_decoder_stream(void)
308 | {
309 | #ifdef RTC_DEMO_AUDIO_PIPELINE_CODEC_OPUS
310 |     raw_opus_dec_cfg_t opus_dec_cfg = RAW_OPUS_DEC_CONFIG_DEFAULT();
311 |     opus_dec_cfg.enable_frame_length_prefix = true;
312 |     opus_dec_cfg.sample_rate = DEC_SAMPLE_RATE;
313 |     opus_dec_cfg.channels = 1;
314 |     opus_dec_cfg.task_core = 1;
315 |     return raw_opus_decoder_init(&opus_dec_cfg);
316 | #elif defined (RTC_DEMO_AUDIO_PIPELINE_CODEC_AAC)
317 |     aac_decoder_cfg_t  aac_dec_cfg  = DEFAULT_AAC_DECODER_CONFIG();
318 |     return aac_decoder_init(&aac_dec_cfg);
319 | #elif defined (RTC_DEMO_AUDIO_PIPELINE_CODEC_G711A)
320 |     g711_decoder_cfg_t g711_dec_cfg = DEFAULT_G711_DECODER_CONFIG();
321 |     g711_dec_cfg.out_rb_size = 8 * 1024;
322 |     return g711_decoder_init(&g711_dec_cfg);
323 | #else
324 |     return NULL;
325 | #endif
326 | }
317 | 
318 | /**
319 |  * Allocate a player pipeline and build its playback chain.
320 |  *
321 |  * Elements are created, registered and linked as:
322 |  *   - PCM:   raw -> rsp -> i2s
323 |  *   - Opus:  raw -> dec -> i2s
324 |  *   - other: raw -> dec -> rsp -> i2s
325 |  *
326 |  * Side effect: sets the global log level to WARN and this module's TAG to INFO.
327 |  * The pipeline is only built here; player_pipeline_run() starts it.
328 |  *
329 |  * @return Newly allocated handle, to be released with player_pipeline_close().
330 |  */
331 | player_pipeline_handle_t player_pipeline_open(void) {
332 |     player_pipeline_handle_t player_pipeline = heap_caps_calloc(1, sizeof(player_pipeline_t), MALLOC_CAP_SPIRAM | MALLOC_CAP_DEFAULT);
333 |     assert(player_pipeline != 0);
334 |     esp_log_level_set("*", ESP_LOG_WARN);
335 |     esp_log_level_set(TAG, ESP_LOG_INFO);
336 | 
337 |     audio_pipeline_cfg_t pipeline_cfg = DEFAULT_AUDIO_PIPELINE_CONFIG();
338 |     player_pipeline->audio_pipeline = audio_pipeline_init(&pipeline_cfg);
339 |     // Check the pipeline that was just created; the previous argument named an
340 |     // identifier that does not exist in this function.
341 |     mem_assert(player_pipeline->audio_pipeline);
342 | 
343 |     player_pipeline->raw_writer = create_player_raw_stream();
344 |     audio_pipeline_register(player_pipeline->audio_pipeline, player_pipeline->raw_writer, "raw");
345 | 
346 |     player_pipeline->i2s_stream_writer = create_player_i2s_stream();
347 |     audio_pipeline_register(player_pipeline->audio_pipeline, player_pipeline->i2s_stream_writer, "i2s");
348 | 
349 | #ifndef RTC_DEMO_AUDIO_PIPELINE_CODEC_OPUS
350 |     player_pipeline->rsp = create_resample_stream(CODEC_SAMPLE_RATE, 1, I2S_SAMPLE_RATE, CHANNEL_NUM);
351 |     audio_element_set_output_timeout(player_pipeline->rsp, portMAX_DELAY);
352 |     audio_pipeline_register(player_pipeline->audio_pipeline, player_pipeline->rsp, "rsp");
353 | #endif
354 | 
355 |     player_pipeline->audio_decoder = create_player_decoder_stream();
356 |     if (player_pipeline->audio_decoder != NULL) {
357 |         audio_pipeline_register(player_pipeline->audio_pipeline, player_pipeline->audio_decoder, "dec");
358 |     }
359 | 
360 | #if defined (RTC_DEMO_AUDIO_PIPELINE_CODEC_PCM)
361 |     const char *link_tag[] = {"raw", "rsp", "i2s"};
362 | #elif defined(RTC_DEMO_AUDIO_PIPELINE_CODEC_OPUS)
363 |     const char *link_tag[] = {"raw", "dec", "i2s"};
364 | #else
365 |     const char *link_tag[] = {"raw", "dec", "rsp", "i2s"};
366 | #endif
367 |     audio_pipeline_link(player_pipeline->audio_pipeline, &link_tag[0], sizeof(link_tag) / sizeof(link_tag[0]));
368 | 
369 |     return player_pipeline;
370 | }
357 | 
358 | /* Start the player's audio pipeline; thin wrapper over audio_pipeline_run(). */
359 | void player_pipeline_run(player_pipeline_handle_t player_pipeline) {
360 |     audio_pipeline_run(player_pipeline->audio_pipeline);
361 | }
362 | 
363 | /* Stop the pipeline, unregister and deinit every element that was created, then free the handle. NULL is ignored. */
364 | void player_pipeline_close(player_pipeline_handle_t player_pipeline) {
365 |     if (player_pipeline == NULL) {
366 |         return;
367 |     }
368 | 
369 |     audio_pipeline_stop(player_pipeline->audio_pipeline);
370 |     audio_pipeline_wait_for_stop(player_pipeline->audio_pipeline);
371 |     audio_pipeline_terminate(player_pipeline->audio_pipeline);
372 | 
373 |     // Teardown order is unchanged: raw writer, decoder, resampler, i2s writer. Empty (NULL) slots are skipped.
374 |     audio_element_handle_t elements[] = {
375 |         player_pipeline->raw_writer, player_pipeline->audio_decoder,
376 |         player_pipeline->rsp, player_pipeline->i2s_stream_writer,
377 |     };
378 |     for (size_t i = 0; i < sizeof(elements) / sizeof(elements[0]); ++i) {
379 |         if (elements[i] != NULL) {
380 |             audio_pipeline_unregister(player_pipeline->audio_pipeline, elements[i]);
381 |             audio_element_deinit(elements[i]);
382 |         }
383 |     }
384 | 
385 |     audio_pipeline_deinit(player_pipeline->audio_pipeline);
386 |     heap_caps_free(player_pipeline);
387 | }
388 | 
389 | int player_pipeline_write(player_pipeline_handle_t player_pipeline, char *buffer, int buf_size) {  // feed buf_size bytes to the raw writer element
390 |     raw_stream_write(player_pipeline->raw_writer, buffer, buf_size);  // the write result is not propagated
391 |     return 0;  // callers always see 0
392 | }


--------------------------------------------------------------------------------
/client/espressif/esp32s3_demo/main/AudioPipeline.h:
--------------------------------------------------------------------------------
 1 | // Copyright (2025) Beijing Volcano Engine Technology Ltd.
 2 | // SPDX-License-Identifier: MIT
 3 | 
 4 | #ifndef __AUDIO_PIPELINE_H__
 5 | #define __AUDIO_PIPELINE_H__
 6 | 
 7 | #include <stdint.h>
 8 | #include <stddef.h>
 9 | #include <stdbool.h>
10 | #include "audio_pipeline.h"
11 | 
12 | #ifdef __cplusplus
13 | extern "C" {
14 | #endif
15 | 
16 | #ifdef CONFIG_AUDIO_CODEC_TYPE_OPUS  /* map the Kconfig "Audio Codec" choice onto at most one RTC_DEMO_AUDIO_PIPELINE_CODEC_* flag */
17 | #define RTC_DEMO_AUDIO_PIPELINE_CODEC_OPUS 1
18 | #elif defined(CONFIG_AUDIO_CODEC_TYPE_PCM)
19 | #define RTC_DEMO_AUDIO_PIPELINE_CODEC_PCM 1
20 | #elif defined(CONFIG_AUDIO_CODEC_TYPE_G711A)
21 | #define RTC_DEMO_AUDIO_PIPELINE_CODEC_G711A 1
22 | #elif defined(CONFIG_AUDIO_CODEC_TYPE_G722)
23 | #define RTC_DEMO_AUDIO_PIPELINE_CODEC_G722 1
24 | #elif defined(CONFIG_AUDIO_CODEC_TYPE_AAC)  /* NOTE(review): Kconfig.projbuild declares AUDIO_CODEC_TYPE_AACLC, so this branch looks unreachable - confirm */
25 | #define RTC_DEMO_AUDIO_PIPELINE_CODEC_AAC 1
26 | #endif
27 | 
28 | /* Recorder pipeline: opaque handle plus its open / run / read / close entry points. */
29 | typedef struct recorder_pipeline_t recorder_pipeline_t, *recorder_pipeline_handle_t;
30 | recorder_pipeline_handle_t recorder_pipeline_open(void);
31 | void recorder_pipeline_run(recorder_pipeline_handle_t recorder_pipeline);
32 | void recorder_pipeline_close(recorder_pipeline_handle_t recorder_pipeline);
33 | int recorder_pipeline_get_default_read_size(recorder_pipeline_handle_t recorder_pipeline);
34 | int recorder_pipeline_read(recorder_pipeline_handle_t recorder_pipeline, char *buffer, int buf_size);
35 | 
36 | /* Player pipeline: opaque handle plus its open / run / write / close entry points. */
37 | typedef struct player_pipeline_t player_pipeline_t, *player_pipeline_handle_t;
38 | player_pipeline_handle_t player_pipeline_open(void);  /* (void) makes this a real prototype, matching the definition */
39 | void player_pipeline_run(player_pipeline_handle_t player_pipeline);
40 | void player_pipeline_close(player_pipeline_handle_t player_pipeline);
41 | int player_pipeline_get_default_read_size(player_pipeline_handle_t player_pipeline);
42 | int player_pipeline_write(player_pipeline_handle_t player_pipeline, char *buffer, int buf_size);
43 | void player_pipeline_write_play_buffer_flag(player_pipeline_handle_t player_pipeline);
44 | 
45 | #ifdef __cplusplus
46 | }
47 | #endif
48 | #endif


--------------------------------------------------------------------------------
/client/espressif/esp32s3_demo/main/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | # Copyright (2025) Beijing Volcano Engine Technology Ltd.
2 | # SPDX-License-Identifier: MIT
3 | 
4 | # Register the demo's "main" component: its four C sources, with this directory on the include path.
5 | idf_component_register(
6 |     SRCS "VolcRTCDemo.c" "AudioPipeline.c" "RtcHttpUtils.c" "RtcBotUtils.c"
7 |     INCLUDE_DIRS ".")
8 | 


--------------------------------------------------------------------------------
/client/espressif/esp32s3_demo/main/Kconfig.projbuild:
--------------------------------------------------------------------------------
 1 | menu "Example Configuration"
 2 | # Wi-Fi credentials, RTC application id, AIGC server address and audio codec for the demo firmware.
 3 | config WIFI_SSID
 4 |     string "WiFi SSID"
 5 |     default "YOUR WIFI SSID"
 6 |     help
 7 |         SSID (network name) for the example to connect to.
 8 | 
 9 | config WIFI_PASSWORD
10 |     string "WiFi Password"
11 |     default "YOUR WIFI PASSWORD"
12 |     help
13 |         WiFi password (WPA or WPA2) for the example to use.
14 | 
15 |         Can be left blank if the network has no security set.
16 | # RtcBotUtils.c appends CONFIG_RTC_APPID to a fixed prefix to form the Authorization header value.
17 | config RTC_APPID
18 |     string "RTC_APPID"
19 |     default "67582ac8******0174410bd1"
20 | # RtcBotUtils.c builds its request URLs as "http://" CONFIG_AIGENT_SERVER_HOST "/<endpoint>".
21 | config AIGENT_SERVER_HOST
22 |     string "AIGC SERVER IP:PORT"
23 |     default "192.***.***.2:8080"
24 | # The C sources branch on the CONFIG_AUDIO_CODEC_TYPE_* symbol selected in this choice.
25 | choice AUDIO_CODEC_SUPPORT
26 |     prompt "Audio Codec"
27 |     default AUDIO_CODEC_TYPE_PCM
28 | # RtcBotUtils.c requests "G711A" from the server for both the PCM and the G711A option.
29 | config AUDIO_CODEC_TYPE_PCM
30 |     bool "audio codec is pcm, use internal audio codec instead"
31 | 
32 | config AUDIO_CODEC_TYPE_OPUS
33 |     bool "audio codec is opus"
34 | 
35 | config AUDIO_CODEC_TYPE_G711A
36 |     bool "audio codec is g711a"
37 | 
38 | config AUDIO_CODEC_TYPE_G722
39 |     bool "audio codec is g722, not support yet"
40 | # NOTE(review): the C sources test CONFIG_AUDIO_CODEC_TYPE_AAC, which this AACLC symbol never defines - confirm intent.
41 | config AUDIO_CODEC_TYPE_AACLC
42 |     bool "audio codec is aaclc, not support yet"
43 | 
44 | endchoice
45 | endmenu
46 | 


--------------------------------------------------------------------------------
/client/espressif/esp32s3_demo/main/RtcBotUtils.c:
--------------------------------------------------------------------------------
  1 | // Copyright (2025) Beijing Volcano Engine Technology Ltd.
  2 | // SPDX-License-Identifier: MIT
  3 | 
  4 | #include "RtcBotUtils.h"
  5 | #include "RtcHttpUtils.h"
  6 | #include "cJSON.h"
  7 | #include "esp_log.h"
  8 | #include "esp_heap_caps.h"
  9 | #include <string.h>
 10 | 
 11 | static const char *TAG = "RTC_BOT_UTILS";
 12 | 
 13 | /* Allocator hook: request size bytes from PSRAM when CONFIG_PSRAM is set, else from internal RAM. */
 14 | /* NOTE(review): neither hook is referenced elsewhere in this file - presumably meant for cJSON_InitHooks; confirm. */
 15 | static void *impl_malloc_fn(size_t size) {
 16 | #if CONFIG_PSRAM
 17 |     return heap_caps_malloc(size, MALLOC_CAP_SPIRAM);
 18 | #else
 19 |     return heap_caps_malloc(size, MALLOC_CAP_INTERNAL);
 20 | #endif
 21 | }
 22 | 
 23 | static void impl_free_fn(void *ptr) {  /* matching release hook: hands ptr to heap_caps_free */
 24 |     heap_caps_free(ptr);
 25 | }
 26 | 
 27 | const char* common_headers[] = {  // key, value, key, value, ..., NULL; sent with every bot request in this file
 28 |     "Content-Type", "application/json",
 29 |     "Authorization", "af78e30" CONFIG_RTC_APPID,  // fixed prefix concatenated with the configured app id at compile time
 30 |     NULL
 31 | };
 32 | 
 33 | /*
 34 |  * Serialize `request` into `buffer` (capacity `buffer_size` bytes, NUL included).
 35 |  * Returns 0 on success, -1 when `request` is NULL or the text does not fit.
 36 |  * Printing into the caller's buffer avoids the heap string that cJSON_Print returned
 37 |  * (it was never freed) and the unbounded strcpy into a fixed-size stack array.
 38 |  */
 39 | static int serialize_request(cJSON *request, char *buffer, size_t buffer_size) {
 40 |     /* last argument 1 = formatted output, the same layout cJSON_Print produces */
 41 |     if (request == NULL || !cJSON_PrintPreallocated(request, buffer, (int) buffer_size, 1)) {
 42 |         ESP_LOGE(TAG, "Failed to serialize request body");
 43 |         return -1;
 44 |     }
 45 |     return 0;
 46 | }
 47 | 
 48 | /*
 49 |  * Copy the string member `key` of `object` into `dst` (capacity `dst_size`).
 50 |  * Returns 0 on success, -1 when the member is missing, is not a string, or does not fit.
 51 |  */
 52 | static int copy_json_string(const cJSON *object, const char *key, char *dst, size_t dst_size) {
 53 |     const char *value = cJSON_GetStringValue(cJSON_GetObjectItem(object, key));
 54 |     if (value == NULL) {
 55 |         ESP_LOGE(TAG, "Missing or non-string field: %s", key);
 56 |         return -1;
 57 |     }
 58 |     const size_t length = strlen(value);
 59 |     if (length >= dst_size) {
 60 |         ESP_LOGE(TAG, "Field %s is too long (%u bytes)", key, (unsigned) length);
 61 |         return -1;
 62 |     }
 63 |     memcpy(dst, value, length + 1);  /* +1 copies the terminating NUL */
 64 |     return 0;
 65 | }
 66 | 
 67 | /*
 68 |  * Serialize `request`, POST it to `uri` with common_headers and evaluate the reply.
 69 |  * `request` is deleted on every path, and so is the HTTP response buffer.
 70 |  *
 71 |  * Returns 200 and stores the parsed reply in *root_out (caller must cJSON_Delete it) when
 72 |  * the status is 200 and the body parses and contains "data". Otherwise *root_out is NULL and
 73 |  * the return value is -1 (serialization/parse failure, missing "data", 200 without a body)
 74 |  * or the HTTP status code reported by rtc_http_post.
 75 |  */
 76 | static int post_bot_request(const char *uri, cJSON *request, char *post_data, size_t post_data_size, cJSON **root_out) {
 77 |     *root_out = NULL;
 78 |     const int serialized = serialize_request(request, post_data, post_data_size);
 79 |     cJSON_Delete(request);
 80 |     if (serialized != 0) {
 81 |         return -1;
 82 |     }
 83 | 
 84 |     rtc_post_config_t post_config = {
 85 |         .uri = uri,
 86 |         .headers = common_headers,
 87 |         .post_data = post_data
 88 |     };
 89 |     rtc_req_result_t post_result = rtc_http_post(&post_config);
 90 |     const int status = post_result.code;
 91 |     const int has_body = (post_result.response != NULL);
 92 |     cJSON *root = has_body ? cJSON_Parse(post_result.response) : NULL;
 93 |     rtc_request_free(&post_result);  /* the error path used to leak this buffer */
 94 | 
 95 |     if (status != 200 || !has_body) {
 96 |         if (root != NULL) {
 97 |             const char *message = cJSON_GetStringValue(cJSON_GetObjectItem(root, "message"));
 98 |             ESP_LOGE(TAG, "Error: %s", message != NULL ? message : "(no message)");
 99 |             cJSON_Delete(root);
100 |         }
101 |         return (status == 200) ? -1 : status;
102 |     }
103 |     if (root == NULL) {
104 |         ESP_LOGE(TAG, "Error parsing JSON");
105 |         return -1;
106 |     }
107 |     if (cJSON_GetObjectItem(root, "data") == NULL) {
108 |         cJSON_Delete(root);
109 |         ESP_LOGE(TAG, "Not found data object.");
110 |         return -1;
111 |     }
112 |     *root_out = root;
113 |     return 200;
114 | }
115 | 
116 | /*
117 |  * Ask the AIGC server to start a voice chat and fill `room_info` from the "data" object of
118 |  * the reply (app_id, uid, room_id, task_id, bot_uid, token).
119 |  * Returns 200 on success, -1 on a local or parse failure, otherwise the HTTP status code.
120 |  */
121 | int start_voice_bot(rtc_room_info_t* room_info) {
122 |     if (room_info == NULL) {
123 |         return -1;
124 |     }
125 |     char post_data[512];
126 |     cJSON *post_jobj = cJSON_CreateObject();
127 | #ifdef CONFIG_AUDIO_CODEC_TYPE_OPUS
128 |     cJSON_AddStringToObject(post_jobj, "audio_codec", "OPUS");
129 | #elif defined(CONFIG_AUDIO_CODEC_TYPE_PCM) || defined(CONFIG_AUDIO_CODEC_TYPE_G711A)
130 |     cJSON_AddStringToObject(post_jobj, "audio_codec", "G711A");
131 | #elif defined(CONFIG_AUDIO_CODEC_TYPE_G722)
132 |     cJSON_AddStringToObject(post_jobj, "audio_codec", "G722");
133 | #elif defined(CONFIG_AUDIO_CODEC_TYPE_AAC)
134 |     cJSON_AddStringToObject(post_jobj, "audio_codec", "AAC");
135 | #endif
136 |     /* Add the agent id and voice id to the request here if needed. */
137 |     cJSON *root = NULL;
138 |     const int status = post_bot_request("http://" CONFIG_AIGENT_SERVER_HOST "/startvoicechat",
139 |                                         post_jobj, post_data, sizeof(post_data), &root);
140 |     if (root == NULL) {
141 |         return status;
142 |     }
143 | 
144 |     const cJSON *data = cJSON_GetObjectItem(root, "data");
145 |     const struct {
146 |         const char *key;
147 |         char *dst;
148 |         size_t size;
149 |     } fields[] = {
150 |         { "app_id",  room_info->app_id,  sizeof(room_info->app_id)  },
151 |         { "uid",     room_info->uid,     sizeof(room_info->uid)     },
152 |         { "room_id", room_info->room_id, sizeof(room_info->room_id) },
153 |         { "task_id", room_info->task_id, sizeof(room_info->task_id) },
154 |         { "bot_uid", room_info->bot_uid, sizeof(room_info->bot_uid) },
155 |         { "token",   room_info->token,   sizeof(room_info->token)   },
156 |     };
157 |     int result = 200;
158 |     for (size_t i = 0; i < sizeof(fields) / sizeof(fields[0]); ++i) {
159 |         if (copy_json_string(data, fields[i].key, fields[i].dst, fields[i].size) != 0) {
160 |             result = -1;  /* a missing or oversized field used to crash or overflow room_info */
161 |             break;
162 |         }
163 |     }
164 |     cJSON_Delete(root);
165 |     return result;
166 | }
167 | 
168 | /*
169 |  * Ask the AIGC server to stop the voice chat identified by app_id, room_id and task_id.
170 |  * Returns 200 on success, -1 on a local or parse failure, otherwise the HTTP status code.
171 |  */
172 | int stop_voice_bot(const rtc_room_info_t* room_info) {
173 |     if (room_info == NULL) {
174 |         return -1;
175 |     }
176 |     char post_data[512];
177 |     cJSON *post_jobj = cJSON_CreateObject();
178 |     cJSON_AddStringToObject(post_jobj, "app_id", room_info->app_id);
179 |     cJSON_AddStringToObject(post_jobj, "room_id", room_info->room_id);
180 |     cJSON_AddStringToObject(post_jobj, "task_id", room_info->task_id);
181 | 
182 |     cJSON *root = NULL;
183 |     const int status = post_bot_request("http://" CONFIG_AIGENT_SERVER_HOST "/stopvoicechat",
184 |                                         post_jobj, post_data, sizeof(post_data), &root);
185 |     cJSON_Delete(root);  /* nothing is read from the reply; cJSON_Delete accepts NULL */
186 |     return status;
187 | }
188 | 
189 | /*
190 |  * Send `command` (and `message`, when not NULL) for the running voice chat to the AIGC server.
191 |  * Returns 200 on success, -1 on a local or parse failure, otherwise the HTTP status code.
192 |  */
193 | int update_voice_bot(const rtc_room_info_t* room_info, const char* command, const char* message) {
194 |     if (room_info == NULL) {
195 |         return -1;
196 |     }
197 |     char post_data[1024];
198 |     cJSON *post_jobj = cJSON_CreateObject();
199 |     cJSON_AddStringToObject(post_jobj, "app_id", room_info->app_id);
200 |     cJSON_AddStringToObject(post_jobj, "room_id", room_info->room_id);
201 |     cJSON_AddStringToObject(post_jobj, "task_id", room_info->task_id);
202 |     cJSON_AddStringToObject(post_jobj, "command", command);
203 |     if (message) {
204 |         cJSON_AddStringToObject(post_jobj, "message", message);
205 |     }
206 | 
207 |     cJSON *root = NULL;
208 |     const int status = post_bot_request("http://" CONFIG_AIGENT_SERVER_HOST "/updatevoicechat",
209 |                                         post_jobj, post_data, sizeof(post_data), &root);
210 |     cJSON_Delete(root);  /* nothing is read from the reply; cJSON_Delete accepts NULL */
211 |     return status;
212 | }
213 | 
214 | /* Send the "interrupt" command, without a message, through update_voice_bot. */
215 | int interrupt_voice_bot(const rtc_room_info_t* room_info) {
216 |     return update_voice_bot(room_info, "interrupt", NULL);
217 | }
218 | 
219 | /* Send the "function" command with `message` as its payload through update_voice_bot. */
220 | int voice_bot_function_calling(const rtc_room_info_t* room_info, const char* message) {
221 |     return update_voice_bot(room_info, "function", message);
222 | }


--------------------------------------------------------------------------------
/client/espressif/esp32s3_demo/main/RtcBotUtils.h:
--------------------------------------------------------------------------------
 1 | // Copyright (2025) Beijing Volcano Engine Technology Ltd.
 2 | // SPDX-License-Identifier: MIT
 3 | 
 4 | #ifndef __RTC_BOT_UTILS_H__
 5 | #define __RTC_BOT_UTILS_H__
 6 | typedef struct {  // session identifiers; start_voice_bot() copies them from the "data" object of the server reply
 7 |     char room_id[129];
 8 |     char uid[129];
 9 |     char app_id[25];
10 |     char task_id[129];
11 |     char bot_uid[129];
12 |     char token[257];
13 | } rtc_room_info_t;
14 | 
15 | int start_voice_bot(rtc_room_info_t* room_info);  // POST /startvoicechat; returns 200 on success
16 | int stop_voice_bot(const rtc_room_info_t* room_info);  // POST /stopvoicechat; returns 200 on success
17 | int update_voice_bot(const rtc_room_info_t* room_info, const char* command, const char* message);  // POST /updatevoicechat; message may be NULL
18 | int interrupt_voice_bot(const rtc_room_info_t* room_info);  // update_voice_bot with command "interrupt"
19 | int voice_bot_function_calling(const rtc_room_info_t* room_info, const char* message);  // update_voice_bot with command "function"
20 | 
21 | 
22 | #endif // __RTC_BOT_UTILS_H__


--------------------------------------------------------------------------------
/client/espressif/esp32s3_demo/main/RtcHttpUtils.c:
--------------------------------------------------------------------------------
  1 | // Copyright (2025) Beijing Volcano Engine Technology Ltd.
  2 | // SPDX-License-Identifier: MIT
  3 | 
  4 | #include "RtcHttpUtils.h"
  5 | #include "esp_log.h"
  6 | #include "freertos/FreeRTOS.h"
  7 | #include "freertos/event_groups.h"
  8 | #include "esp_http_client.h"
  9 | #include <string.h>
 10 | 
 11 | #define HTTP_FINSH_BIT 1
 12 | 
 13 | static const char *TAG = "RTC_HTTP_UTILS";
 14 | #if CONFIG_PSRAM
 15 | static const unsigned int mem_flags =  MALLOC_CAP_SPIRAM;
 16 | #else
 17 | static const unsigned int mem_flags =  MALLOC_CAP_INTERNAL;
 18 | #endif // CONFIG_PSRAM
 19 | 
 20 | typedef struct {  // state shared between rtc_http_post() and _http_event_handler() through user_data
 21 |     EventGroupHandle_t http_finish_event;  // HTTP_FINSH_BIT is set on HTTP_EVENT_ON_FINISH
 22 |     int output_len;  // number of body bytes appended to result.response so far
 23 |     rtc_req_result_t result;
 24 | } rtc_http_post_context_t;
 25 | 
 26 | static esp_err_t _http_event_handler(esp_http_client_event_t *evt)
 27 | {
 28 |     rtc_http_post_context_t *context = (rtc_http_post_context_t *) evt->user_data;
 29 |    
 30 |     switch(evt->event_id) {
 31 |         case HTTP_EVENT_ERROR:
 32 |             ESP_LOGD(TAG, "HTTP_EVENT_ERROR");
 33 |             break;
 34 |         case HTTP_EVENT_ON_CONNECTED:
 35 |             ESP_LOGD(TAG, "HTTP_EVENT_ON_CONNECTED");
 36 |             break;
 37 |         case HTTP_EVENT_HEADER_SENT:
 38 |             ESP_LOGD(TAG, "HTTP_EVENT_HEADER_SENT");
 39 |             break;
 40 |         case HTTP_EVENT_ON_HEADER:
 41 |             ESP_LOGD(TAG, "HTTP_EVENT_ON_HEADER, key=%s, value=%s", evt->header_key, evt->header_value);
 42 |             break;
 43 |         case HTTP_EVENT_ON_DATA:
 44 |             memcpy(context->result.response + context->output_len, evt->data, evt->data_len);
 45 |             context->output_len += evt->data_len;
 46 |             context->result.response[context->output_len] = 0;
 47 |             break;
 48 |         case HTTP_EVENT_ON_FINISH:            
 49 |             ESP_LOGD(TAG, "HTTP_EVENT_ON_FINISH");
 50 |             xEventGroupSetBits(context->http_finish_event, HTTP_FINSH_BIT);
 51 |             break;
 52 |         case HTTP_EVENT_DISCONNECTED:
 53 |             ESP_LOGD(TAG, "HTTP_EVENT_DISCONNECTED");
 54 |             break;
 55 |         case HTTP_EVENT_REDIRECT:
 56 |             break;
 57 |     }
 58 |     return ESP_OK;
 59 | }
 60 | 
 61 | rtc_req_result_t rtc_http_post(rtc_post_config_t* config) {
 62 |     if (!config || !config->uri || !config->post_data) {
 63 |         ESP_LOGE(TAG, "Invalid parameters: config");
 64 |     }
 65 |     
 66 |     rtc_http_post_context_t context = {0};
 67 |     context.http_finish_event = xEventGroupCreate();
 68 |     if (!context.http_finish_event) {
 69 |         ESP_LOGE(TAG, "http_finish_event create failed.");
 70 |         return context.result;
 71 |     }
 72 |     context.result.code = 0;
 73 |     context.result.response = heap_caps_malloc(2048, mem_flags);
 74 |     if (!context.result.response) {
 75 |         vEventGroupDelete(context.http_finish_event);
 76 |         ESP_LOGE(TAG, "http_finish_event create failed.");
 77 |         return context.result;
 78 |     }
 79 |     context.result.response[0] = 0;
 80 |     
 81 |     esp_http_client_config_t http_client_config = {
 82 |         .url = config->uri,
 83 |         .query = "",
 84 |         .event_handler = _http_event_handler,
 85 |         .user_data = &context,
 86 |         .disable_auto_redirect = true,
 87 |     };
 88 | 
 89 |     esp_http_client_handle_t client = esp_http_client_init(&http_client_config);
 90 |     esp_http_client_set_method(client, HTTP_METHOD_POST);
 91 |     if (config->headers) {
 92 |         int header_index = 0;
 93 |         while(config->headers[header_index]) {
 94 |             esp_http_client_set_header(client, config->headers[header_index], config->headers[header_index + 1]);
 95 |             header_index += 2;
 96 |         }
 97 |     }
 98 |     esp_http_client_set_post_field(client, config->post_data, strlen(config->post_data));
 99 |     esp_err_t err = esp_http_client_perform(client);
100 |     if (err != ESP_OK) {
101 |         ESP_LOGE(TAG, "request failed: %s", esp_err_to_name(err));
102 |     }
103 | 
104 |     EventBits_t ux_bits = xEventGroupWaitBits(context.http_finish_event, HTTP_FINSH_BIT , pdTRUE, pdFALSE, pdMS_TO_TICKS(10000));  // wait 10s
105 |     if ((ux_bits & HTTP_FINSH_BIT) == 0) {
106 |         ESP_LOGE(TAG, "request failed: %s", esp_err_to_name(err));
107 |     }
108 |     
109 |     context.result.code = esp_http_client_get_status_code(client);
110 |     ESP_LOGI(TAG, "context.result.code: %d, context.result.response: %s", context.result.code, context.result.response);
111 | 
112 |     esp_http_client_cleanup(client);
113 |     vEventGroupDelete(context.http_finish_event);
114 |     return context.result;
115 | }
116 | 
117 | void rtc_request_free(rtc_req_result_t *result) {
118 |      if (result && result->response) {
119 |         heap_caps_free(result->response);
120 |         result->response = NULL;
121 |     }
122 | }


--------------------------------------------------------------------------------
/client/espressif/esp32s3_demo/main/RtcHttpUtils.h:
--------------------------------------------------------------------------------
 1 | // Copyright (2025) Beijing Volcano Engine Technology Ltd.
 2 | // SPDX-License-Identifier: MIT
 3 | 
 4 | #ifndef __RTC_HTTP_UTILS_H__
 5 | #define __RTC_HTTP_UTILS_H__
 6 | 
 7 | typedef struct {  // result of rtc_http_post()
 8 |     int code;  // value of esp_http_client_get_status_code(); 0 when the request was never sent
 9 |     char* response;  // heap buffer with the NUL-terminated body; release with rtc_request_free()
10 | } rtc_req_result_t;
11 | 
12 | typedef struct {  // input of rtc_http_post()
13 |     const char* uri;
14 |     const char** headers;  // key1,value1,key2,value2....keyn,valuen,NULL
15 |     const char* post_data;  // NUL-terminated request body; its strlen() is sent
16 | } rtc_post_config_t;
17 | 
18 | rtc_req_result_t rtc_http_post(rtc_post_config_t* config);
19 | void rtc_request_free(rtc_req_result_t *result);
20 | 
21 | #endif // __RTC_HTTP_UTILS_H__


--------------------------------------------------------------------------------
/client/espressif/esp32s3_demo/main/VolcRTCDemo.c:
--------------------------------------------------------------------------------
  1 | // Copyright (2025) Beijing Volcano Engine Technology Ltd.
  2 | // SPDX-License-Identifier: MIT
  3 | 
  4 | #include <string.h>
  5 | #include <stdlib.h>
  6 | #include <stdio.h>
  7 | #include <stdint.h>
  8 | #include <unistd.h>
  9 | #include <time.h>
 10 | #include <inttypes.h>
 11 | 
 12 | #include "esp_event.h"
 13 | #include "esp_log.h"
 14 | #include "esp_system.h"
 15 | #include "nvs_flash.h"
 16 | #include "esp_netif.h"
 17 | #include "freertos/FreeRTOS.h"
 18 | #include "freertos/task.h"
 19 | #include "esp_heap_task_info.h"
 20 | #include "esp_random.h"
 21 | 
 22 | #include <VolcEngineRTCLite.h>
 23 | #include "freertos/semphr.h"
 24 | #include "esp_err.h"
 25 | #include "sdkconfig.h"
 26 | #include "audio_element.h"
 27 | #include "audio_pipeline.h"
 28 | #include "audio_event_iface.h"
 29 | #include "audio_common.h"
 30 | #include "audio_sys.h"
 31 | #include "board.h"
 32 | #include "esp_peripherals.h"
 33 | #include "periph_wifi.h"
 34 | #include "fatfs_stream.h"
 35 | #include "i2s_stream.h"
 36 | #include "AudioPipeline.h"
 37 | #include "RtcBotUtils.h"
 38 | #include "cJSON.h"
 39 | 
 40 | #define STATS_TASK_PRIO     5
 41 | 
 42 | static const char* TAG = "VolcRTCDemo";
 43 | static bool joined = false;
 44 | 
 45 | typedef struct {  // per-engine user data, fetched in the callbacks via byte_rtc_get_user_data()
 46 |     player_pipeline_handle_t player_pipeline;  // receives remote audio in byte_rtc_on_audio_data
 47 |     rtc_room_info_t* room_info;
 48 |     char remote_uid[128];  // user name stored by byte_rtc_on_user_joined
 49 | } engine_context_t;
 50 | // byte rtc lite callbacks
 51 | static void byte_rtc_on_join_room_success(byte_rtc_engine_t engine, const char* channel, int elapsed_ms, bool rejoin) {
 52 |     ESP_LOGI(TAG, "join channel success %s elapsed %d ms now %d ms\n", channel, elapsed_ms, elapsed_ms);
 53 |     joined = true;  // file-scope flag: the local user is now in the room
 54 | }
 55 | 
 56 | static void byte_rtc_on_rejoin_room_success(byte_rtc_engine_t engine, const char* channel, int elapsed_ms) {
 57 |     // Only logs; the `joined` flag is not touched on rejoin.
 58 |     ESP_LOGI(TAG, "rejoin channel success %s\n", channel);
 59 | }
 60 | 
 61 | static void byte_rtc_on_user_joined(byte_rtc_engine_t engine, const char* channel, const char* user_name, int elapsed_ms) {
 62 |     ESP_LOGI(TAG, "remote user joined  %s:%s\n", channel, user_name);
 63 |     engine_context_t* context = (engine_context_t *) byte_rtc_get_user_data(engine);
 64 |     snprintf(context->remote_uid, sizeof(context->remote_uid), "%s", user_name);  // bounded copy: strcpy could overrun remote_uid[128]
 65 | }
 66 | 
 67 | static void byte_rtc_on_user_offline(byte_rtc_engine_t engine, const char* channel, const char* user_name, int reason) {
 68 |     ESP_LOGI(TAG, "remote user offline  %s:%s\n", channel, user_name);
 69 | }
 70 | 
 71 | static void byte_rtc_on_user_mute_audio(byte_rtc_engine_t engine, const char* channel, const char* user_name, int muted) {
 72 |     ESP_LOGI(TAG, "remote user mute audio  %s:%s %d\n", channel, user_name, muted);
 73 | }
 74 | 
 75 | static void byte_rtc_on_user_mute_video(byte_rtc_engine_t engine, const char* channel, const char* user_name, int muted) {
 76 |     ESP_LOGI(TAG, "remote user mute video  %s:%s %d\n", channel, user_name, muted);
 77 | }
 78 | 
 79 | static void byte_rtc_on_connection_lost(byte_rtc_engine_t engine, const char* channel) {
 80 |     ESP_LOGI(TAG, "connection Lost  %s\n", channel);
 81 | }
 82 | 
 83 | static void byte_rtc_on_room_error(byte_rtc_engine_t engine, const char* channel, int code, const char* msg) {
 84 |     ESP_LOGE(TAG, "error occur %s %d %s\n", channel, code, msg?msg:"");  // msg may be NULL, hence the fallback
 85 | }
 86 | 
 87 | static void byte_rtc_on_audio_data(byte_rtc_engine_t engine, const char* channel, const char*  uid , uint16_t sent_ts,
 88 |                       audio_data_type_e codec, const void* data_ptr, size_t data_len){  // remote audio: forward one frame to the player pipeline
 89 |     engine_context_t* context = (engine_context_t *) byte_rtc_get_user_data(engine);
 90 | #ifdef RTC_DEMO_AUDIO_PIPELINE_CODEC_OPUS
 91 |     static char opus_data_cache[1024];  // 2-byte big-endian length prefix followed by the frame
 92 |     if (data_len > sizeof(opus_data_cache) - 2) { return; }  // frame would overflow the cache: drop it
 93 |     opus_data_cache[0] = (data_len >> 8) & 0xFF;
 94 |     opus_data_cache[1] = data_len & 0xFF;
 95 |     memcpy(opus_data_cache + 2, data_ptr, data_len);
 96 |     player_pipeline_write(context->player_pipeline, opus_data_cache, data_len + 2);
 97 | #else  // without this #else, OPUS builds also wrote the unprefixed frame right after the prefixed one
 98 |     player_pipeline_write(context->player_pipeline, (char *) data_ptr, data_len);
 99 | #endif
100 | }
101 | 
102 | // remote video: only logs the size of each received frame
103 | static void byte_rtc_on_video_data(byte_rtc_engine_t engine, const char*  channel, const char* uid, uint16_t sent_ts,
104 |                       video_data_type_e codec, int is_key_frame,
105 |                       const void * data_ptr, size_t data_len){
106 |     ESP_LOGI(TAG, "byte_rtc_on_video_data... len %u\n", (unsigned) data_len);  // size_t is unsigned, so it is cast and printed with %u rather than %d
107 | }
108 | 
109 | // remote message
110 | // Subtitle message, see https://www.volcengine.com/docs/6348/1337284
111 | static void on_subtitle_message_received(byte_rtc_engine_t engine, const cJSON* root) {
112 |     /*
113 |         {
114 |             "data" : 
115 |             [
116 |                 {
117 |                     "definite" : false,
118 |                     "language" : "zh",
119 |                     "mode" : 1,
120 |                     "paragraph" : false,
121 |                     "sequence" : 0,
122 |                     "text" : "\\u4f60\\u597d",
123 |                     "userId" : "voiceChat_xxxxx"
124 |                 }
125 |             ],
126 |             "type" : "subtitle"
127 |         }
128 |     */
129 |     const char* type = cJSON_GetStringValue(cJSON_GetObjectItem(root, "type"));  // NULL when "type" is missing or not a string
130 |     if (type != NULL && strcmp("subtitle", type) == 0) {
131 |         cJSON* data_obj_arr = cJSON_GetObjectItem(root, "data");
132 |         cJSON* obji = NULL;
133 |         cJSON_ArrayForEach(obji, data_obj_arr) {
134 |             const char* user_id = cJSON_GetStringValue(cJSON_GetObjectItem(obji, "userId"));
135 |             const char* text = cJSON_GetStringValue(cJSON_GetObjectItem(obji, "text"));
136 |             if (user_id && text) {  // entries whose fields are absent or not strings are skipped instead of printing NULL
137 |                 ESP_LOGE(TAG, "subtitle:%s:%s", user_id, text);
138 |             }
139 |         }
140 |     }
141 | }
142 | 
143 | // Function-calling message, see https://www.volcengine.com/docs/6348/1359441
144 | static void on_function_calling_message_received(byte_rtc_engine_t engine, const cJSON* root, const char* json_str) {
145 |     /*
146 |         {
147 |             "subscriber_user_id" : "",
148 |             "tool_calls" : 
149 |             [
150 |                 {
151 |                     "function" : 
152 |                     {
153 |                         "arguments" : "{\\"location\\": \\"\\u5317\\u4eac\\u5e02\\"}",
154 |                         "name" : "get_current_weather"
155 |                     },
156 |                     "id" : "call_py400kek0e3pczrqdxgnb3lo",
157 |                     "type" : "function"
158 |                 }
159 |             ]
160 |         }
161 |     */
162 |     // On a function-calling message, decide case by case whether to handle it on the server or on the client.
163 | 
164 |     engine_context_t* context = (engine_context_t *) byte_rtc_get_user_data(engine);  // currently unused: both handling paths below are commented out
165 |     
166 |     // Server-side handling:
167 |     // voice_bot_function_calling(context->room_info, json_str);
168 | 
169 |     // Client-side handling: notify the agent through byte_rtc_rts_send_message
170 |     /*cJSON* tool_obj_arr = cJSON_GetObjectItem(root, "tool_calls");
171 |     cJSON* obji = NULL;
172 |     cJSON_ArrayForEach(obji, tool_obj_arr) {
173 |         cJSON* id_obj = cJSON_GetObjectItem(obji, "id");
174 |         cJSON* function_obj = cJSON_GetObjectItem(obji, "function");
175 |         if (id_obj && function_obj) {
176 |             cJSON* arguments_obj = cJSON_GetObjectItem(function_obj, "arguments");
177 |             cJSON* name_obj = cJSON_GetObjectItem(function_obj, "name");
178 |             cJSON* location_obj = cJSON_GetObjectItem(arguments_obj, "arguments");
179 |             const char* func_name = cJSON_GetStringValue(name_obj);
180 |             const char* loction = cJSON_GetStringValue(location_obj);
181 |             const char* func_id = cJSON_GetStringValue(id_obj);
182 | 
183 |             if (strcmp(func_name, "get_current_weather") == 0) {
184 |                 cJSON *fc_obj = cJSON_CreateObject();
185 |                 cJSON_AddStringToObject(fc_obj, "ToolCallID", func_id);
186 |                 cJSON_AddStringToObject(fc_obj, "Content", "今天白天风和日丽，天气晴朗，晚上阵风二级。");
187 |                 char *json_string = cJSON_Print(fc_obj);
188 |                 static char fc_message_buffer[256] = {'f', 'u', 'n', 'c'};
189 |                 int json_str_len = strlen(json_string);
190 |                 fc_message_buffer[4] = (json_str_len >> 24) & 0xff;
191 |                 fc_message_buffer[5] = (json_str_len >> 16) & 0xff;
192 |                 fc_message_buffer[6] = (json_str_len >> 8) & 0xff;
193 |                 fc_message_buffer[7] = (json_str_len >> 0) & 0xff;
194 |                 memcpy(fc_message_buffer + 8, json_string, json_str_len);
195 |                 ESP_LOGE(TAG, "send message: %s", json_string);
196 |                 cJSON_Delete(fc_obj);
197 | 
198 |                 byte_rtc_rts_send_message(engine, context->room_info->room_id, context->remote_uid, fc_message_buffer, json_str_len + 8, 1, RTS_MESSAGE_RELIABLE);
199 |             }
200 |         }
201 |     }*/
202 |    
203 | }
204 | 
205 | void on_message_received(byte_rtc_engine_t engine, const char*  room, const char* uid, const uint8_t* message, int size, bool binary) {
206 |     // 字幕消息，参考https://www.volcengine.com/docs/6348/1337284
207 |     // subv|length(4)|json str
208 |     //
209 |     // function calling 消息，参考https://www.volcengine.com/docs/6348/1359441
210 |     // tool|length(4)|json str
211 | 
212 |     static char message_buffer[4096];
213 |     if (size > 8) {
214 |         memcpy(message_buffer, message, size);
215 |         message_buffer[size] = 0;
216 |         message_buffer[size + 1] = 0;
217 |         cJSON *root = cJSON_Parse(message_buffer + 8);
218 |         if (root != NULL) {
219 |             if (message[0] == 's' && message[1] == 'u' && message[2] == 'b' && message[3] == 'v') {
220 |                 // 字幕消息
221 |                 on_subtitle_message_received(engine, root);
222 |             } else if (message[0] == 't' && message[1] == 'o' && message[2] == 'o' && message[3] == 'l') {
223 |                 // function calling 消息
224 |                 on_function_calling_message_received(engine, root, message_buffer + 8);
225 |             } else {
226 |                 ESP_LOGE(TAG, "unknown json message: %s", message_buffer + 8);
227 |             }
228 |             cJSON_Delete(root);
229 |         } else {
230 |             ESP_LOGE(TAG, "unknown message.");
231 |         }
232 |     } else {
233 |         ESP_LOGE(TAG, "unknown message.");
234 |     }
235 | }
236 | 
237 | static void on_key_frame_gen_req(byte_rtc_engine_t engine, const char*  channel, const char*  uid) {}
238 | // byte rtc lite callbacks end.
239 | 
240 | 
241 | static void byte_rtc_task(void *pvParameters) {
242 |     rtc_room_info_t* room_info = heap_caps_malloc(sizeof(rtc_room_info_t),  MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
243 |     // step 1: start ai agent & get room info
244 |     int start_ret = start_voice_bot(room_info);
245 |     if (start_ret != 200) {
246 |         ESP_LOGE(TAG, "Bot start Failed, ret = %d", start_ret);
247 |         return;
248 |     }
249 | 
250 |     // step 2: start audio capture & play
251 |     recorder_pipeline_handle_t pipeline = recorder_pipeline_open();
252 |     player_pipeline_handle_t player_pipeline = player_pipeline_open();
253 |     recorder_pipeline_run(pipeline);
254 |     player_pipeline_run(player_pipeline);
255 | 
256 |     // step 3: start byte rtc engine
257 |     byte_rtc_event_handler_t handler = {
258 |         .on_join_room_success       =   byte_rtc_on_join_room_success,
259 |         .on_room_error              =   byte_rtc_on_room_error,
260 |         .on_user_joined             =   byte_rtc_on_user_joined,
261 |         .on_user_offline            =   byte_rtc_on_user_offline,
262 |         .on_user_mute_audio         =   byte_rtc_on_user_mute_audio,
263 |         .on_user_mute_video         =   byte_rtc_on_user_mute_video,
264 |         .on_audio_data              =   byte_rtc_on_audio_data,
265 |         .on_video_data              =   byte_rtc_on_video_data,
266 |         .on_key_frame_gen_req       =   on_key_frame_gen_req,
267 |         .on_message_received        =   on_message_received,
268 |     };
269 | 
270 |     byte_rtc_engine_t engine = byte_rtc_create(room_info->app_id, &handler);
271 |     byte_rtc_set_log_level(engine, BYTE_RTC_LOG_LEVEL_ERROR);
272 |     byte_rtc_set_params(engine, "{\"debug\":{\"log_to_console\":1}}");
273 | #ifdef RTC_DEMO_AUDIO_PIPELINE_CODEC_PCM
274 |     byte_rtc_set_params(engine,"{\"audio\":{\"codec\":{\"internal\":{\"enable\":1}}}}");
275 | #endif
276 | 
277 |     byte_rtc_init(engine);
278 | #ifdef CONFIG_AUDIO_CODEC_TYPE_OPUS
279 |     byte_rtc_set_audio_codec(engine, AUDIO_CODEC_TYPE_OPUS);
280 | #elif defined(CONFIG_AUDIO_CODEC_TYPE_PCM) || defined(CONFIG_AUDIO_CODEC_TYPE_G711A)
281 |     byte_rtc_set_audio_codec(engine, AUDIO_CODEC_TYPE_G711A);
282 | #elif defined(CONFIG_AUDIO_CODEC_TYPE_G722)
283 |     byte_rtc_set_audio_codec(engine, AUDIO_CODEC_TYPE_G722);
284 | #elif defined(CONFIG_AUDIO_CODEC_TYPE_AAC)
285 |     byte_rtc_set_audio_codec(engine, AUDIO_CODEC_TYPE_AACLC);
286 | #endif
287 | 
288 |     // byte_rtc_set_video_codec(engine, VIDEO_CODEC_TYPE_H264); // 需要视频功能时设置
289 | 
290 |     engine_context_t engine_context = {
291 |         .player_pipeline = player_pipeline,
292 |         .room_info = room_info
293 |     };
294 |     byte_rtc_set_user_data(engine, &engine_context);
295 | 
296 |     // step 4: join room
297 |     byte_rtc_room_options_t options;
298 |     options.auto_subscribe_audio = 1; // 接收远端音频
299 |     options.auto_subscribe_video = 0; // 不接收远端视频
300 |     options.auto_publish_audio = 1;   // 发送音频
301 |     options.auto_publish_video = 0;   // 发送视频
302 |     byte_rtc_join_room(engine, room_info->room_id, room_info->uid, room_info->token, &options);
303 | 
304 |     const int DEFAULT_READ_SIZE = recorder_pipeline_get_default_read_size(pipeline);
305 |     uint8_t *audio_buffer = heap_caps_malloc(DEFAULT_READ_SIZE, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT);
306 |     if (!audio_buffer) {
307 |         ESP_LOGE(TAG, "Failed to alloc audio buffer!");
308 |         return;
309 |     }
310 | 
311 |     // step 5: start sending audio data
312 |     while (true) {
313 |         int ret =  recorder_pipeline_read(pipeline, (char*) audio_buffer, DEFAULT_READ_SIZE);
314 |         if (ret == DEFAULT_READ_SIZE && joined) {
315 |             // push_audio data
316 | #ifdef RTC_DEMO_AUDIO_PIPELINE_CODEC_PCM
317 |             audio_frame_info_t audio_frame_info = {.data_type = AUDIO_DATA_TYPE_PCM};
318 | #elif defined(CONFIG_AUDIO_CODEC_TYPE_G711A)
319 |             audio_frame_info_t audio_frame_info = {.data_type = AUDIO_DATA_TYPE_PCMA};
320 | #elif defined(CONFIG_AUDIO_CODEC_TYPE_G722)
321 |             audio_frame_info_t audio_frame_info = {.data_type = AUDIO_DATA_TYPE_G722};
322 | #elif defined(CONFIG_AUDIO_CODEC_TYPE_AAC)
323 |             audio_frame_info_t audio_frame_info = {.data_type = AUDIO_DATA_TYPE_AAC};
324 | #elif defined(CONFIG_AUDIO_CODEC_TYPE_OPUS)
325 |             audio_frame_info_t audio_frame_info = {.data_type = AUDIO_DATA_TYPE_OPUS};
326 | #endif
327 |             byte_rtc_send_audio_data(engine, room_info->room_id, audio_buffer, DEFAULT_READ_SIZE, &audio_frame_info);
328 |         }
329 |     }
330 | 
331 |     // step 6: leave room and destroy engine
332 |     byte_rtc_leave_room(engine, room_info->room_id);
333 |     usleep(1000 * 1000);
334 |     byte_rtc_fini(engine);
335 |     usleep(1000 * 1000);
336 |     byte_rtc_destory(engine);
337 |     
338 |     // step 7: stop ai agent or it will not stop until 3 minutes
339 |     stop_voice_bot(room_info);
340 |     heap_caps_free(room_info);
341 | 
342 |     // step 8: stop audio capture & play
343 |     recorder_pipeline_close(pipeline);
344 |     player_pipeline_close(player_pipeline);
345 |     ESP_LOGI(TAG, "............. finished\n");
346 | }
347 | 
348 | void app_main(void)
349 | {
350 |     ESP_ERROR_CHECK(nvs_flash_init() );
351 |     ESP_ERROR_CHECK(esp_netif_init());
352 | 
353 |     esp_periph_config_t periph_cfg = DEFAULT_ESP_PERIPH_SET_CONFIG();
354 |     esp_periph_set_handle_t set = esp_periph_set_init(&periph_cfg);
355 | 
356 |     periph_wifi_cfg_t wifi_cfg = {
357 |         .wifi_config.sta.ssid = CONFIG_WIFI_SSID,
358 |         .wifi_config.sta.password = CONFIG_WIFI_PASSWORD,
359 |     };
360 |     esp_periph_handle_t wifi_handle = periph_wifi_init(&wifi_cfg);
361 |     esp_periph_start(set, wifi_handle);
362 |     periph_wifi_wait_for_connected(wifi_handle, portMAX_DELAY);
363 | 
364 |     audio_board_handle_t board_handle = audio_board_init();   
365 |     audio_hal_ctrl_codec(board_handle->audio_hal, AUDIO_HAL_CODEC_MODE_BOTH, AUDIO_HAL_CTRL_START);
366 |     audio_hal_set_volume(board_handle->audio_hal, 80);
367 |     ESP_LOGI(TAG, "Starting again!\n");
368 | 
369 |     // Allow other core to finish initialization
370 |     vTaskDelay(pdMS_TO_TICKS(100));
371 | 
372 |     // Create and start stats task
373 |     xTaskCreate(&byte_rtc_task, "byte_rtc_task", 8192, NULL, STATS_TASK_PRIO, NULL);
374 | }
375 | 


--------------------------------------------------------------------------------
/client/espressif/esp32s3_demo/partitions.csv:
--------------------------------------------------------------------------------
1 | # Name,   Type, SubType, Offset,  Size, Flags
2 | # Note: if you have increased the bootloader size, make sure to update the offsets to avoid overlap
3 | nvs,        data, nvs,      0x9000,  0x6000,
4 | phy_init,   data, phy,      0xf000,  0x1000,
5 | factory,    app,  factory,  0x10000, 5M,
6 | coredump, data, coredump,,       64K
7 | storage,  data, littlefs,      ,  2M,
8 | 


--------------------------------------------------------------------------------
/client/espressif/esp32s3_demo/sdkconfig.defaults:
--------------------------------------------------------------------------------
 1 | #
 2 | # Serial flasher config
 3 | #
 4 | CONFIG_ESPTOOLPY_FLASHMODE_QIO=y
 5 | CONFIG_ESPTOOLPY_FLASH_SAMPLE_MODE_STR=y
 6 | CONFIG_ESPTOOLPY_FLASHFREQ_80M=y
 7 | CONFIG_ESPTOOLPY_FLASHFREQ="80m"
 8 | CONFIG_ESPTOOLPY_HEADER_FLASHSIZE_UPDATE=y
 9 | CONFIG_ESPTOOLPY_FLASHSIZE_8MB=y
10 | # end of Serial flasher config
11 | 
12 | 
13 | #
14 | # Partition Table
15 | #
16 | # CONFIG_PARTITION_TABLE_SINGLE_APP is not set
17 | # CONFIG_PARTITION_TABLE_SINGLE_APP_LARGE is not set
18 | # CONFIG_PARTITION_TABLE_TWO_OTA is not set
19 | CONFIG_PARTITION_TABLE_CUSTOM=y
20 | CONFIG_PARTITION_TABLE_CUSTOM_FILENAME="partitions.csv"
21 | CONFIG_PARTITION_TABLE_FILENAME="partitions.csv"
22 | CONFIG_PARTITION_TABLE_MD5=y
23 | # end of Partition Table
24 | 
25 | #
26 | # Compiler options
27 | #
28 | CONFIG_COMPILER_OPTIMIZATION_PERF=y
29 | 
30 | #
31 | # ESP WDT CONFIG
32 | #
33 | # CONFIG_TASK_WDT_PANIC is not set
34 | CONFIG_TASK_WDT_TIMEOUT_S=10
35 | 
36 | #
37 | # ESP System Settings
38 | #
39 | CONFIG_ESP_MAIN_TASK_STACK_SIZE=4096
40 | 
41 | #
42 | # ESP-TLS
43 | #
44 | CONFIG_ESP_TLS_INSECURE=y
45 | CONFIG_ESP_TLS_SKIP_SERVER_CERT_VERIFY=y
46 | 
47 | #
48 | # FREERTOS
49 | #
50 | CONFIG_FREERTOS_ENABLE_BACKWARD_COMPATIBILITY=y
51 | CONFIG_FREERTOS_TIMER_TASK_STACK_DEPTH=4096
52 | CONFIG_FREERTOS_GENERATE_RUN_TIME_STATS=y
53 | CONFIG_FREERTOS_VTASKLIST_INCLUDE_COREID=y
54 | CONFIG_FREERTOS_HZ=1000
55 | 


--------------------------------------------------------------------------------
/client/espressif/esp32s3_demo/sdkconfig.defaults.esp32s3:
--------------------------------------------------------------------------------
  1 | CONFIG_IDF_CMAKE=y
  2 | CONFIG_IDF_TARGET_ARCH_XTENSA=y
  3 | CONFIG_IDF_TARGET="esp32s3"
  4 | CONFIG_IDF_TARGET_ESP32S3=y
  5 | CONFIG_IDF_FIRMWARE_CHIP_ID=0x0009
  6 | 
  7 | #
  8 | # Make experimental features visible
  9 | #
 10 | CONFIG_IDF_EXPERIMENTAL_FEATURES=y
 11 | 
 12 | #
 13 | # Serial flasher config
 14 | #
 15 | CONFIG_BOOTLOADER_FLASH_DC_AWARE=y
 16 | CONFIG_ESPTOOLPY_FLASHFREQ_120M=y
 17 | CONFIG_SPI_FLASH_HPM_ENA=y
 18 | # end of Serial flasher config
 19 | 
 20 | #
 21 | # Audio HAL
 22 | #
 23 | CONFIG_ESP32_S3_KORVO2_V3_BOARD=y
 24 | # end of Audio HAL
 25 | 
 26 | #
 27 | # Audio Recorder
 28 | #
 29 | CONFIG_AFE_MIC_NUM=2
 30 | # end of Audio Recorder
 31 | 
 32 | #
 33 | # ESP Speech Recognition
 34 | #
 35 | CONFIG_MODEL_IN_FLASH=y
 36 | CONFIG_AFE_INTERFACE_V1=y
 37 | CONFIG_SR_WN_WN9_HILEXIN=n
 38 | # end of ESP Speech Recognition
 39 | 
 40 | #
 41 | # Component config
 42 | #
 43 | 
 44 | #
 45 | # Driver configurations
 46 | #
 47 | 
 48 | #
 49 | # mbedTLS
 50 | #
 51 | # CONFIG_MBEDTLS_INTERNAL_MEM_ALLOC is not set
 52 | CONFIG_MBEDTLS_EXTERNAL_MEM_ALLOC=y
 53 | # CONFIG_MBEDTLS_HARDWARE_AES is not set
 54 | # CONFIG_MBEDTLS_HARDWARE_SHA is not set
 55 | # end of mbedTLS
 56 | 
 57 | 
 58 | #
 59 | # ESP32s3-PSRAM
 60 | #
 61 | CONFIG_SPIRAM_XIP_FROM_PSRAM=y
 62 | 
 63 | #
 64 | # ESP32S3-Specific
 65 | #
 66 | # CONFIG_ESP32S3_DEFAULT_CPU_FREQ_80 is not set
 67 | # CONFIG_ESP32S3_DEFAULT_CPU_FREQ_160 is not set
 68 | CONFIG_ESP32S3_DEFAULT_CPU_FREQ_240=y
 69 | CONFIG_ESP32S3_DEFAULT_CPU_FREQ_MHZ=240
 70 | 
 71 | #
 72 | # Cache config
 73 | #
 74 | # CONFIG_ESP32S3_INSTRUCTION_CACHE_16KB is not set
 75 | CONFIG_ESP32S3_INSTRUCTION_CACHE_32KB=y
 76 | CONFIG_ESP32S3_INSTRUCTION_CACHE_SIZE=0x8000
 77 | # CONFIG_ESP32S3_INSTRUCTION_CACHE_4WAYS is not set
 78 | CONFIG_ESP32S3_INSTRUCTION_CACHE_8WAYS=y
 79 | CONFIG_ESP32S3_ICACHE_ASSOCIATED_WAYS=8
 80 | CONFIG_ESP32S3_INSTRUCTION_CACHE_LINE_32B=y
 81 | CONFIG_ESP32S3_INSTRUCTION_CACHE_LINE_SIZE=32
 82 | # CONFIG_ESP32S3_INSTRUCTION_CACHE_WRAP is not set
 83 | # CONFIG_ESP32S3_DATA_CACHE_16KB is not set
 84 | # CONFIG_ESP32S3_DATA_CACHE_32KB is not set
 85 | CONFIG_ESP32S3_DATA_CACHE_64KB=y
 86 | CONFIG_ESP32S3_DATA_CACHE_SIZE=0x10000
 87 | # CONFIG_ESP32S3_DATA_CACHE_4WAYS is not set
 88 | CONFIG_ESP32S3_DATA_CACHE_8WAYS=y
 89 | CONFIG_ESP32S3_DCACHE_ASSOCIATED_WAYS=8
 90 | # CONFIG_ESP32S3_DATA_CACHE_LINE_32B is not set
 91 | CONFIG_ESP32S3_DATA_CACHE_LINE_64B=y
 92 | CONFIG_ESP32S3_DATA_CACHE_LINE_SIZE=64
 93 | # CONFIG_ESP32S3_DATA_CACHE_WRAP is not set
 94 | # end of Cache config
 95 | 
 96 | CONFIG_ESP32S3_SPIRAM_SUPPORT=y
 97 | 
 98 | #
 99 | # SPI RAM config
100 | #
101 | CONFIG_SPIRAM_MODE_OCT=y
102 | CONFIG_SPIRAM_TYPE_AUTO=y
103 | CONFIG_SPIRAM_SPEED_120M=y
104 | CONFIG_SPIRAM=y
105 | CONFIG_SPIRAM_BOOT_INIT=y
106 | CONFIG_SPIRAM_USE_MALLOC=y
107 | CONFIG_SPIRAM_MEMTEST=y
108 | CONFIG_SPIRAM_MALLOC_ALWAYSINTERNAL=4096
109 | CONFIG_SPIRAM_TRY_ALLOCATE_WIFI_LWIP=y
110 | CONFIG_SPIRAM_MALLOC_RESERVE_INTERNAL=32768
111 | # end of SPI RAM config
112 | # end of ESP32S3-Specific
113 | 


--------------------------------------------------------------------------------
/resource/image/tech_support.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/volcengine/rtc-aigc-embedded-demo/9076c6c76592bef14f33b019f9441875ffc5b69c/resource/image/tech_support.png


--------------------------------------------------------------------------------
/server/src/AccessToken.py:
--------------------------------------------------------------------------------
  1 | # Copyright (2025) Beijing Volcano Engine Technology Ltd.
  2 | # SPDX-License-Identifier: MIT
  3 | 
  4 | # token生成代码来自 https://www.volcengine.com/docs/6348/70121
  5 | # 其它语言可以通过上面的链接获取
  6 | import base64
  7 | import hmac
  8 | import random
  9 | import struct
 10 | import time
 11 | from hashlib import sha256
 12 | from collections import OrderedDict
 13 | 
# Token format version; serialize() puts it at the front of every token and
# parse() rejects any token that does not start with it.
VERSION = "001"
VERSION_LENGTH = 3

# Number of characters parse() slices off after the version as the app id.
APP_ID_LENGTH = 24

# Publish privilege. add_privilege() also grants the audio / video / data
# sub-privileges below when this one is added.
PrivPublishStream = 0

# not exported, do not use directly
privPublishAudioStream = 1
privPublishVideoStream = 2
privPublishDataStream = 3

# Subscribe privilege.
PrivSubscribeStream = 4
 27 | 
 28 | class AccessToken:
 29 |     # Initializes token struct by required parameters.
 30 |     def __init__(self, app_id, app_key, room_id, user_id):
 31 |         random.seed(time.time())
 32 |         self.app_id = app_id
 33 |         self.app_key = app_key
 34 |         self.room_id = room_id
 35 |         self.user_id = user_id
 36 |         self.issued_at = int(time.time())
 37 |         self.nonce = random.randint(1, 99999999)
 38 |         self.expire_at = 0
 39 |         self.privileges = {}
 40 | 
 41 |     # AddPrivilege adds permission for token with an expiration.
 42 |     def add_privilege(self, privilege, expire_ts):
 43 |         if self.privileges is None:
 44 |             self.privileges = {}
 45 | 
 46 |         self.privileges[privilege] = expire_ts
 47 |         if privilege == PrivPublishStream:
 48 |             self.privileges[privPublishVideoStream] = expire_ts
 49 |             self.privileges[privPublishAudioStream] = expire_ts
 50 |             self.privileges[privPublishDataStream] = expire_ts
 51 | 
 52 |     # ExpireTime sets token expire time, won't expire by default.
 53 |     # The token will be invalid after expireTime no matter what privilege's expireTime is.
 54 |     def expire_time(self, expire_ts):
 55 |         self.expire_at = expire_ts
 56 | 
 57 |     def pack_msg(self):
 58 |         m = pack_uint32(self.nonce)
 59 |         m += pack_uint32(self.issued_at)
 60 |         m += pack_uint32(self.expire_at)
 61 |         m += pack_string(self.room_id)
 62 |         m += pack_string(self.user_id)
 63 |         m += pack_map_uint32(self.privileges)
 64 |         return m
 65 | 
 66 |     # Serialize generates the token string
 67 |     def serialize(self):
 68 |         m = self.pack_msg()
 69 |         signature = hmac.new(self.app_key.encode('utf-8'), m, sha256).digest()
 70 |         content = pack_bytes(m) + pack_bytes(signature)
 71 | 
 72 |         return VERSION + self.app_id + base64.b64encode(content).decode('utf-8')
 73 | 
 74 |     # Verify checks if this token valid, called by server side.
 75 |     def verify(self, key):
 76 |         if 0 < self.expire_at < int(time.time()):
 77 |             return False
 78 | 
 79 |         self.app_key = key
 80 |         return hmac.new(self.app_key.encode('utf-8'), self.pack_msg(), sha256).digest() == self.signature
 81 | 
 82 | # Parse retrieves token information from raw string
 83 | def parse(raw):
 84 |     try:
 85 |         if len(raw) <= VERSION_LENGTH:
 86 |             return
 87 |         if raw[:VERSION_LENGTH] != VERSION:
 88 |             return
 89 | 
 90 |         token = AccessToken("", "", "", "")
 91 |         token.app_id = raw[VERSION_LENGTH:VERSION_LENGTH + APP_ID_LENGTH]
 92 | 
 93 |         content_buf = base64.b64decode(raw[VERSION_LENGTH + APP_ID_LENGTH:])
 94 |         readbuf = ReadByteBuffer(content_buf)
 95 | 
 96 |         msg = readbuf.unpack_bytes()
 97 |         token.signature = readbuf.unpack_bytes()
 98 | 
 99 |         msgbuf = ReadByteBuffer(msg)
100 |         token.nonce = msgbuf.unpack_uint32()
101 |         token.issued_at = msgbuf.unpack_uint32()
102 |         token.expire_at = msgbuf.unpack_uint32()
103 |         token.room_id = msgbuf.unpack_string()
104 |         token.user_id = msgbuf.unpack_string()
105 |         token.privileges = msgbuf.unpack_map_uint32()
106 |         return token
107 | 
108 |     except Exception as e:
109 |         print("parse error:", str(e))
110 |         return
111 | 
112 | 
def pack_uint16(x):
    """Encode ``x`` as a little-endian unsigned 16-bit integer."""
    value = int(x)
    return struct.pack('<H', value)


def pack_uint32(x):
    """Encode ``x`` as a little-endian unsigned 32-bit integer."""
    value = int(x)
    return struct.pack('<I', value)


def pack_int32(x):
    """Encode ``x`` as a little-endian signed 32-bit integer."""
    value = int(x)
    return struct.pack('<i', value)


def pack_string(string):
    """Encode a text string as UTF-8 bytes with a uint16 length prefix."""
    encoded = string.encode('utf-8')
    return pack_bytes(encoded)


def pack_bytes(b):
    """Prefix ``b`` with its length as a uint16."""
    return pack_uint16(len(b)) + b


def pack_map_uint32(m):
    """Encode a mapping as a uint16 count followed by (uint16 key, uint32 value)
    pairs, ordered by the integer value of the key."""
    entries = sorted(m.items(), key=lambda entry: int(entry[0]))
    body = b"".join(pack_uint16(key) + pack_uint32(value) for key, value in entries)
    return pack_uint16(len(entries)) + body
141 | 
142 | 
class ReadByteBuffer:
    """Sequential little-endian reader over a bytes buffer.

    Each ``unpack_*`` call decodes one value at ``self.position`` and advances
    the position past it. Reading past the end raises ``struct.error``.
    """

    # The parameter keeps its original name for callers that pass it by keyword.
    def __init__(self, bytes):
        self.buffer = bytes
        self.position = 0

    def _unpack_scalar(self, fmt):
        """Decode one fixed-size value with struct format ``fmt`` and advance."""
        value = struct.unpack_from(fmt, self.buffer, self.position)[0]
        self.position += struct.calcsize(fmt)
        return value

    def unpack_uint16(self):
        """Read a little-endian unsigned 16-bit integer."""
        return self._unpack_scalar('<H')

    def unpack_uint32(self):
        """Read a little-endian unsigned 32-bit integer."""
        return self._unpack_scalar('<I')

    def unpack_string(self):
        """Read a length-prefixed byte string and decode it as UTF-8."""
        return self.unpack_bytes().decode('utf-8')

    def unpack_bytes(self):
        """Read a uint16 length followed by that many raw bytes."""
        size = self.unpack_uint16()
        chunk = self.buffer[self.position: self.position + size]
        if len(chunk) != size:
            # Same exception type a short read produced before.
            raise struct.error(
                'unpack requires a buffer of %d bytes' % size)
        self.position += size
        return bytes(chunk)

    def unpack_map_uint32(self):
        """Read a uint16 count followed by (uint16 key, uint32 value) pairs."""
        messages = {}
        for _ in range(self.unpack_uint16()):
            key = self.unpack_uint16()
            messages[key] = self.unpack_uint32()
        return messages
182 | 


--------------------------------------------------------------------------------
/server/src/README.md:
--------------------------------------------------------------------------------
  1 | # 服务端示例接口说明
  2 | 
  3 | 1. http 请求头说明
  4 | 
  5 |     ```shell
  6 |     # headers
  7 |     # Content-Type 固定值 application/json
  8 |     # Authorization af78e30 + ${CONFIG_RTC_APPID} 
  9 |     ```
 10 | 
 11 | 2. 启动智能体
 12 | - 请求示例
 13 |     ```shell
 14 |     curl --location 'http://127.0.0.1:8080/startvoicechat' \
 15 |     --header 'Content-Type: application/json' \
 16 |     --header 'Authorization: af78e3067******' \
 17 |     --data '{"end_point_id":"ep-2024*****", "audio_codec":"G711A"}'
 18 |     ```
 19 | 
 20 | - 请求体说明
 21 |     ```shell
 22 |     # 请求体是一个 json 字符串
 23 | 
 24 |     # audio_codec 
 25 |     # 非必填，字符串，默认值 G711A
 26 |     # 和智能体对话使用的音频传输格式，支持G711A，G722，OPUS，AAC。
 27 | 
 28 |     # end_point_id 
 29 |     # 非必填，字符串，默认值 ep-20250122160517-hlnzt
 30 |     # 对话的智能体的 end point id
 31 | 
 32 |     # asr_type 
 33 |     # 非必填，整数，默认值 0
 34 |     # 语音识别类型
 35 |     # 0 小模型 ASR
 36 |     # 1 大模型 ASR 小时版-流式输入流式输出
 37 |     # 2 大模型 ASR 并发版-流式输入流式输出
 38 |     # 3 大模型 ASR 小时版-流式输入非流式输出
 39 |     # 4 大模型 ASR 并发版-流式输入非流式输出
 40 | 
 41 |     # interrupt_speech_duration
 42 |     # 非必填，整数，默认值 0
 43 |     # 自动打断触发阈值。房间内真人用户持续说话时间达到该参数设定值后，智能体自动停止输出。取值范围为0，[200，3000]，单位为 ms，值越大智能体说话越不容易被打断。默认值为 0，表示用户发出声音且包含真实语义时即打断智能体输出。
 44 | 
 45 |     # vad_silence_time
 46 |     # 非必填，整数，默认值 0
 47 |     # 人声检查判停时间。停顿时间若高于该值设定时间，则认为一句话结束。取值范围为 [500，3000)，单位为 ms，默认值为 600
 48 | 
 49 |     # tts_is_bidirection
 50 |     # 非必填，布尔值，默认值 false
 51 |     # 是否是双向流式语音合成
 52 | 
 53 |     # voice_type
 54 |     # 非必填，字符串，默认值 BV007_streaming
 55 |     # 语音合成的声音音色类型
 56 | 
 57 |     # llm_prefill
 58 |     # 非必填，布尔值，默认值 false
 59 |     # 将 ASR 中间结果提前送入大模型进行处理以降低延时。开启后会产生额外模型消耗。
 60 | 
 61 |     # disable_rts_subtitle
 62 |     # 非必填，布尔值，默认值 false
 63 |     # 禁用rts字幕回调
 64 | 
 65 |     # enable_conversation_state_callback
 66 |     # 非必填，布尔值，默认值 false
 67 |     # 是否接收任务状态变化回调
 68 | 
 69 |     # fc_tools
 70 |     # 非必填， json数组，默认值 []
 71 |     # function call 工具列表，格式参考 https://www.volcengine.com/docs/6348/1359441
 72 | 
 73 |     ```
 74 | - 返回示例及说明
 75 |     ```json
 76 |     // 成功返回示例
 77 |     {
 78 |         "code": 200,
 79 |         "msg": "",
 80 |         "data": {
 81 |             "room_id": "G711Ad2ae*****",
 82 |             "uid": "userd2ae*****",
 83 |             "app_id": "67*****",
 84 |             "token": "00167*****CzVoPW/3AhM8*****T4bQ==",
 85 |             "task_id": "d2ae*****",
 86 |             "bot_uid": "botd2ae*****"
 87 |         }
 88 |     }
 89 | 
 90 |     // 失败返回示例
 91 |     {
 92 |         "code": 400,
 93 |         "msg": "header Authorization error, Bad Authorization."
 94 |     }
 95 |     ```
 96 | 
 97 |     ```bash
 98 |     # code： 状态码 只有200为成功，其他为失败
 99 |     # msg： 其它状态码的错误提示信息
100 |     
101 |     # data： 房间信息
102 |     # room_id： rtc 房间 id
103 |     # uid： rtc 客户端用户 id
104 |     # app_id： rtc app id
105 |     # token： rtc 加入房间的鉴权 token
106 |     # task_id： 智能体任务 id
107 |     # bot_uid： 智能体用户 id
108 |     ```
109 | 3. 更新智能体
110 | - 请求示例
111 |     ```shell
112 |     curl --location 'http://127.0.0.1:8080/updatevoicechat' \
113 |     --header 'Content-Type: application/json' \
114 |     --header 'Authorization: af78e3067*****' \
115 |     --data '{
116 |         "app_id": "af78e3067******",
117 |         "room_id": "G711Abf4106*****",
118 |         "task_id": "bf4106*****",
119 |         "command": "function",
120 |         "message": "{\"subscriber_user_id\":\"\",\"tool_calls\":[{\"function\":{\"arguments\":\"{\\\"location\\\": \\\"\\u5317\\u4eac\\u5e02\\\"}\",\"name\":\"get_current_weather\"},\"id\":\"call_py400kek0*****\",\"type\":\"function\"}]}"
121 |     }'
122 | 
123 |     ```
124 | - 请求体说明
125 |     ```shell
126 |     # app_id： rtc app id
127 | 
128 |     # room_id： rtc 房间 id
129 | 
130 |     # task_id： 智能体任务 id
131 | 
132 |     # command： 命令类型，目前支持 function, interrupt, external_text_to_speech, external_prompts_for_llm, external_text_to_llm, finish_speech_recognition
133 |     #           参考：https://www.volcengine.com/docs/6348/1404671  除了 function 命令外，其它命令的message会直接透传给open api
134 |     #           function: function calling 命令, message 需要传入客户端的 function calling 消息，服务端会做一个假的处理
135 |     #           interrupt: 打断命令，使用此命令不需要填 message
136 |     #           external_text_to_speech: 传入文本信息供 TTS 音频播放，使用此命令时必须提供 message
137 |     #           external_prompts_for_llm: 传入自定义文本结合用户问题送入 LLM，使用此命令时必须提供 message    
138 |     #           external_text_to_llm: 传入外部问题送入LLM，使用此命令时必须提供 message
139 |     #           finish_speech_recognition: 触发新一轮对话，使用此命令不需要 message
140 | 
141 |     # message： 命令消息，参考 command 的说明
142 | 
143 |     # interrupt_mode: 传入文本信息或外部问题时，处理的优先级。
144 |     #                 当 command 为 external_text_to_speech 或 external_text_to_llm 时该参数必填。
145 |     #                 1：高优先级。传入信息直接打断交互，进行处理。
146 |     #                 2：中优先级。等待当前交互结束后，进行处理。
147 |     #                 3：低优先级。如当前正在发生交互，直接丢弃 Message 传入的信息。
148 | 
149 |     ```
150 | - 返回示例及说明
151 |     ```json
152 |     // 成功返回示例
153 |     {
154 |         "code": 200,
155 |         "msg": "",
156 |         "data": {
157 |             "app_id": "66b****",
158 |             "room_id": "G711Abf4*****",
159 |             "task_id": "bf4*****",
160 |             "command": "function",
161 |             "message": "{\"subscriber_user_id\":\"\",\"tool_calls\":[{\"function\":{\"arguments\":\"{\\\"location\\\": \\\"北京市\\\"}\",\"name\":\"get_current_weather\"},\"id\":\"call_py400ke*****\",\"type\":\"function\"}]}"
162 |         }
163 |     }
164 |     // 失败返回示例
165 |     {
166 |         "code": 400,
167 |         "msg": "update_voice_chat: your command == function, \"message\" must be in json"
168 |     }
169 |     ```
170 |     
171 | 4. 停止智能体
172 | - 请求示例
173 |     ```shell
174 |     curl --location 'http://127.0.0.1:8080/stopvoicechat' \
175 |     --header 'Content-Type: application/json' \
176 |     --header 'Authorization: af78e30675*****' \
177 |     --data '{
178 |         "app_id": "66b*****",
179 |         "room_id": "G711Abf4*****",
180 |         "task_id": "bf4*****"
181 |     }'
182 |     ```
183 | - 请求体说明
184 |     ```shell
185 |     # app_id： rtc app id
186 | 
187 |     # room_id： rtc 房间 id 
188 | 
189 |     # task_id： 智能体任务 id
190 |     ```
191 | - 返回示例及说明
192 |     ```json
193 |     // 成功返回示例
194 |     {
195 |         "code": 200,
196 |         "msg": "",
197 |         "data": {
198 |             "app_id": "66b*****",
199 |             "room_id": "G711Abf4*****",
200 |             "task_id": "bf4*****"
201 |         }
202 |     }
203 | 
204 |     // 失败返回示例
205 |     {
206 |         "code": 400,
207 |         "msg": "header Authorization error, Bad Authorization."
208 |     }
209 |     ```
210 | 


--------------------------------------------------------------------------------
/server/src/RtcAigcConfig.py:
--------------------------------------------------------------------------------
 1 | # Copyright (2025) Beijing Volcano Engine Technology Ltd.
 2 | # SPDX-License-Identifier: MIT
 3 | 
# Authentication AK/SK. Obtain them at https://console.volcengine.com/iam/keymanage
SK = "WmpCbVl6Y3hOR1JrT************1tTTRZalF4WW1FeE56WQ=="
AK = "AKLTNWQyODQ1MDM5Y***********WRmM2Y2NTJlMTQyZjI"

# RTC App ID. Obtain or create it at https://console.volcengine.com/rtc/listRTC
RTC_APP_ID = "67582ac8******0174410bd1"
# RTC App Key. Obtain it at https://console.volcengine.com/rtc/listRTC
RTC_APP_KEY = "1a6a03723c******222ada877ee13b"

# LLM inference endpoint id (EndPointId). Create one at https://console.volcengine.com/ark/region:ark+cn-beijing/endpoint?config=%7B%7D
DEFAULT_END_POINT_ID = "ep-2025******160517-hlnzt"
# Speech synthesis (TTS) voice type. Obtain it at https://console.volcengine.com/speech/service/8
DEFAULT_VOICE_TYPE = "BV007_******ming"

# Streaming speech recognition (ASR) app id. Obtain it at https://console.volcengine.com/speech/service/16
ASR_APP_ID = "884***621"
# Streaming speech recognition (ASR) access token. Obtain it at https://console.volcengine.com/speech/service/16
ASR_ACCESS_TOKEN = "M_X6X***BeXa1"

# Speech synthesis (TTS) app id. Obtain it at https://console.volcengine.com/speech/service/8
TTS_APP_ID = "884***9621"
# Speech synthesis (TTS) access token. Obtain it at https://console.volcengine.com/speech/service/8
TTS_ACCESS_TOKEN = "M_X6X***BeXa1"

# Port the server listens on; change it to suit your deployment.
PORT = 8080
30 | 


--------------------------------------------------------------------------------
/server/src/RtcAigcService.py:
--------------------------------------------------------------------------------
  1 | # Copyright (2025) Beijing Volcano Engine Technology Ltd.
  2 | # SPDX-License-Identifier: MIT
  3 | 
  4 | import http.server
  5 | import socketserver
  6 | import json
  7 | import uuid
  8 | import time
  9 | 
 10 | import AccessToken
 11 | import RtcApiRequester
 12 | 
 13 | from RtcAigcConfig import *
 14 | 
# Status codes this service passes to response_data().
RESPONSE_CODE_SUCCESS = 200
RESPONSE_CODE_REQUEST_ERROR = 400
RESPONSE_CODE_SERVER_ERROR = 500
# Full request URLs, kept for reference; the host, action and version parts
# are defined separately below.
# START_VOICE_CHAT_URL = "https://rtc.volcengineapi.com?Action=StartVoiceChat&Version=2024-12-01"
# STOP_VOICE_CHAT_URL = "https://rtc.volcengineapi.com?Action=StopVoiceChat&Version=2024-12-01"
# UPDATE_VOICE_CHAT_URL = "https://rtc.volcengineapi.com?Action=UpdateVoiceChat&Version=2024-12-01"
RTC_API_HOST = "rtc.volcengineapi.com"
RTC_API_START_VOICE_CHAT_ACTION = "StartVoiceChat"
RTC_API_STOP_VOICE_CHAT_ACTION = "StopVoiceChat"
RTC_API_UPDATE_VOICE_CHAT_ACTION = "UpdateVoiceChat"
RTC_API_VERSION = "2024-12-01"
 26 | 
 27 | def parse_json(json_str):
 28 |     try:
 29 |         json_obj = json.loads(json_str)
 30 |         return json_obj
 31 |     except json.JSONDecodeError as e:
 32 |         return None
 33 | 
 34 | class RtcAigcHTTPRequestHandler(http.server.BaseHTTPRequestHandler):
 35 |     '''
 36 |     StartVoiceChat
 37 |     curl --location 'http://127.0.0.1:8080/startvoicechat' \
 38 |     --header 'Content-Type: application/json' \
 39 |     --header 'Authorization: af78e30${RTC_APP_ID}' \
 40 |     --data '{
 41 |         "end_point_id": "ep-20240729172503-mmg9b",
 42 |         "voice_type": "zh_female_meilinvyou_moon_bigtts"
 43 |     }'
 44 | 
 45 | 
 46 |     StopVoiceChat
 47 |     curl --location 'http://127.0.0.1:8080/stopvoicechat' \
 48 |     --header 'Content-Type: application/json' \
 49 |     --header 'Authorization: af78e30${RTC_APP_ID}' \
 50 |     --data '{
 51 |         "app_id": "66bb6632f55d550120fb5c94",
 52 |         "room_id": "G711Abf410694b3a34a3aa980b6e85613200d",
 53 |         "task_id" : "bf410694b3a34a3aa980b6e85613200d"
 54 |     }'
 55 | 
 56 | 
 57 |     UpdateVoiceChat
 58 |     打断智能体说话
 59 |     curl --location 'http://127.0.0.1:8080/updatevoicechat' \
 60 |     --header 'Content-Type: application/json' \
 61 |     --header 'Authorization: af78e30${RTC_APP_ID}' \
 62 |     --data '{
 63 |         "app_id": "66bb6632f55d550120fb5c94",
 64 |         "room_id": "G711Abf410694b3a34a3aa980b6e85613200d",
 65 |         "task_id" : "bf410694b3a34a3aa980b6e85613200d",
 66 |         "command": "interrupt"
 67 |     }'
 68 | 
 69 |     处理 function calling
 70 |     curl --location 'http://127.0.0.1:8080/updatevoicechat' \
 71 |     --header 'Content-Type: application/json' \
 72 |     --header 'Authorization: hehehe' \
 73 |     --data '{
 74 |         "app_id": "66bb6632f55d550120fb5c94",
 75 |         "room_id": "bf410694b3a34a3aa980b6e85613200d",
 76 |         "task_id" : "bf410694b3a34a3aa980b6e85613200d",
 77 |         "command": "function",
 78 |         "message": "{\"ToolCallID\":\"call_cx\",\"Content\":\"上海天气是台风\"}"
 79 |     }'
 80 | 
 81 |     '''
 82 | 
 83 |     def do_POST(self):
 84 |         json_obj = self.parse_post_data()
 85 |         if json_obj == None:
 86 |             return
 87 |         
 88 |         if self.path == "/startvoicechat":
 89 |             self.start_voice_chat(json_obj)
 90 |         elif self.path == "/stopvoicechat":
 91 |             self.stop_voice_chat(json_obj)
 92 |         elif self.path == "/updatevoicechat":
 93 |             self.update_voice_chat(json_obj)
 94 |         else:
 95 |             self.response_data(404, "path error, unknown path: " + self.path)
 96 |             return
 97 | 
 98 | ###################################### start voice chat ######################################
 99 |     def start_voice_chat(self, json_obj):
100 |         room_info = self.generate_rtc_room_info(json_obj)
101 |         ret = self.request_start_voice_chat(room_info, json_obj)
102 |         if ret == None:
103 |             resp_obj = {
104 |                 "data" : room_info
105 |             }
106 |             self.response_data(RESPONSE_CODE_SUCCESS, "", resp_obj)
107 |         else:
108 |             self.response_data(RESPONSE_CODE_SERVER_ERROR, ret)
109 |     
110 |     def generate_rtc_room_info(self, json_obj):
111 |         # 音频编码格式
112 |         audio_codec = "G711A"
113 |         if "audio_codec" in json_obj:
114 |             audio_codec = json_obj["audio_codec"]
115 | 
116 |         if audio_codec not in {"OPUS", "G711A", "G722", "AAC"}:
117 |             audio_codec = "G711A"
118 |         
119 |         # 根据业务情况，生成 room_id，用户id 或者 从客户端请求中获取
120 |         # 这里简单生成一个随机的 room_id 和 user_id
121 |         uuid_str = uuid.uuid4().hex
122 |         room_id = audio_codec + uuid_str # 加入aigc策略组后，根据房间id前缀配置rtc音视频传输格式
123 |         user_id = "user" + uuid_str
124 |         bot_user_id = "bot" + uuid_str
125 |         expire_time = int(time.time()) + 3600 * 48 # rtc token 48h
126 |         token = AccessToken.AccessToken(RTC_APP_ID, RTC_APP_KEY, room_id, user_id)
127 |         token.add_privilege(AccessToken.PrivSubscribeStream, expire_time)
128 |         token.add_privilege(AccessToken.PrivPublishStream, expire_time)
129 |         token.expire_time(expire_time)
130 | 
131 |         token_str = token.serialize()
132 |         room_info = {
133 |             "room_id" : room_id,
134 |             "uid" : user_id,
135 |             "app_id" : RTC_APP_ID,
136 |             "token" : token_str,
137 |             "task_id" : uuid_str,
138 |             "bot_uid" : bot_user_id
139 |         }
140 |         print(room_info)
141 |         return room_info
142 |     
143 |     def request_start_voice_chat(self, room_info, json_obj):
144 |         # request_body 内容含义请参考 https://www.volcengine.com/docs/6348/1404673
145 |         # 小模型 ASR，速度相对大模型 ASR 更快一些，识别精度低于大模型 ASR
146 |         
147 |         # 读取客户端传来的 end_point_id
148 |         if "end_point_id" in json_obj:
149 |             end_point_id = json_obj["end_point_id"]
150 |         else:
151 |             end_point_id = DEFAULT_END_POINT_ID
152 |         
153 |         volcano_asr_config_provider_params = {
154 |             "Mode" : "smallmodel",                                       # 模型类型。取值固定为 smallmodel
155 |             "AppId" : ASR_APP_ID,                                        # ASR App ID
156 |             "Cluster" : "volcengine_streaming_common"                    # 非必填，具体流式语音识别服务对应的 Cluster ID，可在流式语音服务控制台开通对应服务后查询。默认为通用-中文的 Cluster ID：volcengine_streaming_common
157 |         }
158 | 
159 |         # 读取客户端传来的 asr_type，根据 type 设置 asr_provider_params
160 |         # 大模型ASR，速度相对小模型 ASR 慢一些，识别精度高于小模型 ASR
161 |         volcano_lm_asr_config_provider_params = {
162 |             "Mode" : "bigmodel",                                         # 模型类型。取值固定为 bigmodel
163 |             "AppId" : ASR_APP_ID,                                        # ASR App ID
164 |             "AccessToken" : ASR_ACCESS_TOKEN,                            # ASR Access Token
165 |             "ApiResourceId" : "volc.bigasr.sauc.duration",               # 流式语音识别大模型开通的服务类型：volc.bigasr.sauc.duration：小时版；volc.bigasr.sauc.concurrent：并发版。默认小时版
166 |             "StreamMode" : 0                                             # 语音识别输出模式: 0：流式输入流式输出; 1：流式输入非流式输出。默认 0
167 |             # "context" : "{\"hotwords\": [{\"word\": \"CO2\"},{\"word\": \"雨伞\"},{\"word\": \"鱼\"}]}" # 设置热词用于提高识别精度。最多设置200 tokens
168 |         }
169 |         asr_provider_params = volcano_asr_config_provider_params
170 |         if "asr_type" in json_obj:
171 |             if json_obj["asr_type"] == 0:
172 |                 # 0 小模型 ASR
173 |                 asr_provider_params = volcano_asr_config_provider_params
174 |             elif json_obj["asr_type"] == 1:
175 |                 # 1 大模型 ASR 小时版-流式输入流式输出
176 |                 asr_provider_params = volcano_lm_asr_config_provider_params
177 |             elif json_obj["asr_type"] == 2:
178 |                 # 2 大模型 ASR 并发版-流式输入流式输出
179 |                 asr_provider_params = volcano_lm_asr_config_provider_params
180 |                 asr_provider_params["ApiResourceId"] = "volc.bigasr.sauc.concurrent"
181 |             elif json_obj["asr_type"] == 3:
182 |                 # 3 大模型 ASR 小时版-流式输入非流式输出
183 |                 asr_provider_params = volcano_lm_asr_config_provider_params
184 |                 asr_provider_params["StreamMode"] = 1
185 |             elif json_obj["asr_type"] == 4:
186 |                 # 4 大模型 ASR 并发版-流式输入非流式输出
187 |                 asr_provider_params = volcano_lm_asr_config_provider_params
188 |                 asr_provider_params["ApiResourceId"] = "volc.bigasr.sauc.concurrent"
189 |                 asr_provider_params["StreamMode"] = 1
190 |         
191 |         # 读取客户端传来的 interrupt_speech_duration
192 |         interrupt_speech_duration = 0
193 |         if "interrupt_speech_duration" in json_obj:
194 |             interrupt_speech_duration_client = int(json_obj["interrupt_speech_duration"])
195 |             if interrupt_speech_duration_client >= 200 and interrupt_speech_duration_client <= 3000:
196 |                 interrupt_speech_duration = interrupt_speech_duration_client
197 |         
198 | 
199 |         
200 |         # 读取客户端传来的 vad_silence_time
201 |         vad_silence_time = 600
202 |         if "vad_silence_time" in json_obj:
203 |             vad_silence_time = int(json_obj["vad_silence_time"])
204 |             if vad_silence_time < 500:
205 |                 vad_silence_time = 500
206 |             elif vad_silence_time >= 3000:
207 |                 vad_silence_time = 2999
208 |         
209 |         # 读取客户端传来的 tts_is_bidirection 和 voice_type，设置 tts_provider_params
210 |         tts_provider = "volcano"
211 |         volcano_tts_config = {
212 |             "app" : {
213 |                 "appid" : TTS_APP_ID,                                    # 语音合成服务的app id
214 |                 "cluster" : "volcano_tts"                                # 具体语音合成服务对应的 Cluster ID
215 |             },
216 |             "audio" : {
217 |                 "voice_type" : DEFAULT_VOICE_TYPE,                       # 音色 id
218 |                 "speed_ratio" : 1.0,                                     # 语速。
219 |                 "volume_ratio" : 1.0,                                    # 音量。
220 |                 "pitch_ratio" : 1.0                                      # 声调
221 |             }
222 |         }
223 | 
224 |         volcano_bi_tts_config = {
225 |             "app" : {
226 |                 "appid" : TTS_APP_ID,                                    # 语音合成服务的app id
227 |                 "token" : TTS_ACCESS_TOKEN                               # 语音合成服务的token
228 |             },
229 |             "audio" : {
230 |                 "voice_type" : DEFAULT_VOICE_TYPE,                         # 音色 id
231 |                 "pitch_rate" : 0,                                        # 音调 取值范围为 [-12,12]。默认值为 0
232 |                 "speech_rate" : 0                                        # 语速。取值范围为[-50,100]，100代表2.0倍速，-50代表0.5倍速。默认值为 0
233 |             },
234 |             "Additions" : {
235 |                 "enable_latex_tn" : True,                                # 是否可以播报 latex公式
236 |                 "disable_markdown_filter" : True,                        # 是否关闭 markdown 格式过滤。
237 |                 "enable_language_detector" : False                       # 是否自动识别语种。
238 |             },
239 |             "ResourceId": "volc.service_type.10029"
240 |         }
241 |         tts_provider_params = volcano_tts_config
242 |         if "tts_is_bidirection" in json_obj:
243 |             if json_obj["tts_is_bidirection"] == True:
244 |                 tts_provider = "volcano_bidirection"
245 |                 tts_provider_params = volcano_bi_tts_config
246 |             else:
247 |                 tts_provider = "volcano"
248 |                 tts_provider_params = volcano_tts_config
249 |         
250 |         if "voice_type" in json_obj:
251 |             voice_type = str(json_obj["voice_type"])
252 |         else:
253 |             voice_type = DEFAULT_VOICE_TYPE
254 |         tts_provider_params["audio"]["voice_type"] = voice_type
255 |         
256 |         # 读取客户端传来的 llm_prefill
257 |         llm_prefill = False
258 |         if "llm_prefill" in json_obj and json_obj["llm_prefill"] == True:
259 |             llm_prefill = True
260 |         
261 |         # 读取客户端传来的 disable_rts_subtitle
262 |         disable_rts_subtitle = False
263 |         if "disable_rts_subtitle" in json_obj and json_obj["disable_rts_subtitle"] == True:
264 |             disable_rts_subtitle = True
265 |         
266 |         # 读取客户端传来的 enable_conversation_state_callback
267 |         enable_conversation_state_callback = False
268 |         if "enable_conversation_state_callback" in json_obj and json_obj["enable_conversation_state_callback"] == True:
269 |             enable_conversation_state_callback = True
270 |         
271 |         fc_tools = None
272 |         if "fc_tools" in json_obj:
273 |             fc_tools = json_obj["fc_tools"]
274 | 
275 |         request_body = {
276 |             "AppId" : room_info["app_id"],                                      # RTC App id
277 |             "RoomId" : room_info["room_id"],                                    # RTC 房间 id
278 |             "TaskId" : room_info["task_id"],                                    # 智能体任务id，你必须对每个智能体任务设定 TaskId，且在后续进行任务更新和结束时也须使用该 TaskId。
279 |             "Config" : {
280 |                 "ASRConfig" : {
281 |                     "Provider" : "volcano",                                     # 语音识别服务提供商。volcano：火山引擎语音识别。
282 |                     "ProviderParams" : asr_provider_params,                     # 参考 VolcanoASRConfig 和 VolcanoLMASRConfig
283 |                     "VADConfig" : {
284 |                         "SilenceTime" : vad_silence_time                        # 人声检查判停时间。停顿时间若高于该值设定时间，则认为一句话结束。取值范围为 [500，3000)，单位为 ms，默认值为 600
285 |                     },
286 |                     "VolumeGain" : 0.3,                                         # 音量增益值。降低采集音量，以减少噪音引起的 ASR 错误识别。默认值 1.0，推荐值 0.3
287 |                     "InterruptConfig" : {
288 |                         "InterruptSpeechDuration" : interrupt_speech_duration,  # 自动打断触发阈值。房间内真人用户持续说话时间达到该参数设定值后，智能体自动停止输出。取值范围为0，[200，3000]，单位为 ms，值越大智能体说话越不容易被打断。默认值为 0，表示用户发出声音且包含真实语义时即打断智能体输出。
289 |                     },
290 |                     "TurnDetectionMode" : 0                                     # 新一轮对话的触发方式。0：服务端检测到完整的一句话后，自动触发新一轮对话。1：收到输入结束信令或说话字幕结果后，你自行决定是否触发新一轮会话。
291 |                 },
292 |                 "TTSConfig" : {
293 |                     "IgnoreBracketText" : [1, 2, 3, 4, 5],                      # 非必填， 过滤大模型生成的文本中符号 1:"（）" 2:"()", 3:"【】", 4:"[]", 5:"{}".默认不过滤
294 |                     "Provider" : tts_provider,                                  # TTS 服务供应商
295 |                     "ProviderParams" : tts_provider_params
296 |                 },
297 |                 "LLMConfig" : {
298 |                     "Mode" : "ArkV3",                                           # 大模型名称，该参数固定取值： ArkV3
299 |                     "EndPointId" : end_point_id,                                # 推理接入点。使用方舟大模型时必填。
300 |                     "MaxTokens" : 1024,                                         # 非必填，输出文本的最大token数，默认 1024
301 |                     "Temperature" : 0.1,                                        # 非必填，用于控制生成文本的随机性和创造性，值越大随机性越高。取值范围为（0,1]，默认值为 0.1
302 |                     "TopP" : 0.3,                                               # 非必填，用于控制输出tokens的多样性，值越大输出的tokens类型越丰富。取值范围为（0,1]，默认值为 0.3
303 |                     "SystemMessages" : [                                        # 非必填，大模型 System 角色预设指令，可用于控制模型输出。
304 |                         "你是一个语言大模型，你只能接收文本数据。用户的语音通过语音识别服务转换成文本，发送给你。有时候语音识别服务会有错误，你可以根据具体语境判断用户的真实意图，遇到实在理解不了的错误语句，可以引导用户换种方式表达。同样的，你输出的文本会通过语音合成服务转换成音频，然后发送给用户。有多种方式可以打断合成的音频，如果你发现用户不记得你前面一句说的是什么，请不要疑惑。在和用户对话时，请牢记：你的名字是小宁，性格幽默又善解人意。你在表达时需简明扼要，有自己的观点。"
305 |                     ],
306 |                     "UserMessages" : [                                          # 非必填，大模型 User 角色预设 Prompt，可用于增强模型的回复质量，模型回复时会参考此处内容。
307 |                         "user:\"你是谁\"",
308 |                         "assistant:\"我是问答助手\"",
309 |                         "user:\"你能干什么\"",
310 |                         "user:\"我能回答问题\""
311 |                     ],
312 |                     "Prefill" : llm_prefill,                                    # 非必填, 将 ASR 中间结果提前送入大模型进行处理以降低延时。开启后会产生额外模型消耗。默认值 false
313 |                     "HistoryLength" : 3,                                        # 非必填，大模型上下文长度，默认 3。
314 |                     # "Tools" : [...]                                           # 非必填，使用 Function calling 功能时，模型可以调用的工具列表 参考：https://www.volcengine.com/docs/6348/1359441
315 |                     # "VisionConfig" : {}                                       # 视觉理解能力配置。仅在推理点选择模型为 doubao-vision-pro 和 doubao-vision-lite 时生效。该功能使用说明参看 https://www.volcengine.com/docs/6348/1408245
316 |                 },
317 |                 "SubtitleConfig" : {
318 |                     "DisableRTSSubtitle" : disable_rts_subtitle,                # 非必填，是否关闭房间内字幕回调，默认 false
319 |                     # "ServerMessageUrl" : "Your url",                          # 非必填，用于服务端接收字幕回调
320 |                     # "ServerMessageSignature" : "",                            # 用于你的服务端字幕回调鉴权
321 |                     "SubtitleMode" : 0                                          # 字幕回调时是否需要对齐音频时间戳。0 对齐，1 不对齐。默认 0
322 |                 },
323 |                 "InterruptMode" : 0                                             # 非必填，智能体对话打断模式。 0: 智能体语音可以被用户语音打断 1: 不能被用户语音打断
324 |                 # "FunctionCallingConfig" : {                                   # 服务端接收 Function calling 函数工具调用的信息指令配置。
325 |                 #     "ServerMessageUrl" : "Your URL",                          # 服务端接收 Function calling 函数工具调用的信息指令的 URL 地址。功能使用详情参看 https://www.volcengine.com/docs/6348/1359441#callingconfig 
326 |                 #     "ServerMessageSignature" : ""                             # 鉴权签名。
327 |                 # }
328 |             },
329 |             "AgentConfig" : {
330 |                 "TargetUserId" : [room_info["uid"]],                            # 房间内客户端 SDK 用户对应的 UserId。仅支持传入一个 UserId。注意该值是一个数组
331 |                 "WelcomeMessage" : "你好,有什么可以帮到你的吗",                   # 智能体启动后的欢迎词。
332 |                 "UserId" : room_info["bot_uid"],                                # 智能体的user id
333 |                 "EnableConversationStateCallback" : enable_conversation_state_callback # 是否接收任务状态变化回调。默认值为 false
334 |             }
335 |         }
336 |         if fc_tools != None and len(fc_tools) > 0 :
337 |             request_body["Config"]["LLMConfig"]["Tools"] = fc_tools
338 | 
339 |         request_body_str = json.dumps(request_body)
340 |         canonical_query_string = "Action=%s&Version=%s" % (RTC_API_START_VOICE_CHAT_ACTION, RTC_API_VERSION)
341 |         code, response = RtcApiRequester.request_rtc_api(RTC_API_HOST, "POST", "/", canonical_query_string, None, request_body_str, AK, SK)
342 |         print("request_rtc_api start code:", code)
343 |         print("request_rtc_api start response:", response)
344 |         if code == RESPONSE_CODE_SUCCESS:
345 |             if "Result" in response and response["Result"] == "ok":
346 |                 return None
347 |             else:
348 |                 return response["ResponseMetadata"]["Error"]["Message"]
349 |         else:
350 |             if response != None:
351 |                 return response["ResponseMetadata"]["Error"]["Message"]
352 |             else:
353 |                 return "request rtc api response code " + str(code)
354 |         return None
355 | 
356 | ###################################### stop voice chat #######################################
357 |     def stop_voice_chat(self, json_obj):
358 |         # 参考 https://www.volcengine.com/docs/6348/1404672
359 |         if "room_id" not in json_obj or "task_id" not in json_obj or "app_id" not in json_obj:
360 |             self.response_data(RESPONSE_CODE_REQUEST_ERROR, "stop_voice_chat: \"room_id\", \"task_id\", \"app_id\" must be in json")
361 |             return
362 |         
363 |         ret = self.request_stop_voice_chat(json_obj)
364 |         if ret == None:
365 |             resp_obj = {
366 |                 "data" : json_obj
367 |             }
368 |             self.response_data(RESPONSE_CODE_SUCCESS, "", resp_obj)
369 |         else:
370 |             self.response_data(RESPONSE_CODE_SERVER_ERROR, ret)
371 |     
372 |     def request_stop_voice_chat(self, json_obj):
373 |         # 参考 https://www.volcengine.com/docs/6348/1404672
374 |         request_body = {
375 |             "AppId" : json_obj["app_id"],      # rtc app id
376 |             "RoomId" : json_obj["room_id"],    # rtc 房间 id
377 |             "TaskId" : json_obj["task_id"]     # rtc 客户端用户id
378 |         }
379 | 
380 |         request_body_str = json.dumps(request_body)
381 |         canonical_query_string = "Action=%s&Version=%s" % (RTC_API_STOP_VOICE_CHAT_ACTION, RTC_API_VERSION)
382 |         code, response = RtcApiRequester.request_rtc_api(RTC_API_HOST, "POST", "/", canonical_query_string, None, request_body_str, AK, SK)
383 |         print("request_rtc_api stop code:", code)
384 |         print("request_rtc_api stop response:", response)
385 |         if code == RESPONSE_CODE_SUCCESS:
386 |             if "Result" in response and response["Result"] == "ok":
387 |                 return None
388 |             else:
389 |                 return response["ResponseMetadata"]["Error"]["Message"]
390 |         else:
391 |             if response != None:
392 |                 return response["ResponseMetadata"]["Error"]["Message"]
393 |             else:
394 |                 return "request rtc api response code " + str(code)
395 |         return None
396 | 
397 | ###################################### update voice chat #####################################
398 |     def update_voice_chat(self, json_obj):
399 |         # 更新智能体详细信息请参考 https://www.volcengine.com/docs/6348/1404671
400 |         if "room_id" not in json_obj or "task_id" not in json_obj or "app_id" not in json_obj or "command" not in json_obj:
401 |             self.response_data(RESPONSE_CODE_REQUEST_ERROR, "update_voice_chat: \"room_id\", \"task_id\", \"app_id\", \"command\" must be in json")
402 |             return
403 |         update_commands = {"interrupt", "function", "external_text_to_speech", "external_prompts_for_llm", "external_text_to_llm", "finish_speech_recognition"}
404 |         if json_obj["command"] not in update_commands:
405 |             self.response_data(RESPONSE_CODE_REQUEST_ERROR, "update_voice_chat: your command == " + json_obj["command"] + ", command must be in " + str(update_commands))
406 |             return
407 |         required_message_commands = {"function", "external_text_to_speech", "external_prompts_for_llm", "external_text_to_llm"}
408 |         if json_obj["command"] in required_message_commands and "message" not in json_obj:
409 |             self.response_data(RESPONSE_CODE_REQUEST_ERROR, "update_voice_chat: your command == " + json_obj["command"] + ", \"message\" must be in json")
410 |             return
411 |         
412 |         required_interrupt_mode_commands = {"external_text_to_speech", "external_text_to_llm"}
413 |         if json_obj["command"] in required_interrupt_mode_commands and "interrupt_mode" not in json_obj:
414 |             self.response_data(RESPONSE_CODE_REQUEST_ERROR, "update_voice_chat: your command == " + json_obj["command"] + ", \"interrupt_mode\" must be in json, interrupt_mode == 1, 2, or 3")
415 |             return
416 |         if "interrupt_mode" in json_obj:
417 |             if json_obj["interrupt_mode"] not in {1, 2, 3}:
418 |                 self.response_data(RESPONSE_CODE_REQUEST_ERROR, "update_voice_chat: your command == " + json_obj["command"] + ", \"interrupt_mode\" must be in json, interrupt_mode == 1, 2, or 3")
419 |                 return
420 |         ret = self.request_update_voice_chat(json_obj)
421 |         if ret == None:
422 |             resp_obj = {
423 |                 "data" : json_obj
424 |             }
425 |             self.response_data(RESPONSE_CODE_SUCCESS, "", resp_obj)
426 |         else:
427 |             self.response_data(RESPONSE_CODE_SERVER_ERROR, ret)
428 |     
429 |     def request_update_voice_chat(self, json_obj):
430 |         # 参考 https://www.volcengine.com/docs/6348/1404671
431 |         update_commands_map = {
432 |             "interrupt" : "Interrupt",
433 |             "function" : "Function",
434 |             "external_text_to_speech" : "ExternalTextToSpeech",
435 |             "external_prompts_for_llm" : "ExternalPromptsForLLM",
436 |             "external_text_to_llm" : "ExternalTextToLLM",
437 |             "finish_speech_recognition" : "FinishSpeechRecognition"
438 |         }
439 |         parsed_command = update_commands_map[json_obj["command"]]
440 |         request_body = {
441 |             "AppId" : json_obj["app_id"],      # rtc app id
442 |             "RoomId" : json_obj["room_id"],    # rtc 房间 id
443 |             "TaskId" : json_obj["task_id"],    # 创建智能体时用的TaskId
444 |             "Command" : parsed_command,        # 更新指令 interrupt： 打断智能体说话；function：传回工具调用信息指令。
445 |             # "Message" : "..."                # 工具调用信息指令，格式为 Json 转译字符串。Command 取值为 function时，Message必填。
446 |             # "InterruptMode" : 1              # 打断模式。取值范围为 1, 2, 3. 当 command 为 ExternalTextToSpeech 或 ExternalTextToLLM 时为该参数必填。
447 |         }
448 |         if "interrupt_mode" in json_obj:
449 |             request_body["InterruptMode"] = json_obj["interrupt_mode"]
450 |         if json_obj["command"] == "function":
451 |             # function calling 数据， 参考 https://www.volcengine.com/docs/6348/1359441
452 |             # 客户端传来的message数据是一个json字符串，内容如下：
453 |             # {
454 |             #     "subscriber_user_id" : "",
455 |             #     "tool_calls" : 
456 |             #     [
457 |             #         {
458 |             #             "function" : 
459 |             #             {
460 |             #                 "arguments" : "{\\"location\\": \\"\\u5317\\u4eac\\u5e02\\"}",
461 |             #                 "name" : "get_current_weather"
462 |             #             },
463 |             #             "id" : "call_py400kek0e3pczrqdxgnb3lo",
464 |             #             "type" : "function"
465 |             #         }
466 |             #     ]
467 |             # }
468 |             
469 |             print(json_obj["message"])
470 |             message_json_obj = parse_json(json_obj["message"])
471 |             if message_json_obj == None:
472 |                 self.response_data(RESPONSE_CODE_REQUEST_ERROR, "Post data is not a json string.")
473 |                 return
474 |             # 下面代码只是示例，要根据实际情况，解析函数名称和参数，做出真实的响应
475 |             if "tool_calls" not in message_json_obj or len(message_json_obj["tool_calls"]) <= 0 or "id" not in message_json_obj["tool_calls"][0]:
476 |                 self.response_data(RESPONSE_CODE_REQUEST_ERROR, "function calling message is error.")
477 |                 return
478 |             message_body = {
479 |                 "ToolCallID" : message_json_obj["tool_calls"][0]["id"],
480 |                 "Content" : "今天天气很好，阳光明媚，偶尔有微风。"
481 |             }
482 |             
483 |             request_body["Message"] = json.dumps(message_body)
484 |         elif "message" in json_obj:
485 |             request_body["Message"] = json_obj["message"]
486 |         
487 |         request_body_str = json.dumps(request_body)
488 |         canonical_query_string = "Action=%s&Version=%s" % (RTC_API_UPDATE_VOICE_CHAT_ACTION, RTC_API_VERSION)
489 |         code, response = RtcApiRequester.request_rtc_api(RTC_API_HOST, "POST", "/", canonical_query_string, None, request_body_str, AK, SK)
490 |         print("request_rtc_api update code:", code)
491 |         print("request_rtc_api update response:", response)
492 |         if code == RESPONSE_CODE_SUCCESS:
493 |             if "Result" in response and response["Result"] == "ok":
494 |                 return None
495 |             else:
496 |                 return response["ResponseMetadata"]["Error"]["Message"]
497 |         else:
498 |             if response != None:
499 |                 return response["ResponseMetadata"]["Error"]["Message"]
500 |             else:
501 |                 return "request rtc api response code " + str(code)
502 |         return None
503 | 
504 | 
505 | ##############################################################################################
506 |     def response_data(self, code, msg, extra_data = None):
507 |         self.send_response(code)
508 |         self.send_header('Content-type', 'application/json')
509 |         self.end_headers()
510 |         ret_data = {
511 |             "code": code,
512 |             "msg" : msg
513 |         }
514 | 
515 |         if extra_data != None:
516 |             for k, v in extra_data.items():
517 |                 ret_data[k] = v
518 |         self.wfile.write(json.dumps(ret_data).encode())
519 | 
520 | 
521 |     def parse_post_data(self):
522 |         # check headers
523 |         content_type = self.headers.get("Content-Type")
524 |         authorization = self.headers.get("Authorization")
525 |         if content_type != "application/json":
526 |             self.response_data(RESPONSE_CODE_REQUEST_ERROR, "header Content-Type error, must be application/json.")
527 |             return None
528 |         if authorization == None or authorization == "":
529 |             self.response_data(RESPONSE_CODE_REQUEST_ERROR, "header Authorization error, Authorization not be set.")
530 |             return None
531 |         if authorization != ("af78e30" +  RTC_APP_ID):
532 |             self.response_data(RESPONSE_CODE_REQUEST_ERROR, "header Authorization error, Bad Authorization.")
533 |             return None
534 |         
535 |         # check post_data is json
536 |         content_length = int(self.headers['Content-Length'])
537 |         post_data = self.rfile.read(content_length).decode('utf-8')
538 |         json_obj = None
539 |         try:
540 |             json_obj = json.loads(post_data)
541 |         except Exception as e:
542 |             self.response_data(RESPONSE_CODE_REQUEST_ERROR, "post data is not json string.")
543 |             return None
544 |         return json_obj
545 | 
546 | 
547 | 
# Start the service: listen on every interface at PORT and serve until the
# process is stopped; the listening socket is closed on the way out.
httpd = socketserver.TCPServer(("", PORT), RtcAigcHTTPRequestHandler)
try:
    print("serving at port", PORT)
    httpd.serve_forever()
finally:
    httpd.server_close()
552 | 


--------------------------------------------------------------------------------
/server/src/RtcApiRequester.py:
--------------------------------------------------------------------------------
 1 | # Copyright (2025) Beijing Volcano Engine Technology Ltd.
 2 | # SPDX-License-Identifier: MIT
 3 | 
 4 | import datetime
 5 | import hashlib
 6 | import hmac
 7 | import requests
 8 | 
 9 | def hash_sha256(content):
10 |     return hashlib.sha256(content.encode("utf-8")).hexdigest()
11 | 
def hmac_sha256(key, content):
    """Return the raw HMAC-SHA256 digest (bytes) of ``content`` (UTF-8 encoded) under ``key`` (bytes)."""
    mac = hmac.new(key, digestmod=hashlib.sha256)
    mac.update(content.encode("utf-8"))
    return mac.digest()
14 | 
def request_rtc_api(http_host, http_request_method, canonical_uri, canonical_query_string, http_headers, http_body, AK, SK, timeout=10):
    """Sign a request with HMAC-SHA256 and send it over HTTPS.

    The signature covers the content-type, host, x-content-sha256 and x-date
    headers and uses the credential scope "<date>/cn-north-1/rtc/request".

    Args:
        http_host: API host name, e.g. "rtc.volcengineapi.com".
        http_request_method: "POST" sends ``http_body``; anything else is sent as GET.
        canonical_uri: request path; "/" is sent as an empty path.
        canonical_query_string: query string, e.g. "Action=...&Version=...".
        http_headers: optional dict of extra headers merged over the signed
            ones (they are not part of the signature), or None.
        http_body: request body text; None is treated as an empty body.
        AK: access key id.
        SK: secret access key.
        timeout: seconds to wait for the server before giving up, so that a
            stalled connection cannot block the caller forever.

    Returns:
        (status_code, decoded_json); decoded_json is None when the response
        body is not valid JSON.

    Raises:
        requests.RequestException: on connection errors or timeouts.
    """
    body_text = http_body if http_body is not None else ""
    # Timezone-aware replacement for the deprecated datetime.utcnow().
    now = datetime.datetime.now(datetime.timezone.utc)

    # Step 1: build the canonical request
    x_content_sha256 = hash_sha256(body_text)
    x_date = now.strftime("%Y%m%dT%H%M%SZ")
    content_type = "application/json"
    signed_headers_vec = (
        ("content-type", content_type),
        ("host", http_host),
        ("x-content-sha256", x_content_sha256),
        ("x-date", x_date)
    )
    canonical_headers = "\n".join((":".join(x) for x in signed_headers_vec)) + "\n"
    signed_headers = ";".join((x[0] for x in signed_headers_vec))
    canonical_request = http_request_method + "\n" + canonical_uri + "\n" + canonical_query_string + "\n" + canonical_headers + "\n" + signed_headers + "\n" + x_content_sha256

    # Step 2: build the string to sign
    credential_scope = x_date[0:8] + "/cn-north-1/rtc/request"
    string_to_sign = "HMAC-SHA256" + "\n" + x_date + "\n" + credential_scope + "\n" + hash_sha256(canonical_request)

    # Step 3: derive the signature by chaining HMACs over each scope component
    # and finally over the string to sign, starting from the secret key.
    hmac_contents = credential_scope.split("/")
    hmac_contents.append(string_to_sign)

    signature = SK.encode("utf-8")
    for hmac_content in hmac_contents:
        signature = hmac_sha256(signature, hmac_content)
    signature = signature.hex()

    # Step 4: build the Authorization header
    authorization = "HMAC-SHA256 Credential=%s/%s, SignedHeaders=%s, Signature=%s" % (AK, credential_scope, signed_headers, signature)

    # Step 5: send the HTTP request
    if canonical_uri == "/":
        canonical_uri = ""
    url = 'https://' + http_host + canonical_uri + "?" + canonical_query_string
    headers = {
        "Content-Type" : content_type,
        "Host" : http_host,
        "X-Content-Sha256": x_content_sha256,
        "X-Date": x_date,
        "Authorization" : authorization
    }

    if http_headers is not None:
        headers.update(http_headers)

    if http_request_method == "POST":
        # Send exactly the UTF-8 bytes that were hashed for the signature.
        response = requests.post(url, headers=headers, data=body_text.encode("utf-8"), timeout=timeout)
    else:
        response = requests.get(url, headers=headers, timeout=timeout)

    # Error responses from proxies/gateways may not be JSON; callers already
    # handle a None payload, so report that instead of raising.
    try:
        payload = response.json()
    except ValueError:
        payload = None
    return (response.status_code, payload)
69 | 


--------------------------------------------------------------------------------