├── .gitignore ├── 0001-feat-disable-volc-esp-libs.patch ├── LICENSE.txt ├── README.md ├── client └── espressif │ └── esp32s3_demo │ ├── CMakeLists.txt │ ├── components │ ├── README.md │ └── VolcEngineRTCLite │ │ ├── CMakeLists.txt │ │ ├── include │ │ ├── PLACEHOLDER │ │ └── VolcEngineRTCLite.h │ │ └── libs │ │ └── esp32s3 │ │ ├── LICENSE │ │ ├── PLACEHOLDER │ │ └── libVolcEngineRTCLite.a │ ├── main │ ├── AudioPipeline.c │ ├── AudioPipeline.h │ ├── CMakeLists.txt │ ├── Kconfig.projbuild │ ├── RtcBotUtils.c │ ├── RtcBotUtils.h │ ├── RtcHttpUtils.c │ ├── RtcHttpUtils.h │ └── VolcRTCDemo.c │ ├── partitions.csv │ ├── sdkconfig │ ├── sdkconfig.defaults │ └── sdkconfig.defaults.esp32s3 ├── resource └── image │ └── tech_support.png └── server └── src ├── AccessToken.py ├── README.md ├── RtcAigcConfig.py ├── RtcAigcService.py └── RtcApiRequester.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ -------------------------------------------------------------------------------- /0001-feat-disable-volc-esp-libs.patch: -------------------------------------------------------------------------------- 1 | From 6c66cfb0606ed9efe7fc72495a8293aec86e6bf5 Mon Sep 17 00:00:00 2001 2 | From: yuhuawei 3 | Date: Sat, 24 May 2025 22:53:54 +0800 4 | Subject: [PATCH] feat: disable volc esp libs 5 | 6 | --- 7 | components/clouds/CMakeLists.txt | 9 +++++---- 8 | 1 file changed, 5 insertions(+), 4 deletions(-) 9 | 10 | diff --git a/components/clouds/CMakeLists.txt b/components/clouds/CMakeLists.txt 11 | index ad734a1c..30efc462 100644 12 | --- a/components/clouds/CMakeLists.txt 13 | +++ b/components/clouds/CMakeLists.txt 14 | @@ -3,7 +3,8 @@ if (DEFINED ENV{DUER_PATH}) 15 | return() 16 | endif() 17 | 18 | -set(COMPONENT_ADD_INCLUDEDIRS ./dueros/lightduer/include ./volc_engine_rtc/include) 19 | +# set(COMPONENT_ADD_INCLUDEDIRS ./dueros/lightduer/include ./volc_engine_rtc/include) 20 | +set(COMPONENT_ADD_INCLUDEDIRS 
./dueros/lightduer/include) 21 | 22 | # Edit following two lines to set component requirements (see docs) 23 | set(COMPONENT_SRCS) 24 | @@ -14,9 +15,9 @@ target_link_libraries(${COMPONENT_TARGET} INTERFACE "-L${CMAKE_CURRENT_LIST_DIR} 25 | 26 | IF ((CONFIG_IDF_TARGET STREQUAL "esp32s3") OR (CONFIG_IDF_TARGET STREQUAL "esp32") OR (CONFIG_IDF_TARGET STREQUAL "esp32c5")) 27 | IF (IDF_VERSION_MAJOR EQUAL 5) 28 | -add_prebuilt_library(VolcEngineRTCLite "${CMAKE_CURRENT_LIST_DIR}/volc_engine_rtc/libs/${CONFIG_IDF_TARGET}/libVolcEngineRTCLite.a" 29 | - REQUIRES mbedtls espressif__zlib json lwip) 30 | -target_link_libraries(${COMPONENT_LIB} INTERFACE VolcEngineRTCLite) 31 | +# add_prebuilt_library(VolcEngineRTCLite "${CMAKE_CURRENT_LIST_DIR}/volc_engine_rtc/libs/${CONFIG_IDF_TARGET}/libVolcEngineRTCLite.a" 32 | +# REQUIRES mbedtls espressif__zlib json lwip) 33 | +# target_link_libraries(${COMPONENT_LIB} INTERFACE VolcEngineRTCLite) 34 | ENDIF() 35 | ENDIF() 36 | 37 | -- 38 | 2.44.0 39 | 40 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (2025) Beijing Volcano Engine Technology Ltd. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 |

IoT RTC AIGC Demo

3 | 欢迎使用IoT RTC AIGC Demo,本文档为您介绍如何使用本Demo。 4 | 5 | 6 | ## 快速入门 7 | 8 | ### 前置准备 9 | - Linux服务器,且开发环境满足Python 3.8及以上版本。 10 | - 乐鑫 ESP32-S3-Korvo-2 或 AtomS3R 开发板。 11 | - 参考如下流程开通硬件服务。 12 | - 开通火山引擎实时音视频、语音识别、音频合成、火山方舟大模型服务。参看[开通服务](https://www.volcengine.com/docs/6348/1315561)开通相关产品、配置角色策略并获取以下参数值: 13 | - 火山引擎 AK 14 | - 火山引擎 SK 15 | - 实时音视频应用 APPID 16 | - 实时音视频应用 APPKEY 17 | - 语音技术-语音识别-流式语音识别 APPID 18 | - 语音技术-音频生成-语音合成 APPID 19 | - 语音技术-音频生成-语音合成 Voice_type 20 | - 火山方舟大模型 EndPointId 21 | - 配置不同权限账号调用智能体, [创建角色](https://www.volcengine.com/docs/6348/1315561) 22 | - [启用硬件场景配置](https://console.volcengine.com/rtc/aigc/cloudRTC),并使用相应的房间规则 23 | 24 | ### 运行服务端 25 | 26 | > 服务端示例仅供开发者快速体验和演示,请勿在生产环境中使用。生产环境的服务端需要你自行开发。 27 | 28 | 29 | #### 硬件要求 30 | 31 | - PC服务器(Linux 建议使用 ubuntu18.04 及以上版本, 服务端示例程序在 Windows 11 python 3.12, MacOs python 3.9, Ubuntu 24.04 python 3.12实测可以正常运行) 32 | 33 | #### 安装服务依赖 34 | 35 | 36 | ```shell 37 | pip install requests 38 | ``` 39 | 40 | #### 下载并配置工程 41 | 42 | 1. 克隆实时对话式 AI 硬件 Demo 示例 43 | 44 | 45 | ```shell 46 | git clone https://github.com/volcengine/rtc-aigc-embedded-demo.git 47 | ``` 48 | 49 | 2. 进入服务端 Demo 目录 50 | 51 | 52 | ```shell 53 | cd rtc-aigc-embedded-demo/server/src 54 | ``` 55 | 56 | 3. 
设置配置文件 57 | 58 | 进入服务端配置文件 `rtc-aigc-embedded-demo/server/src/RtcAigcConfig.py`,设置如下参数 59 | 60 | 61 | ```python 62 | # 鉴权 AK/SK。前往 https://console.volcengine.com/iam/keymanage 获取 63 | SK = "WmpCbVl6Y3hOR1JrT************1tTTRZalF4WW1FeE56WQ==" 64 | AK = "AKLTNWQyODQ1MDM5Y***********WRmM2Y2NTJlMTQyZjI" 65 | 66 | # 实时音视频 App ID。前往 https://console.volcengine.com/rtc/listRTC 获取或创建 67 | CONFIG_RTC_APPID = "67582ac8******0174410bd1" 68 | # 实时音视频 APP KEY。前往 https://console.volcengine.com/rtc/listRTC 获取 69 | RTC_APP_KEY = "1a6a03723c******222ada877ee13b" 70 | 71 | # 大模型推理接入点 EndPointId 前往 https://console.volcengine.com/ark/region:ark+cn-beijing/endpoint?config=%7B%7D 创建 72 | DEFAULT_END_POINT_ID = "ep-2025******160517-hlnzt" 73 | # 音频生成-语音合成 Voice_type,前往 https://console.volcengine.com/speech/service/8 获取 74 | DEFAULT_VOICE_TYPE = "BV007_******ming" 75 | 76 | # 语音识别-流式语音识别 APPID 前往 https://console.volcengine.com/speech/service/16 获取 77 | ASR_APP_ID = "884***621" 78 | # 语音识别-流式语音识别 ACCESS TOKEN 前往 https://console.volcengine.com/speech/service/16 获取 79 | ASR_ACCESS_TOKEN = "M_X6X***BeXa1" 80 | 81 | # 音频生成-语音合成 APPID,前往 https://console.volcengine.com/speech/service/8 获取 82 | TTS_APP_ID = "884***9621" 83 | # 音频生成-语音合成 ACCESS TOKEN,前往 https://console.volcengine.com/speech/service/8 获取 84 | TTS_ACCESS_TOKEN = "M_X6X***BeXa1" 85 | 86 | # 服务端监听端口号,你可以根据实际业务需求设置端口号 87 | PORT = 8080 88 | ``` 89 | 90 | #### 运行服务 91 | 92 | 在 `rtc-aigc-embedded-demo/server/src`目录下运行服务 93 | 94 | 95 | ```python 96 | python3 RtcAigcService.py 97 | ``` 98 | 99 | ### 运行设备端 100 | 101 | 本文以 Mac 操作系统为例。 102 | 103 | #### 硬件要求 104 | 105 | - 乐鑫 ESP32-S3-Korvo-2 开发板。 106 | 107 | - USB数据线(两条 A 转Micro-B 数据线,一条作为电源线,一条作为串口线)。 108 | 109 | - PC(Windows、Linux 或者 macOS)。 110 | 111 | #### 乐鑫环境配置 112 | 详见[开发环境配置文档](https://docs.espressif.com/projects/esp-idf/zh_CN/stable/esp32s3/get-started/index.html) 113 | 1. 安装 CMake 和 Ninja 编译工具 114 | 115 | ```shell 116 | brew install cmake ninja dfu-util 117 | ``` 118 | 119 | 2. 
将 乐鑫 ADF 框架克隆到本地,并同步各子仓(submodule)代码 120 | > **注意:** demo 中使用的 ADF 版本为 [eca11f20e56f9b5321b714da4305e123672d92a9], 对应 IDF 版本为 [v5.4], 请确保 ADF 版本与 IDF 版本匹配。 121 | 1. clone 乐鑫ADF 框架 122 | 123 | ```shell 124 | git clone https://github.com/espressif/esp-adf.git # 克隆 ADF 框架 125 | ``` 126 | 2. 进入esp-adf目录 127 | 128 | ```shell 129 | cd esp-adf 130 | ``` 131 | 3. 切换到乐鑫ADF指定版本 132 | ```shell 133 | git reset --hard eca11f20e56f9b5321b714da4305e123672d92a9 134 | ``` 135 | 4. 同步各子仓代码 136 | 137 | ```shell 138 | git submodule update --init --recursive 139 | ``` 140 | 141 | 3. 安装乐鑫 esp32s3 开发环境相关依赖 142 | 143 | ```shell 144 | ./install.sh esp32s3 145 | ``` 146 | 147 | 成功安装所有依赖后,命令行会出现如下提示 148 | 149 | ```shell 150 | All done! You can now run: 151 | . ./export.sh 152 | ``` 153 | 154 | > 对于 macOS 用户,如在上述任何步骤中遇到以下错误: 155 | > 156 | > ` 158 | > 可前往访达->应用程序->Python 文件夹,点击`Install Certificates.command` 安装证书。了解更多信息,请参考 安装 ESP-IDF 工具时出现的下载错误。 159 | 160 | 161 | 4. 设置环境变量 162 | 163 | > **每次打开命令行窗口均需要运行该命令进行设置** 164 | 165 | ```shell 166 | . ./export.sh 167 | ``` 168 | 169 | #### 下载并配置工程 170 | 171 | 1. 将实时对话式 AI 硬件示例工程 clone 到 乐鑫 ADF examples 目录下 172 | 1. 进入 esp-adf/examples 目录 173 | 174 | ```shell 175 | cd $ADF_PATH/examples 176 | ``` 177 | 1. clone 实时对话式 AI 硬件示例工程 178 | 179 | ```shell 180 | git clone https://github.com/volcengine/rtc-aigc-embedded-demo.git 181 | ``` 182 | 183 | 2. 禁用乐鑫工程中的火山组件 184 | 1. 进入 esp-adf 目录 185 | 186 | ```shell 187 | cd $ADF_PATH 188 | ``` 189 | 2. 禁用乐鑫工程中的火山组件 190 | 191 | ```shell 192 | git apply $ADF_PATH/examples/rtc-aigc-embedded-demo/0001-feat-disable-volc-esp-libs.patch 193 | ``` 194 | 195 | #### 编译固件 196 | 197 | 1. 进入`esp-adf/examples/rtc-aigc-embedded-demo/client/espressif/esp32s3_demo` 目录下编译固件 198 | 1. 进入 esp32s3_demo 目录 199 | 200 | ```shell 201 | cd $ADF_PATH/examples/rtc-aigc-embedded-demo/client/espressif/esp32s3_demo 202 | ``` 203 | 2. 设置编译目标平台 204 | 205 | ```shell 206 | idf.py set-target esp32s3 207 | ``` 208 | 3. 
设置WIFI账号密码、RTC APPID、服务端地址和端口号 209 | ```shell 210 | idf.py menuconfig 211 | ``` 212 | 进入 `Example Configuration` 菜单,在 `WiFi SSID` 及 `WiFi Password` 中填入你的 WIFI 账号和密码,在 `RTC APPID` 中填入你的 RTC APPID (前往 https://console.volcengine.com/rtc/listRTC 获取),在 `AIGENT Server Host` 中填入你的服务端地址和端口号,并保存。 213 | 214 | 4. 设置开发板型号 215 | ```shell 216 | idf.py menuconfig 217 | ``` 218 | 进入 `Audio HAL` 菜单,在 `Audio board` 中选择你的开发板型号。(例如: 方舟开发板选择 `M5STACK-ATOMS3R`),并保存。 219 | 220 | 5. 编译固件 221 | 222 | ```shell 223 | idf.py build 224 | ``` 225 | 226 | #### 烧录并运行示例 Demo 227 | 228 | 1. 打开乐鑫开发板电源开关 229 | 230 | 2. 烧录固件 231 | 232 | 233 | ```shell 234 | idf.py flash 235 | ``` 236 | 237 | 3. 运行示例 Demo 并查看串口日志输出 238 | 239 | 240 | ```shell 241 | idf.py monitor 242 | ``` 243 | ## 进阶阅读 244 | [服务端示例接口说明](server/src/README.md) 245 | 246 | ## 技术交流 247 | 欢迎加入我们的技术交流群或提出Issue,一起探讨技术,一起学习进步。 248 |
-------------------------------------------------------------------------------- /client/espressif/esp32s3_demo/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # For more information about build system see 2 | # https://docs.espressif.com/projects/esp-idf/en/latest/api-guides/build-system.html 3 | # The following five lines of boilerplate have to be in your project's 4 | # CMakeLists in this exact order for cmake to work correctly 5 | cmake_minimum_required(VERSION 3.5) 6 | 7 | # include($ENV{IDF_PATH}/tools/cmake/project.cmake) 8 | 9 | include($ENV{ADF_PATH}/CMakeLists.txt) 10 | include($ENV{IDF_PATH}/tools/cmake/project.cmake) 11 | project(VolcRTCDemo) 12 | 13 | # add_compile_definitions(LWIP_DEBUG=1) 14 | -------------------------------------------------------------------------------- /client/espressif/esp32s3_demo/components/README.md: -------------------------------------------------------------------------------- 1 | 此文件夹中需要放置VolcEngineRTCLite库 2 | -------------------------------------------------------------------------------- /client/espressif/esp32s3_demo/components/VolcEngineRTCLite/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | idf_component_register( 2 | INCLUDE_DIRS "include" 3 | ) 4 | 5 | add_prebuilt_library(VolcEngineRTCLite "${CMAKE_CURRENT_SOURCE_DIR}/libs/${CONFIG_IDF_TARGET}/libVolcEngineRTCLite.a" 6 | REQUIRES mbedtls espressif__zlib json lwip) 7 | target_link_libraries(${COMPONENT_LIB} INTERFACE VolcEngineRTCLite) 8 | -------------------------------------------------------------------------------- /client/espressif/esp32s3_demo/components/VolcEngineRTCLite/include/PLACEHOLDER: -------------------------------------------------------------------------------- 1 | 放置VolcEngineRTCLite.h 2 | -------------------------------------------------------------------------------- 
/client/espressif/esp32s3_demo/components/VolcEngineRTCLite/include/VolcEngineRTCLite.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2024 The VolcEngineRTCLite project authors. All Rights Reserved. 3 | * @brief VolcEngineRTCLite Interface Lite 4 | */ 5 | 6 | #ifndef __BYTE_RTC_API_H__ 7 | #define __BYTE_RTC_API_H__ 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | #if defined(_MSC_VER) 18 | #if defined(__BUILDING_BYTE_RTC_SDK__) 19 | #define __byte_rtc_api__ __declspec(dllexport) 20 | #else 21 | #define __byte_rtc_api__ __declspec(dllimport) 22 | #endif 23 | #else 24 | #define __byte_rtc_api__ __attribute__((visibility("default"))) 25 | #endif 26 | 27 | #define BYTE_RTC_API_VERSION "1.0.3" 28 | #define BYTE_RTC_API_VERSION_NUM 0x1003 29 | 30 | 31 | /** 32 | * @locale zh 33 | * @type errorcode 34 | * @brief 回调错误码。
35 | * SDK 内部遇到不可恢复的错误时,会通过 `on_global_error` 或 `on_room_error` 回调通知用户。 36 | */ 37 | 38 | typedef enum { 39 | /** 40 | * @locale zh 41 | * @brief Token 无效。 42 | * 加入房间时使用的 Token 无效或过期失效。需要用户重新获取 Token,并调用 `byte_rtc_renew_token` 方法更新 Token。 43 | */ 44 | ERR_INVALID_TOKEN = -1000, 45 | /** 46 | * @locale zh 47 | * @brief 加入房间错误。 48 | * 加入房间时发生未知错误导致加入房间失败。需要用户重新加入房间。 49 | */ 50 | ERR_JOIN_ROOM = -1001, 51 | /** 52 | * @locale zh 53 | * @brief 没有发布音视频流权限。 54 | * 用户在所在房间中发布音视频流失败,失败原因为用户没有发布流的权限。 55 | */ 56 | ERR_NO_PUBLISH_PERMISSION = -1002, 57 | /** 58 | * @locale zh 59 | * @brief 没有订阅音视频流权限。 60 | * 用户订阅所在房间中的音视频流失败,失败原因为用户没有订阅流的权限。 61 | */ 62 | ERR_NO_SUBSCRIBE_PERMISSION = -1003, 63 | /** 64 | * @locale zh 65 | * @brief 相同用户 ID 的用户加入本房间,当前用户被踢出房间 66 | */ 67 | ERR_NO_DUPLICATE_LOGIN = -1004, 68 | /** 69 | * @locale zh 70 | * @brief 服务端调用 OpenAPI 将当前用户踢出房间 71 | */ 72 | ERR_KICKED_OUT = -1006, 73 | /** 74 | * @locale zh 75 | * @brief 当调用 `byte_rtc_join_room` ,如果room 非法,会返回null,并抛出该error 76 | */ 77 | ERR_ROOMID_ILLEGAL = -1007, 78 | /** 79 | * @locale zh 80 | * @brief Token 过期。调用 `byte_rtc_join_room` 使用新的 Token 重新加入房间。 81 | */ 82 | ERR_ROOM_TOKEN_EXPIRED = -1009, 83 | /** 84 | * @locale zh 85 | * @brief 调用 `updateToken` 传入的 Token 无效 86 | */ 87 | ERR_UPDATE_TOKEN_WITH_INVALID_TOKEN = -1010, 88 | /** 89 | * @locale zh 90 | * @brief 服务端调用 OpenAPI 解散房间,所有用户被移出房间。 91 | */ 92 | ERR_ROOM_DISMISS = -1011, 93 | /** 94 | * @locale zh 95 | * @brief 加入房间错误。
96 | * 加入房间时, LICENSE 计费账号未使用 LICENSE_AUTHENTICATE SDK,加入房间错误。 97 | */ 98 | ERR_JOIN_ROOM_WITHOUT_LICENSE_AUTHENTICATE_SDK = -1012, 99 | /** 100 | * @locale zh 101 | * @brief 通话回路检测已经存在同样 room 的房间了 102 | */ 103 | ERR_ROOM_ALREADY_EXIST = -1013, 104 | /** 105 | * @locale zh 106 | * @brief 加入多个房间时使用了不同的 uid。
107 | * 同一个引擎实例中,用户需使用同一个 uid 加入不同的房间。 108 | */ 109 | ERR_USERID_DIFFERENT = -1014, 110 | /** 111 | * @locale zh 112 | * @brief 服务端license过期,拒绝加入房间。
113 | */ 114 | ERR_JOIN_ROOM_SERVER_LICENSE_EXPIRED = -1017, 115 | /** 116 | * @locale zh 117 | * @brief 超过服务端license许可的并发量上限,拒绝加入房间。
118 | */ 119 | ERR_JOIN_ROOM_EXCEEDS_THE_UPPER_LIMIT = -1018, 120 | /** 121 | * @locale zh 122 | * @brief license参数错误,拒绝加入房间。
123 | */ 124 | ERR_JOIN_ROOM_LICENSE_PARAMETER_ERROR = -1019, 125 | /** 126 | * @locale zh 127 | * @brief license 证书路径错误。
128 | */ 129 | ERR_JOIN_ROOM_LICENSE_FILE_PATH = -1020, 130 | /** 131 | * @locale zh 132 | * @brief license 证书不合法。
133 | */ 134 | ERR_JOIN_ROOM_LICENSE_ILLEGAL = -1021, 135 | /** 136 | * @locale zh 137 | * @brief license 证书已经过期,拒绝加入房间。
138 | */ 139 | ERR_JOIN_ROOM_LICENSE_EXPIRED = -1022, 140 | /** 141 | * @locale zh 142 | * @brief license 证书内容不匹配。
143 | */ 144 | ERR_JOIN_ROOM_LICENSE_INFORMATION_NOT_MATCH = -1023, 145 | /** 146 | * @locale zh 147 | * @brief license 当前证书与缓存证书不匹配。
148 | */ 149 | ERR_JOIN_ROOM_LICENSE_NOT_MATCH_WITH_CACHE = -1024, 150 | /** 151 | * @locale zh 152 | * @brief 房间被封禁。
153 | */ 154 | ERR_JOIN_ROOM_FORBIDDEN = -1025, 155 | /** 156 | * @locale zh 157 | * @brief 用户被封禁。
158 | */ 159 | ERR_JOIN_ROOM_USER_FORBIDDEN = -1026, 160 | /** 161 | * @locale zh 162 | * @brief 订阅音视频流失败,订阅音视频流总数超过上限。 163 | */ 164 | ERR_OVER_STREAM_SUBSCRIBE_LIMIT = -1070, 165 | /** 166 | * @locale zh 167 | * @brief 发布流失败,发布流总数超过上限。 168 | * RTC 系统会限制单个房间内发布的总流数,总流数包括视频流、音频流和屏幕流。如果房间内发布流数已达上限时,本地用户再向房间中发布流时会失败,同时会收到此错误通知。 169 | */ 170 | ERR_OVER_STREAM_PUBLISH_LIMIT = -1080, 171 | /** 172 | * @locale zh 173 | * @brief 服务端异常状态导致退出房间。
174 | * SDK与信令服务器断开,并不再自动重连,可联系技术支持。
175 | */ 176 | ERR_ABNORMAL_SERVER_STATUS = -1084, 177 | 178 | } error_code_e; 179 | 180 | /** Error code. */ 181 | 182 | /** 183 | * @locale zh 184 | * @type keytype 185 | * @brief 用户离线原因。 186 | */ 187 | typedef enum { 188 | /** 189 | * @locale zh 190 | * @brief 用户主动离线 191 | */ 192 | USER_OFFLINE_QUIT = 0, 193 | /** 194 | * @locale zh 195 | * @brief 用户超时掉线 196 | */ 197 | USER_OFFLINE_DROPPED = 1, 198 | 199 | } user_offline_reason_e; 200 | 201 | /** 202 | * @locale zh 203 | * @type keytype 204 | * @brief 视频数据类型。 205 | */ 206 | typedef enum { 207 | /** 208 | * @locale zh 209 | 210 | * @brief 未知视频数据类型 211 | */ 212 | VIDEO_DATA_TYPE_UNKNOWN = 0, 213 | 214 | /** 215 | * @locale zh 216 | 217 | * @brief H264 218 | */ 219 | VIDEO_DATA_TYPE_H264 = 1, 220 | /** 221 | * @locale zh 222 | * @brief BYTEVC1 223 | */ 224 | VIDEO_DATA_TYPE_BYTEVC1 = 2, 225 | 226 | 227 | } video_data_type_e; 228 | 229 | /** 230 | * @locale zh 231 | * @type keytype 232 | * @brief 视频帧类型。 233 | */ 234 | typedef enum { 235 | /** 236 | * @locale zh 237 | * @brief 未知类型
238 | * 如果设置为 `VIDEO_FRAME_AUTO_DETECT`,SDK 会自行判断视频帧类型。 239 | */ 240 | VIDEO_FRAME_AUTO_DETECT = 0, 241 | /** 242 | * @locale zh 243 | * @brief 关键帧 244 | */ 245 | VIDEO_FRAME_KEY = 1, 246 | /** 247 | * @locale zh 248 | * @brief P 帧 249 | */ 250 | VIDEO_FRAME_DELTA = 2, 251 | } video_frame_type_e; 252 | 253 | 254 | /** 255 | * @locale zh 256 | * @type keytype 257 | * @brief 视频流类型。 258 | */ 259 | typedef enum { 260 | /** 261 | * @locale zh 262 | * @brief 主流 263 | */ 264 | VIDEO_STREAM_HIGH = 0, 265 | /** 266 | * @locale zh 267 | * @brief 辅流 268 | */ 269 | VIDEO_STREAM_LOW = 1, 270 | 271 | } video_stream_type_e; 272 | 273 | /** 274 | * @locale zh 275 | * @type keytype 276 | * @brief 视频帧信息。 277 | */ 278 | typedef struct { 279 | /** 280 | * @locale zh 281 | * @brief 视频数据类型,参看 video_data_type_e{@link #video_data_type_e}。 282 | */ 283 | video_data_type_e data_type; 284 | /** 285 | * @locale zh 286 | * @brief 视频流类型,参看 video_stream_type_e{@link #video_stream_type_e}。 287 | */ 288 | video_stream_type_e stream_type; 289 | /** 290 | * @locale zh 291 | * @brief 视频帧类型,参看 video_frame_type_e{@link #video_frame_type_e}。 292 | */ 293 | video_frame_type_e frame_type; 294 | /** 295 | * @locale zh 296 | * @brief 视频帧率 297 | */ 298 | int frame_rate; 299 | 300 | } video_frame_info_t; 301 | 302 | /** 303 | * @locale zh 304 | * @type keytype 305 | * @brief 音频编码类型。 306 | */ 307 | typedef enum { 308 | /** 309 | * @locale zh 310 | * @brief OPUS 311 | */ 312 | AUDIO_CODEC_TYPE_OPUS = 1, 313 | /** 314 | * @locale zh 315 | * @brief G722 316 | */ 317 | AUDIO_CODEC_TYPE_G722 = 2, 318 | /** 319 | * @locale zh 320 | * @brief AACLC 321 | */ 322 | AUDIO_CODEC_TYPE_AACLC = 3, 323 | 324 | /** 325 | * @locale zh 326 | * @brief G711A 327 | */ 328 | AUDIO_CODEC_TYPE_G711A = 4, 329 | 330 | } audio_codec_type_e; 331 | 332 | 333 | /** 334 | * @locale zh 335 | * @type keytype 336 | * @brief 视频编码类型。 337 | */ 338 | /** 339 | * @locale en 340 | * @type keytype 341 | * @brief video codec type list. 
342 | */ 343 | typedef enum { 344 | /** 345 | * @locale zh 346 | * @brief 编码类型H264 347 | */ 348 | /** 349 | * @locale en 350 | * @brief codec type H264 351 | */ 352 | VIDEO_CODEC_TYPE_H264 = 0, 353 | 354 | /** 355 | * @locale zh 356 | * @brief 编码类型ByteVC1 357 | */ 358 | /** 359 | * @locale en 360 | * @brief codec type ByteVC1 361 | */ 362 | VIDEO_CODEC_TYPE_BYTEVC1 = 1, 363 | 364 | } video_codec_type_e; 365 | 366 | /** 367 | * @locale zh 368 | * @type keytype 369 | * @brief 音频数据类型。 370 | */ 371 | typedef enum { 372 | /** 373 | * @locale zh 374 | * @brief 未知音频数据类型 375 | */ 376 | AUDIO_DATA_TYPE_UNKNOWN = 0, 377 | /** 378 | * @locale zh 379 | * @brief OPUS 380 | */ 381 | AUDIO_DATA_TYPE_OPUS = 1, 382 | /** 383 | * @locale zh 384 | * @brief G722 385 | */ 386 | AUDIO_DATA_TYPE_G722 = 2, 387 | /** 388 | * @locale zh 389 | * @brief AACLC 390 | */ 391 | AUDIO_DATA_TYPE_AACLC = 3, 392 | 393 | /** 394 | * @locale zh 395 | * @brief PCMA 396 | */ 397 | AUDIO_DATA_TYPE_PCMA = 4, 398 | 399 | /** 400 | * @locale zh 401 | * @brief PCM 402 | */ 403 | AUDIO_DATA_TYPE_PCM = 5, 404 | } audio_data_type_e; 405 | 406 | /** 407 | * @locale zh 408 | * @type keytype 409 | * @brief 音频帧信息。 410 | */ 411 | typedef struct { 412 | /** 413 | * @locale zh 414 | * @brief 音频数据类型,参看 audio_data_type_e{@link #audio_data_type_e}。 415 | */ 416 | audio_data_type_e data_type; 417 | 418 | } audio_frame_info_t; 419 | 420 | /** 421 | * @locale zh 422 | * @type keytype 423 | * @brief SDK 日志等级。 424 | */ 425 | typedef enum { 426 | /** 427 | * @locale zh 428 | * @brief 信息级别。 429 | */ 430 | BYTE_RTC_LOG_LEVEL_INFO = 1, 431 | /** 432 | * @locale zh 433 | * @brief 警告级别。 434 | */ 435 | BYTE_RTC_LOG_LEVEL_WARN = 2, 436 | /** 437 | * @locale zh 438 | * @brief 错误级别。 439 | */ 440 | BYTE_RTC_LOG_LEVEL_ERROR = 3, 441 | /** 442 | * @locale zh 443 | * @brief 严重错误级别。 444 | */ 445 | BYTE_RTC_LOG_LEVEL_FATAL = 4 446 | 447 | } byte_rtc_log_level_e; 448 | 449 | /** 450 | * @locale zh 451 | * @type keytype 452 | * @brief 房间音视频自动订阅选项。 
453 | */ 454 | typedef struct { 455 | /** 456 | * @locale zh 457 | * @brief 是否自动订阅远端用户的音频流。
458 | * - true: 是 459 | * - false:否 460 | */ 461 | bool auto_subscribe_audio; 462 | /** 463 | * @locale zh 464 | * @brief 是否自动订阅远端用户的视频流。
465 | * - true: 是 466 | * - false:否 467 | */ 468 | bool auto_subscribe_video; 469 | 470 | /** 471 | * @locale zh 472 | * @brief 是否自动发布本端用户的音频流。
473 | * - true: 是 474 | * - false:否 475 | */ 476 | 477 | bool auto_publish_audio; 478 | 479 | /** 480 | * @locale zh 481 | * @brief 是否自动发布本端用户的视频流。
482 | * - true: 是 483 | * - false:否 484 | */ 485 | bool auto_publish_video; 486 | 487 | } byte_rtc_room_options_t; 488 | 489 | /** 490 | * @locale zh 491 | * @type keytype 492 | * @brief 网络事件类型。 493 | * @hidden 494 | */ 495 | typedef enum { 496 | NETWORK_EVENT_DOWN = 0, 497 | NETWORK_EVENT_UP, 498 | NETWORK_EVENT_CHANGE, 499 | } network_event_type_e; 500 | 501 | /** 502 | * @locale zh 503 | * @type keytype 504 | * @brief 实时信令消息类型。 505 | */ 506 | typedef enum { 507 | /** 508 | * @locale zh 509 | * @brief 可靠消息 510 | */ 511 | RTS_MESSAGE_RELIABLE = 0, 512 | 513 | } rts_message_type; 514 | 515 | typedef void * byte_rtc_engine_t; 516 | 517 | 518 | /** 519 | * @locale zh 520 | * @type callback 521 | * @brief SDK 事件回调结构体 522 | * @note 回调函数是在 SDK 内部线程同步抛出来的,请不要做耗时操作,否则可能导致 SDK 运行异常。 523 | */ 524 | typedef struct { 525 | 526 | /** 527 | * @locale zh 528 | * @type callback 529 | * @list 回调 530 | * @order 0 531 | * @brief SDK 错误信息回调
532 | * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例 533 | * @param code 错误码,参看 error_code_e{@link #error_code_e} 534 | * @param msg 错误信息 535 | */ 536 | void (*on_global_error)(byte_rtc_engine_t engine,int code, const char * msg); 537 | 538 | /** 539 | * @locale zh 540 | * @type callback 541 | * @list 回调 542 | * @order 2 543 | * @brief 加入房间成功回调。 544 | * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例 545 | * @param room 房间名 546 | * @param elapsed_ms 从开始加入房间到加入房间成功的耗时,单位:毫秒 547 | * @param rejoin 当网络断开时,重连后自动触发重新加入房间 548 | * - True:重新加入房间 549 | * - False:首次加入房间 550 | */ 551 | void (*on_join_room_success)(byte_rtc_engine_t engine,const char * room, int elapsed_ms, bool rejoin); 552 | 553 | 554 | /** 555 | * @locale zh 556 | * @type callback 557 | * @list 回调 558 | * @order 1 559 | * @brief 加入房间失败或异常退出房间回调 560 | * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例 561 | * @param room 房间名 562 | * @param code 错误码,参看 error_code_e{@link #error_code_e} 563 | * @param msg 错误信息 564 | */ 565 | void (*on_room_error)(byte_rtc_engine_t engine,const char * room, int code, const char * msg); 566 | 567 | 568 | /** 569 | * @locale zh 570 | * @type callback 571 | * @list 回调 572 | * @order 3 573 | * @brief 远端用户加入房间回调
574 | 远端用户断网后重新连入房间时,房间内其他用户将收到该事件
575 | 新加入房间用户也会收到加入房间前已在房间内的用户的入房间回调通知 576 | * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例 577 | * @param room 房间名 578 | * @param uid 远端用户名 579 | * @param elapsed_ms 加入房间耗时(保留字段) 580 | */ 581 | void (*on_user_joined)(byte_rtc_engine_t engine,const char * room, const char * uid,int elapsed_ms); 582 | 583 | /** 584 | * @locale zh 585 | * @type callback 586 | * @list 回调 587 | * @order 4 588 | * @brief 远端用户离开房间
589 | * 房间内其他用户会收到此事件 590 | * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例 591 | * @param room 房间名 592 | * @param uid 远端用户名 593 | * @param reason 用户离开房间的原因, 参看 user_offline_reason_e{@link #user_offline_reason_e} 594 | */ 595 | void (*on_user_offline)(byte_rtc_engine_t engine,const char * room, const char * uid , int reason); 596 | 597 | /** 598 | * @locale zh 599 | * @type callback 600 | * @list 回调 601 | * @order 8 602 | * @brief 房间内用户暂停/恢复发送音频流时,房间内其他用户会收到此回调 603 | * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例 604 | * @param room 房间名 605 | * @param uid 远端用户名 606 | * @param muted 发送状态
607 | * - true(1):不发送
608 | * - false(0):发送 609 | */ 610 | void (*on_user_mute_audio)(byte_rtc_engine_t engine,const char * room, const char * uid ,int muted); 611 | 612 | /** 613 | * @locale zh 614 | * @type callback 615 | * @list 回调 616 | * @order 7 617 | * @brief 房间内用户暂停/恢复发送视频流时,房间内其他用户会收到此回调 618 | * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例 619 | * @param room 房间名 620 | * @param uid 远端用户名 621 | * @param muted 发送状态
622 | * - true(1):不发送
623 | * - false(0):发送 624 | */ 625 | void (*on_user_mute_video)(byte_rtc_engine_t engine,const char * room, const char * uid ,int muted); 626 | 627 | /** 628 | * @locale zh 629 | * @type callback 630 | * @list 回调 631 | * @order 9 632 | * @brief 提示流发布端需重新生成关键帧的回调 633 | * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例 634 | * @param room 房间名 635 | * @param uid 远端用户名 636 | */ 637 | void (*on_key_frame_gen_req)(byte_rtc_engine_t engine,const char * room, const char * uid); 638 | 639 | /** 640 | * @locale zh 641 | * @type callback 642 | * @list 回调 643 | * @order 6 644 | * @brief 返回远端单个用户的音频数据 645 | * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例 646 | * @param room 房间名 647 | * @param uid 远端用户名 648 | * @param sent_ts 发送时间 (暂不支持) 649 | * @param codec 音频数据类型,参看 audio_data_type_e{@link #audio_data_type_e} 650 | * @param data_ptr 音频数据 651 | * @param data_len 音频数据长度,单位字节 652 | */ 653 | void (*on_audio_data)(byte_rtc_engine_t engine,const char * room, const char * uid ,uint16_t sent_ts, 654 | audio_data_type_e codec, const void * data_ptr, size_t data_len); 655 | 656 | /** 657 | * @locale zh 658 | * @type callback 659 | * @list 回调 660 | * @order 5 661 | * @brief 返回远端单个用户的视频数据 662 | * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例 663 | * @param room 房间名 664 | * @param uid 远端用户名 665 | * @param sent_ts 发送时间(暂不支持) 666 | * @param codec 视频编码类型,参看 video_data_type_e{@link #video_data_type_e} 667 | * @param is_key_frame 帧类型 668 | * - 0: 非关键帧 669 | * - !0: 关键帧 670 | * @param data_ptr 视频数据 671 | * @param data_len 视频数据长度,单位字节 672 | */ 673 | void (*on_video_data)(byte_rtc_engine_t engine,const char * room, const char * uid ,uint16_t sent_ts, 674 | video_data_type_e codec, int is_key_frame, 675 | const void * data_ptr, size_t data_len); 676 | 677 | /** 678 | * @locale zh 679 | * @type callback 680 | * @list 回调 681 | * @order 10 682 | * @brief 当带宽估计码率发生变化时,触发该回调。
683 | * 此时你需要将编码器码率调至目标码率 684 | * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例 685 | * @param room 房间名 686 | * @param target_bps 目标码率,单位 bps 687 | */ 688 | void (*on_target_bitrate_changed)(byte_rtc_engine_t engine,const char * room, uint32_t target_bps); 689 | 690 | 691 | /** 692 | * @locale zh 693 | * @type callback 694 | * @list 回调 695 | * @order 12 696 | * @brief 返回远端用户发送的实时信令消息 697 | * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例 698 | * @param room 房间名 699 | * @param src 远端用户名 700 | * @param message 实时信令消息 701 | * @param size 实时信令消息长度 702 | * @param binary 是否为二进制消息 703 | */ 704 | 705 | void (*on_message_received)(byte_rtc_engine_t engine,const char * room,const char * src, const uint8_t * message,int size,bool binary); 706 | 707 | 708 | /** 709 | * @locale zh 710 | * @type callback 711 | * @list 回调 712 | * @order 11 713 | * @brief 实时信令消息发送结果通知 714 | * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例 715 | * @param room 房间名 716 | * @param msgid 发送消息的id,用来和发送的消息匹配 717 | * @param error 发送消息错误码,0表示发送成功 718 | * @param extencontent 扩展信息,暂时未使用 719 | */ 720 | void (*on_message_send_result)(byte_rtc_engine_t engine,const char * room,int64_t msgid, int error,const char * extencontent); 721 | 722 | /** 723 | * @locale zh 724 | * @type callback 725 | * @list 回调 726 | * @order 13 727 | * @brief Token 加入房间权限过期前 30 秒,触发该回调。
728 | * 收到该回调后,你需调用 `byte_rtc_renew_token` 更新 Token 加入房间权限 729 | * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例 730 | * @param room 房间名 731 | */ 732 | void (*on_token_privilege_will_expire)(byte_rtc_engine_t engine,const char * room); 733 | 734 | /** 735 | * @locale zh 736 | * @type callback 737 | * @list 回调 738 | * @order 14 739 | * @brief license 过期提醒。在剩余天数低于 30 天时,收到此回调 740 | * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例 741 | * @param daysleft license 剩余有效天数 742 | */ 743 | 744 | void (*on_license_expire_warning)(byte_rtc_engine_t engine,int daysleft); 745 | 746 | /** 747 | * @locale zh 748 | * @type callback 749 | * @list 回调 750 | * @order 15 751 | * @brief engine 实例清理(byte_rtc_fini)结束通知,只有收到该通知之后,重新创建实例(byte_rtc_init)才是安全的 752 | * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例 753 | */ 754 | 755 | void (*on_fini_notify)(byte_rtc_engine_t engine); 756 | 757 | } byte_rtc_event_handler_t; 758 | 759 | 760 | /** 761 | * @locale zh 762 | * @type api 763 | * @list 方法 764 | * @order 1 765 | * @brief 获取 SDK 版本号 766 | * @return SDK 版本号 767 | */ 768 | extern __byte_rtc_api__ const char * byte_rtc_get_version(void); 769 | 770 | /** 771 | * @locale zh 772 | * @type api 773 | * @hidden 774 | * @brief 错误码转对应字符串 775 | * @note 不必释放此字符串 776 | * @param err 错误码 777 | * @return 错误信息 778 | */ 779 | extern __byte_rtc_api__ const char * byte_rtc_err_2_str(int err); 780 | 781 | /** 782 | * @locale zh 783 | * @type api 784 | * @list 方法 785 | * @order 7 786 | * @brief 设置 SDK 日志等级 787 | * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例 788 | * @param level 日志等级,参看 byte_rtc_log_level_e{@link #byte_rtc_log_level_e} 789 | */ 790 | extern __byte_rtc_api__ void byte_rtc_set_log_level(byte_rtc_engine_t engine,int level); 791 | 792 | /** 793 | * @locale zh 794 | * @type api 795 | * @list 方法 796 | * @order 5 797 | * @brief 设置 SDK 日志文件路径、大小和数目 798 | * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例 799 | * @param 
log_path 日志文件存放路径 800 | * @param size_per_file 单个日志文件大小,单位字节 801 | * @param max_file_count 日志文件最大个数 802 | * @return 方法调用结果:
803 | * - 0:成功 804 | * - -1:失败,路径参数无效 805 | */ 806 | extern __byte_rtc_api__ int byte_rtc_config_log(byte_rtc_engine_t engine,const char * log_path,int size_per_file, int max_file_count); 807 | 808 | /** 809 | * @locale zh 810 | * @type api 811 | * @list 方法 812 | * @order 3 813 | * @brief 创建引擎实例,该方法是整个SDK调用的第一个方法 814 | * @param app_id 应用 ID 815 | * @param event_handler 回调方法,参看 byte_rtc_event_handler_t{@link #byte_rtc_event_handler_t} 816 | * @return 引擎实例 817 | */ 818 | extern __byte_rtc_api__ byte_rtc_engine_t byte_rtc_create(const char * app_id, const byte_rtc_event_handler_t * event_handler); 819 | 820 | /** 821 | * @locale zh 822 | * @type api 823 | * @list 方法 824 | * @order 13 825 | * @brief 初始化引擎实例 826 | * @note 仅能被初始化一次 827 | * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例 828 | * @return 方法调用结果:
829 | * - 0:成功
830 | * - -1: appid 或 event_handler 为空
831 | * - -2:引擎实例已被初始化
832 | * - -3:引擎实例创建失败,请检查是否有可用内存 833 | */ 834 | extern __byte_rtc_api__ int byte_rtc_init(byte_rtc_engine_t engine); 835 | 836 | /** 837 | * @locale zh 838 | * @type api 839 | * @list 方法 840 | * @order 35 841 | * @brief 销毁引擎实例,VolcEngineRTCLite内部完成销毁操作之后,通过回调(on_fini_notify)通知上层 参看 byte_rtc_event_handler_t{@link #byte_rtc_event_handler_t} 842 | * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例 843 | * @return 方法调用结果: 844 | * - 0:成功
845 | * - -1: 引擎实例不存在 846 | */ 847 | extern __byte_rtc_api__ int byte_rtc_fini(byte_rtc_engine_t engine); 848 | 849 | /** 850 | * @locale zh 851 | * @type api 852 | * @list 方法 853 | * @order 37 854 | * @brief 销毁引擎实例,只有在收到on_fini_notify的回调之后,调用此方法才是安全的 855 | * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例 856 | */ 857 | extern __byte_rtc_api__ void byte_rtc_destory(byte_rtc_engine_t engine); 858 | 859 | 860 | /** 861 | * @locale zh 862 | * @type api 863 | * @list 方法 864 | * @order 9 865 | * @brief 将自定义的数据与引擎实例关联起来 866 | * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例 867 | * @param user_data 设置用户自定义数据 868 | */ 869 | extern __byte_rtc_api__ void byte_rtc_set_user_data(byte_rtc_engine_t engine,void * user_data); 870 | 871 | 872 | /** 873 | * @locale zh 874 | * @type api 875 | * @list 方法 876 | * @order 11 877 | * @brief 获取与引擎实例相关联的自定义数据 878 | * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例 879 | */ 880 | extern __byte_rtc_api__ void * byte_rtc_get_user_data(byte_rtc_engine_t engine); 881 | 882 | /** 883 | * @locale zh 884 | * @type api 885 | * @list 方法 886 | * @order 15 887 | * @brief 设置音频编码格式 888 | * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例 889 | * @param audio_codec_type 音频编码类型,参看 audio_codec_type_e{@link #audio_codec_type_e} 890 | * @return 方法调用结果:
891 | * - 0:成功
892 | * - -1: 引擎实例不存在
893 | * - -2:编码格式暂不被支持 894 | */ 895 | extern __byte_rtc_api__ int byte_rtc_set_audio_codec(byte_rtc_engine_t engine,audio_codec_type_e audio_codec_type); 896 | 897 | /** 898 | * @locale zh 899 | * @type api 900 | * @list 方法 901 | * @order 17 902 | * @brief 设置视频编码格式,暂仅支持 `VIDEO_CODEC_TYPE_H264`{@link #VIDEO_CODEC_TYPE_H264} 和 `VIDEO_CODEC_TYPE_BYTEVC1`{@link #VIDEO_CODEC_TYPE_BYTEVC1} 903 | * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例 904 | * @param video_codec_type 视频编码类型,参看 video_codec_type{@link #video_codec_type} 905 | * @return 方法调用结果:
906 | * - 0:成功。
907 | * - -1: 引擎不存在。
908 | * - -2:编码格式暂不被支持。 909 | */ 910 | extern __byte_rtc_api__ int byte_rtc_set_video_codec(byte_rtc_engine_t engine,video_codec_type_e video_codec_type); 911 | 912 | /** 913 | * @locale zh 914 | * @type api 915 | * @list 方法 916 | * @order 19 917 | * @brief 加入房间 918 | * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例 919 | * @param room 房间名 920 | * @param uid 用户名 921 | * @param token 动态密钥,用于对加入房间用户进行鉴权验证
922 | * @param options 房间音视频自动订阅设置,参看 byte_rtc_room_options_t{@link #byte_rtc_room_options_t}
923 | * 此版本无效,默认使用自动订阅 924 | * @return 方法调用结果:
925 | * - 0:成功
926 | * - -1:引擎实例不存在
927 | * - -2:输入参数为空
928 | * - -3:已加入过房间 929 | */ 930 | extern __byte_rtc_api__ int byte_rtc_join_room(byte_rtc_engine_t engine,const char * room, const char * uid, 931 | const char * token, byte_rtc_room_options_t * options); 932 | 933 | 934 | /** 935 | * @locale zh 936 | * @type api 937 | * @order 33 938 | * @list 方法 939 | * @brief 退出房间 940 | * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例 941 | * @param room 房间名 942 | * @return 方法调用结果:
943 | * - 0:成功
944 | * - -1:引擎实例不存在
945 | * - -2:输入参数为空 946 | */ 947 | extern __byte_rtc_api__ int byte_rtc_leave_room(byte_rtc_engine_t engine,const char * room); 948 | 949 | /** 950 | * @locale zh 951 | * @type api 952 | * @list 方法 953 | * @order 31 954 | * @brief 更新 Token
955 | * 收到 on_token_privilege_will_expire{@link #byte_rtc_event_handler_t#on_token_privilege_will_expire} 时,必须重新获取 Token,调用此方法更新 Token,以保证通话的正常进行 956 | * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例 957 | * @param room 房间名 958 | * @param token 动态密钥。用于对加入房间用户进行鉴权验证
959 | * @return 方法调用结果:
960 | * - 0:成功
961 | * - -1:引擎实例不存在
962 | * - -2:输入参数为空 963 | */ 964 | extern __byte_rtc_api__ int byte_rtc_renew_token(byte_rtc_engine_t engine,const char * room,const char * token); 965 | 966 | /** 967 | * @locale zh 968 | * @type api 969 | * @list 方法 970 | * @order 27 971 | * @brief 控制媒体流(本端 &远端)(音频 & 视频)流状态 972 | * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例 973 | * @param room 房间名 974 | * @param uid 用户Id,非空控制的是远端的用户,空值控制的是本端用户 975 | * @param video 媒体类型,true:视频流,false:音频流 976 | * @param mute 接收状态 977 | * @return 方法调用结果:
978 | * - 0:成功
979 | * - -1:引擎实例不存在
980 | * - -2:输入参数为空 981 | */ 982 | extern __byte_rtc_api__ int byte_rtc_mute(byte_rtc_engine_t engine,const char * room, const char * uid, bool video, bool mute); 983 | 984 | /** 985 | * @locale zh 986 | * @type api 987 | * @list 方法 988 | * @order 29 989 | * @brief 请求远端用户重编关键帧 990 | * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例 991 | * @param room 房间名 992 | * @param remote_uid 远端用户名 993 | * @return 方法调用结果:
994 | * - 0:成功
995 | * - -1:引擎实例不存在
996 | * - -2:输入参数为空 997 | */ 998 | extern __byte_rtc_api__ int byte_rtc_request_video_key_frame(byte_rtc_engine_t engine,const char * room, const char * remote_uid); 999 | 1000 | /** 1001 | * @locale zh 1002 | * @type api 1003 | * @list 方法 1004 | * @order 21 1005 | * @brief 发送音频帧 1006 | * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例 1007 | * @param room 房间名 1008 | * @param data_ptr 音频数据 1009 | * @param data_len 数据长度,单位字节 1010 | * @param info_ptr 音频帧信息,参看 audio_frame_info_t{@link #audio_frame_info_t} 1011 | * @return 方法调用结果:
1012 | * - 0:成功
1013 | * - -1:引擎实例不存在
1014 | * - -2:输入参数为空 1015 | */ 1016 | extern __byte_rtc_api__ int byte_rtc_send_audio_data(byte_rtc_engine_t engine,const char * room,const void * data_ptr, size_t data_len, 1017 | audio_frame_info_t * info_ptr); 1018 | 1019 | /** 1020 | * @locale zh 1021 | * @type api 1022 | * @list 方法 1023 | * @order 23 1024 | * @brief 发送视频帧 1025 | * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例 1026 | * @param room 房间名 1027 | * @param data_ptr 视频数据 1028 | * @param data_len 数据长度 1029 | * @param info_ptr 视频帧信息,参看 video_frame_info_t{@link #video_frame_info_t} 1030 | * @note - 仅支持 `VIDEO_DATA_TYPE_H264`
1031 | * - 每个用户仅支持一路流,仅使用 `VIDEO_STREAM_HIGH` 1032 | * @return 方法调用结果:
1033 | * - 0:成功
1034 | * - -1:引擎实例不存在
1035 | * - -2:输入参数为空 1036 | */ 1037 | extern __byte_rtc_api__ int byte_rtc_send_video_data(byte_rtc_engine_t engine,const char * room,const void *data_ptr, size_t data_len, 1038 | video_frame_info_t * info_ptr); 1039 | 1040 | 1041 | /** 1042 | * @locale zh 1043 | * @type api 1044 | * @list 方法 1045 | * @order 25 1046 | * @brief 发送实时信令消息 1047 | * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例 1048 | * @param room 房间名 1049 | * @param target 接收消息的目标用户,如果target传NULL表示发送的是房间内广播消息 1050 | * @param data_ptr 实时信令消息数据 1051 | * @param data_len 实时信令消息长度 1052 | * @param binary 指定消息是否是二进制消息 1053 | * @param type 用于指定实时信令消息类型,目前仅支持RTS_MESSAGE_RELIABLE 1054 | 1055 | * @return 方法调用结果:
1056 | * - < 0:失败
1057 | * - >= 0:消息id
1058 | */ 1059 | 1060 | extern __byte_rtc_api__ int64_t byte_rtc_rts_send_message(byte_rtc_engine_t engine,const char * room,const char * target,const void * data_ptr, 1061 | size_t data_len,bool binary,rts_message_type type); 1062 | 1063 | /** 1064 | * @locale zh 1065 | * @type api 1066 | * @hidden 1067 | * @brief 设置 SDK 参数 1068 | * @param engine 通过byte_rtc_create{@link #byte_rtc_create}创建的引擎实例 1069 | * @param params json 格式参数 1070 | * @return 方法调用结果:
1071 | * - 0:成功
1072 | * - <0:失败 1073 | */ 1074 | extern __byte_rtc_api__ int byte_rtc_set_params(byte_rtc_engine_t engine,const char * params); 1075 | 1076 | #ifdef __cplusplus 1077 | } 1078 | #endif 1079 | #endif /* __BYTE_RTC_API_H__ */ 1080 | -------------------------------------------------------------------------------- /client/espressif/esp32s3_demo/components/VolcEngineRTCLite/libs/esp32s3/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2025 Beijing Volcano Engine Technology Co., Ltd. All Rights Reserved. 2 | 3 | The VolcEngineRTCLite was developed by Beijing Volcano Engine Technology Co., Ltd. (hereinafter “Volcano Engine”). Any rights, including but not limited to any copyrights or patent rights, are owned by and proprietary material of the Volcano Engine. 4 | 5 | VolcEngineRTCLite is licensed solely for integration within Volcengine Real Time Communication which shall be used under a valid commercial license. Customers can contact conversational_ai@bytedance.com for such commercial license. Here is also a link to Volcengine Real Time Communication: https://www.volcengine.com/docs/6348/1310537. 6 | 7 | Without Volcano Engine's prior written permission, any use of VolcEngineRTCLite beyond its incorporation into Volcengine Real Time Communication, in particular any use for commercial purposes, is strictly prohibited. This includes, without limitation, incorporation in a commercial product, use in a commercial service, or production of other artefacts for commercial purposes. 8 | 9 | Without Volcano Engine's prior written permission, you shall not reproduce, modify, decompile, disassemble, reverse-engineer, or create derivative works of VolcEngineRTCLite, nor access, extract, or obtain its source code by any means, or make it available in any form to any third party in any form. 
Any unauthorized modifications, derivative works, or adaptations of VolcEngineRTCLite, along with all associated intellectual property rights, shall automatically and exclusively vest in Volcano Engine. 10 | 11 | Except otherwise provided by Volcano Engine, the VolcEngineRTCLite is provided “AS IS” without any warranties. Volcano Engine disclaims all liability for damages arising from its use. -------------------------------------------------------------------------------- /client/espressif/esp32s3_demo/components/VolcEngineRTCLite/libs/esp32s3/PLACEHOLDER: -------------------------------------------------------------------------------- 1 | 放置libVolcEngineRTCLite.a 2 | -------------------------------------------------------------------------------- /client/espressif/esp32s3_demo/components/VolcEngineRTCLite/libs/esp32s3/libVolcEngineRTCLite.a: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/rtc-aigc-embedded-demo/9076c6c76592bef14f33b019f9441875ffc5b69c/client/espressif/esp32s3_demo/components/VolcEngineRTCLite/libs/esp32s3/libVolcEngineRTCLite.a -------------------------------------------------------------------------------- /client/espressif/esp32s3_demo/main/AudioPipeline.c: -------------------------------------------------------------------------------- 1 | // Copyright (2025) Beijing Volcano Engine Technology Ltd. 
2 | // SPDX-License-Identifier: MIT 3 | 4 | #include "AudioPipeline.h" 5 | #include 6 | #include "esp_log.h" 7 | #include "sdkconfig.h" 8 | #include "audio_element.h" 9 | #include "audio_pipeline.h" 10 | #include "audio_event_iface.h" 11 | #include "audio_common.h" 12 | #include "audio_sys.h" 13 | #include "board.h" 14 | #include "algorithm_stream.h" 15 | #include "filter_resample.h" 16 | #include "i2s_stream.h" 17 | #include "pthread.h" 18 | #ifdef CONFIG_ESP32_S3_KORVO2_V3_BOARD 19 | #include "es7210.h" 20 | #elif CONFIG_M5STACK_ATOMS3R_BOARD 21 | #include "es8311.h" 22 | #endif 23 | 24 | #include "esp_timer.h" 25 | 26 | 27 | #if defined (RTC_DEMO_AUDIO_PIPELINE_CODEC_OPUS) 28 | #include "raw_opus_encoder.h" 29 | #include "raw_opus_decoder.h" 30 | #elif defined (RTC_DEMO_AUDIO_PIPELINE_CODEC_AAC) 31 | #include "aac_encoder.h" 32 | #include "aac_decoder.h" 33 | #elif defined (RTC_DEMO_AUDIO_PIPELINE_CODEC_G711A) 34 | #include "g711_encoder.h" 35 | #include "g711_decoder.h" 36 | #endif 37 | #include "audio_idf_version.h" 38 | #include "raw_stream.h" 39 | 40 | 41 | #define CHANNEL 1 42 | static const char *TAG = "AUDIO_PIPELINE"; 43 | #define I2S_SAMPLE_RATE 16000 44 | #define ALGO_SAMPLE_RATE 16000 45 | #ifdef CONFIG_ESP32_S3_KORVO2_V3_BOARD 46 | #define ALGORITHM_STREAM_SAMPLE_BIT 32 47 | #define CHANNEL_FORMAT I2S_CHANNEL_TYPE_ONLY_LEFT 48 | #define ALGORITHM_INPUT_FORMAT "RM" 49 | #define CHANNEL_NUM 1 50 | #elif CONFIG_M5STACK_ATOMS3R_BOARD 51 | #define ALGORITHM_STREAM_SAMPLE_BIT 16 52 | #define CHANNEL_FORMAT I2S_CHANNEL_TYPE_RIGHT_LEFT 53 | #define ALGORITHM_INPUT_FORMAT "MR" 54 | #define CHANNEL_NUM 2 55 | #endif 56 | 57 | #if (RTC_DEMO_AUDIO_PIPELINE_CODEC_OPUS) 58 | #define CODEC_NAME "opus" 59 | #define CODEC_SAMPLE_RATE 16000 60 | #define BIT_RATE 32000 61 | #define COMPLEXITY 10 62 | #define FRAME_TIME_MS 20 63 | 64 | #define DEC_SAMPLE_RATE 16000 65 | #define DEC_BIT_RATE 16000 66 | #elif (RTC_DEMO_AUDIO_PIPELINE_CODEC_AAC) 67 | #define CODEC_NAME 
"aac" 68 | #define CODEC_SAMPLE_RATE 16000 69 | #define BIT_RATE 80000 70 | #elif (RTC_DEMO_AUDIO_PIPELINE_CODEC_G711A) 71 | #define CODEC_NAME "g711a" 72 | #define CODEC_SAMPLE_RATE 8000 73 | #elif (RTC_DEMO_AUDIO_PIPELINE_CODEC_PCM) 74 | #define CODEC_NAME "g711a" 75 | #define CODEC_SAMPLE_RATE 8000 76 | #endif 77 | 78 | struct recorder_pipeline_t { 79 | audio_pipeline_handle_t audio_pipeline; 80 | audio_element_handle_t i2s_stream_reader; 81 | audio_element_handle_t audio_encoder; 82 | audio_element_handle_t raw_reader; 83 | audio_element_handle_t rsp; 84 | audio_element_handle_t algo_aec; 85 | }; 86 | 87 | 88 | struct player_pipeline_t { 89 | audio_pipeline_handle_t audio_pipeline; 90 | audio_element_handle_t raw_writer; 91 | audio_element_handle_t audio_decoder; 92 | audio_element_handle_t rsp; 93 | audio_element_handle_t i2s_stream_writer; 94 | }; 95 | 96 | static audio_element_handle_t create_resample_stream(int src_rate, int src_ch, int dest_rate, int dest_ch) 97 | { 98 | rsp_filter_cfg_t rsp_cfg = DEFAULT_RESAMPLE_FILTER_CONFIG(); 99 | rsp_cfg.src_rate = src_rate; 100 | rsp_cfg.src_ch = src_ch; 101 | rsp_cfg.dest_rate = dest_rate; 102 | rsp_cfg.dest_ch = dest_ch; 103 | rsp_cfg.complexity = 5; 104 | audio_element_handle_t stream = rsp_filter_init(&rsp_cfg); 105 | return stream; 106 | } 107 | 108 | static audio_element_handle_t create_record_i2s_stream(void) 109 | { 110 | #if CONFIG_ESP32_S3_KORVO2_V3_BOARD 111 | es7210_adc_set_gain(ES7210_INPUT_MIC3, GAIN_30DB); 112 | #elif CONFIG_M5STACK_ATOMS3R_BOARD 113 | es8311_set_mic_gain(ES8311_MIC_GAIN_36DB); 114 | #endif 115 | i2s_stream_cfg_t i2s_cfg = I2S_STREAM_CFG_DEFAULT_WITH_PARA(CODEC_ADC_I2S_PORT, I2S_SAMPLE_RATE, ALGORITHM_STREAM_SAMPLE_BIT, AUDIO_STREAM_READER); // 参数需要仔细检查 116 | i2s_cfg.type = AUDIO_STREAM_READER; 117 | i2s_stream_set_channel_type(&i2s_cfg, CHANNEL_FORMAT); 118 | i2s_cfg.std_cfg.clk_cfg.sample_rate_hz = I2S_SAMPLE_RATE; 119 | return i2s_stream_init(&i2s_cfg); 120 | } 121 | 122 | static 
audio_element_handle_t create_record_encoder_stream(void) 123 | { 124 | #ifdef RTC_DEMO_AUDIO_PIPELINE_CODEC_OPUS 125 | raw_opus_enc_config_t opus_cfg = RAW_OPUS_ENC_CONFIG_DEFAULT(); 126 | opus_cfg.sample_rate = CODEC_SAMPLE_RATE; 127 | opus_cfg.channel = CHANNEL; 128 | opus_cfg.bitrate = BIT_RATE; 129 | opus_cfg.complexity = 0; // COMPLEXITY; 130 | opus_cfg.task_core = 1; 131 | return raw_opus_encoder_init(&opus_cfg); 132 | #elif defined (RTC_DEMO_AUDIO_PIPELINE_CODEC_AAC) 133 | aac_encoder_cfg_t aac_cfg = DEFAULT_AAC_ENCODER_CONFIG(); 134 | aac_cfg.sample_rate = CODEC_SAMPLE_RATE; 135 | aac_cfg.channel = CHANNEL; 136 | aac_cfg.bitrate = BIT_RATE; 137 | pipeline->audio_encoder = aac_encoder_init(&aac_cfg); 138 | return audio_pipeline_register(pipeline->audio_pipeline, pipeline->audio_encoder, CODEC_NAME); 139 | #elif defined (RTC_DEMO_AUDIO_PIPELINE_CODEC_G711A) 140 | g711_encoder_cfg_t g711_cfg = DEFAULT_G711_ENCODER_CONFIG(); 141 | return g711_encoder_init(&g711_cfg); 142 | #else 143 | return NULL; 144 | #endif 145 | } 146 | 147 | static audio_element_handle_t create_record_raw_stream(void) 148 | { 149 | audio_element_handle_t raw_stream = NULL; 150 | raw_stream_cfg_t raw_cfg = RAW_STREAM_CFG_DEFAULT(); 151 | raw_cfg.type = AUDIO_STREAM_WRITER; 152 | raw_cfg.out_rb_size = 2 * 1024; 153 | raw_stream = raw_stream_init(&raw_cfg); 154 | audio_element_set_output_timeout(raw_stream, portMAX_DELAY); 155 | return raw_stream; 156 | } 157 | 158 | static audio_element_handle_t create_record_algo_stream(void) 159 | { 160 | ESP_LOGI(TAG, "[3.1] Create algorithm stream for aec"); 161 | algorithm_stream_cfg_t algo_config = ALGORITHM_STREAM_CFG_DEFAULT(); 162 | // algo_config.swap_ch = true; 163 | algo_config.sample_rate = ALGO_SAMPLE_RATE; 164 | algo_config.out_rb_size = 256; 165 | algo_config.algo_mask = ALGORITHM_STREAM_DEFAULT_MASK | ALGORITHM_STREAM_USE_AGC; 166 | algo_config.input_format = ALGORITHM_INPUT_FORMAT; 167 | audio_element_handle_t element_algo = 
algo_stream_init(&algo_config); 168 | audio_element_set_music_info(element_algo, ALGO_SAMPLE_RATE, 1, 16); 169 | audio_element_set_input_timeout(element_algo, portMAX_DELAY); 170 | return element_algo; 171 | } 172 | 173 | recorder_pipeline_handle_t recorder_pipeline_open() 174 | { 175 | recorder_pipeline_handle_t pipeline = heap_caps_calloc(1, sizeof(recorder_pipeline_t), MALLOC_CAP_SPIRAM | MALLOC_CAP_DEFAULT); 176 | esp_log_level_set("*", ESP_LOG_WARN); 177 | esp_log_level_set(TAG, ESP_LOG_INFO); 178 | 179 | // create and register streams 180 | audio_pipeline_cfg_t pipeline_cfg = DEFAULT_AUDIO_PIPELINE_CONFIG(); 181 | pipeline->audio_pipeline = audio_pipeline_init(&pipeline_cfg); 182 | mem_assert(pipeline->audio_pipeline); 183 | 184 | pipeline->i2s_stream_reader = create_record_i2s_stream(); 185 | audio_pipeline_register(pipeline->audio_pipeline, pipeline->i2s_stream_reader, "i2s"); 186 | 187 | pipeline->algo_aec = create_record_algo_stream(); 188 | audio_pipeline_register(pipeline->audio_pipeline, pipeline->algo_aec, "algo"); 189 | 190 | #ifndef RTC_DEMO_AUDIO_PIPELINE_CODEC_OPUS 191 | pipeline->rsp = create_resample_stream(I2S_SAMPLE_RATE, 1, CODEC_SAMPLE_RATE, 1); 192 | audio_pipeline_register(pipeline->audio_pipeline, pipeline->rsp, "rsp"); 193 | #endif 194 | 195 | pipeline->audio_encoder = create_record_encoder_stream(); 196 | if (pipeline->audio_encoder) { 197 | audio_pipeline_register(pipeline->audio_pipeline, pipeline->audio_encoder, CODEC_NAME); 198 | } 199 | 200 | pipeline->raw_reader = create_record_raw_stream(); 201 | audio_pipeline_register(pipeline->audio_pipeline, pipeline->raw_reader, "raw"); 202 | 203 | #ifdef RTC_DEMO_AUDIO_PIPELINE_CODEC_OPUS 204 | const char *link_tag[] = {"i2s", "algo", CODEC_NAME, "raw"}; 205 | #elif defined (RTC_DEMO_AUDIO_PIPELINE_CODEC_AAC) 206 | const char *link_tag[] = {"i2s", "aac", "rsp", CODEC_NAME, "raw"}; 207 | #elif defined (RTC_DEMO_AUDIO_PIPELINE_CODEC_G711A) 208 | const char *link_tag[] = {"i2s", "algo", "rsp", 
"g711a", "raw"}; 209 | #elif defined (RTC_DEMO_AUDIO_PIPELINE_CODEC_PCM) 210 | const char *link_tag[] = {"i2s", "algo", "rsp", "raw"}; 211 | #endif 212 | 213 | audio_pipeline_link(pipeline->audio_pipeline, &link_tag[0], sizeof(link_tag) / sizeof(link_tag[0])); 214 | return pipeline; 215 | } 216 | 217 | void recorder_pipeline_close(recorder_pipeline_handle_t pipeline) { 218 | audio_pipeline_stop(pipeline->audio_pipeline); 219 | audio_pipeline_wait_for_stop(pipeline->audio_pipeline); 220 | audio_pipeline_terminate(pipeline->audio_pipeline); 221 | 222 | if (pipeline->i2s_stream_reader) { 223 | audio_pipeline_unregister(pipeline->audio_pipeline, pipeline->i2s_stream_reader); 224 | audio_element_deinit(pipeline->i2s_stream_reader); 225 | } 226 | if (pipeline->audio_encoder) { 227 | audio_pipeline_unregister(pipeline->audio_pipeline, pipeline->audio_encoder); 228 | audio_element_deinit(pipeline->audio_encoder); 229 | } 230 | if (pipeline->raw_reader) { 231 | audio_pipeline_unregister(pipeline->audio_pipeline, pipeline->raw_reader); 232 | audio_element_deinit(pipeline->raw_reader); 233 | } 234 | if (pipeline->rsp) { 235 | audio_pipeline_unregister(pipeline->audio_pipeline, pipeline->rsp); 236 | audio_element_deinit(pipeline->rsp); 237 | } 238 | if (pipeline->algo_aec) { 239 | audio_pipeline_unregister(pipeline->audio_pipeline, pipeline->algo_aec); 240 | audio_element_deinit(pipeline->algo_aec); 241 | } 242 | 243 | audio_pipeline_deinit(pipeline->audio_pipeline); 244 | heap_caps_free(pipeline); 245 | }; 246 | 247 | void recorder_pipeline_run(recorder_pipeline_handle_t pipeline){ 248 | audio_pipeline_run(pipeline->audio_pipeline); 249 | }; 250 | 251 | int recorder_pipeline_get_default_read_size(recorder_pipeline_handle_t pipeline){ 252 | #if defined (RTC_DEMO_AUDIO_PIPELINE_CODEC_OPUS) 253 | return 80; 254 | #elif defined (RTC_DEMO_AUDIO_PIPELINE_CODEC_AAC) 255 | return -1;// 256 | #elif defined (RTC_DEMO_AUDIO_PIPELINE_CODEC_G711A) 257 | return 160; 258 | #elif defined 
(RTC_DEMO_AUDIO_PIPELINE_CODEC_PCM) 259 | return 320; 260 | #endif 261 | }; 262 | 263 | audio_element_handle_t recorder_pipeline_get_raw_reader(recorder_pipeline_handle_t pipeline){ 264 | return pipeline->raw_reader; 265 | }; 266 | audio_pipeline_handle_t recorder_pipeline_get_pipeline(recorder_pipeline_handle_t pipeline){ 267 | return pipeline->audio_pipeline; 268 | }; 269 | 270 | int recorder_pipeline_read(recorder_pipeline_handle_t pipeline,char *buffer, int buf_size) { 271 | return raw_stream_read(pipeline->raw_reader, buffer,buf_size); 272 | } 273 | 274 | static audio_element_handle_t create_player_raw_stream(void) 275 | { 276 | raw_stream_cfg_t raw_cfg = RAW_STREAM_CFG_DEFAULT(); 277 | raw_cfg.type = AUDIO_STREAM_READER; 278 | raw_cfg.out_rb_size = 8 * 1024; 279 | return raw_stream_init(&raw_cfg); 280 | } 281 | 282 | static audio_element_handle_t create_player_i2s_stream(void) 283 | { 284 | i2s_stream_cfg_t i2s_cfg = I2S_STREAM_CFG_DEFAULT_WITH_PARA(I2S_NUM_0, I2S_SAMPLE_RATE, ALGORITHM_STREAM_SAMPLE_BIT, AUDIO_STREAM_WRITER); 285 | i2s_cfg.type = AUDIO_STREAM_WRITER; 286 | #ifdef CONFIG_ESP32_S3_KORVO2_V3_BOARD 287 | i2s_cfg.need_expand = (16 != 32); 288 | #endif 289 | i2s_cfg.out_rb_size = 8 * 1024; 290 | i2s_cfg.buffer_len = 1416;//708 291 | i2s_stream_set_channel_type(&i2s_cfg, CHANNEL_FORMAT); 292 | audio_element_handle_t stream = i2s_stream_init(&i2s_cfg); 293 | i2s_stream_set_clk(stream, I2S_SAMPLE_RATE, ALGORITHM_STREAM_SAMPLE_BIT, CHANNEL_NUM); 294 | return stream; 295 | } 296 | 297 | static audio_element_handle_t create_player_decoder_stream(void) 298 | { 299 | #ifdef RTC_DEMO_AUDIO_PIPELINE_CODEC_OPUS 300 | raw_opus_dec_cfg_t opus_dec_cfg = RAW_OPUS_DEC_CONFIG_DEFAULT(); 301 | opus_dec_cfg.enable_frame_length_prefix = true; 302 | opus_dec_cfg.sample_rate = DEC_SAMPLE_RATE; 303 | opus_dec_cfg.channels = 1; 304 | opus_dec_cfg.task_core = 1; 305 | return raw_opus_decoder_init(&opus_dec_cfg); 306 | #elif RTC_DEMO_AUDIO_PIPELINE_CODEC_AAC 307 | 
aac_decoder_cfg_t aac_dec_cfg = DEFAULT_AAC_DECODER_CONFIG(); 308 | return aac_decoder_init(&aac_dec_cfg); 309 | #elif RTC_DEMO_AUDIO_PIPELINE_CODEC_G711A 310 | g711_decoder_cfg_t g711_dec_cfg = DEFAULT_G711_DECODER_CONFIG(); 311 | g711_dec_cfg.out_rb_size = 8 * 1024; 312 | return g711_decoder_init(&g711_dec_cfg); 313 | #else 314 | return NULL; 315 | #endif 316 | } 317 | 318 | player_pipeline_handle_t player_pipeline_open(void) { 319 | player_pipeline_handle_t player_pipeline = heap_caps_calloc(1, sizeof(player_pipeline_t), MALLOC_CAP_SPIRAM | MALLOC_CAP_DEFAULT); 320 | esp_log_level_set("*", ESP_LOG_WARN); 321 | esp_log_level_set(TAG, ESP_LOG_INFO); 322 | assert(player_pipeline != 0); 323 | 324 | audio_pipeline_cfg_t pipeline_cfg = DEFAULT_AUDIO_PIPELINE_CONFIG(); 325 | player_pipeline->audio_pipeline = audio_pipeline_init(&pipeline_cfg); 326 | mem_assert(pipeline); 327 | 328 | 329 | player_pipeline->raw_writer = create_player_raw_stream(); 330 | audio_pipeline_register(player_pipeline->audio_pipeline, player_pipeline->raw_writer, "raw"); 331 | 332 | player_pipeline->i2s_stream_writer = create_player_i2s_stream(); 333 | audio_pipeline_register(player_pipeline->audio_pipeline, player_pipeline->i2s_stream_writer, "i2s"); 334 | 335 | #ifndef RTC_DEMO_AUDIO_PIPELINE_CODEC_OPUS 336 | player_pipeline->rsp = create_resample_stream(CODEC_SAMPLE_RATE, 1, I2S_SAMPLE_RATE, CHANNEL_NUM); 337 | audio_element_set_output_timeout(player_pipeline->rsp, portMAX_DELAY); 338 | audio_pipeline_register(player_pipeline->audio_pipeline, player_pipeline->rsp, "rsp"); 339 | #endif 340 | 341 | player_pipeline->audio_decoder = create_player_decoder_stream(); 342 | if (player_pipeline->audio_decoder != NULL) { 343 | audio_pipeline_register(player_pipeline->audio_pipeline, player_pipeline->audio_decoder, "dec"); 344 | } 345 | 346 | #if defined (RTC_DEMO_AUDIO_PIPELINE_CODEC_PCM) 347 | const char *link_tag[] = {"raw", "rsp", "i2s"}; 348 | #elif defined(RTC_DEMO_AUDIO_PIPELINE_CODEC_OPUS) 349 | 
const char *link_tag[] = {"raw", "dec", "i2s"}; 350 | #else 351 | const char *link_tag[] = {"raw", "dec", "rsp", "i2s"}; 352 | #endif 353 | audio_pipeline_link(player_pipeline->audio_pipeline, &link_tag[0], sizeof(link_tag) / sizeof(link_tag[0])); 354 | 355 | return player_pipeline; 356 | } 357 | 358 | 359 | void player_pipeline_run(player_pipeline_handle_t player_pipeline){ 360 | audio_pipeline_run(player_pipeline->audio_pipeline); 361 | }; 362 | 363 | void player_pipeline_close(player_pipeline_handle_t player_pipeline){ 364 | audio_pipeline_stop(player_pipeline->audio_pipeline); 365 | audio_pipeline_wait_for_stop(player_pipeline->audio_pipeline); 366 | audio_pipeline_terminate(player_pipeline->audio_pipeline); 367 | 368 | if (player_pipeline->raw_writer) { 369 | audio_pipeline_unregister(player_pipeline->audio_pipeline, player_pipeline->raw_writer); 370 | audio_element_deinit(player_pipeline->raw_writer); 371 | } 372 | if (player_pipeline->audio_decoder) { 373 | audio_pipeline_unregister(player_pipeline->audio_pipeline, player_pipeline->audio_decoder); 374 | audio_element_deinit(player_pipeline->audio_decoder); 375 | } 376 | if (player_pipeline->rsp) { 377 | audio_pipeline_unregister(player_pipeline->audio_pipeline, player_pipeline->rsp); 378 | audio_element_deinit(player_pipeline->rsp); 379 | } 380 | if (player_pipeline->i2s_stream_writer) { 381 | audio_pipeline_unregister(player_pipeline->audio_pipeline, player_pipeline->i2s_stream_writer); 382 | audio_element_deinit(player_pipeline->i2s_stream_writer); 383 | } 384 | 385 | audio_pipeline_deinit(player_pipeline->audio_pipeline); 386 | heap_caps_free(player_pipeline); 387 | }; 388 | 389 | int player_pipeline_write(player_pipeline_handle_t player_pipeline, char *buffer, int buf_size){ 390 | raw_stream_write(player_pipeline->raw_writer, buffer, buf_size); 391 | return 0; 392 | }; -------------------------------------------------------------------------------- /client/espressif/esp32s3_demo/main/AudioPipeline.h: 
-------------------------------------------------------------------------------- 1 | // Copyright (2025) Beijing Volcano Engine Technology Ltd. 2 | // SPDX-License-Identifier: MIT 3 | 4 | #ifndef __AUDIO_PIPELINE_H__ 5 | #define __AUDIO_PIPELINE_H__ 6 | 7 | #include 8 | #include 9 | #include 10 | #include "audio_pipeline.h" 11 | 12 | #ifdef __cplusplus 13 | extern "C" { 14 | #endif 15 | 16 | #ifdef CONFIG_AUDIO_CODEC_TYPE_OPUS 17 | #define RTC_DEMO_AUDIO_PIPELINE_CODEC_OPUS 1 18 | #elif defined(CONFIG_AUDIO_CODEC_TYPE_PCM) 19 | #define RTC_DEMO_AUDIO_PIPELINE_CODEC_PCM 1 20 | #elif defined(CONFIG_AUDIO_CODEC_TYPE_G711A) 21 | #define RTC_DEMO_AUDIO_PIPELINE_CODEC_G711A 1 22 | #elif defined(CONFIG_AUDIO_CODEC_TYPE_G722) 23 | #define RTC_DEMO_AUDIO_PIPELINE_CODEC_G722 1 24 | #elif defined(CONFIG_AUDIO_CODEC_TYPE_AAC) 25 | #define RTC_DEMO_AUDIO_PIPELINE_CODEC_AAC 1 26 | #endif 27 | 28 | struct recorder_pipeline_t; 29 | typedef struct recorder_pipeline_t recorder_pipeline_t,*recorder_pipeline_handle_t; 30 | recorder_pipeline_handle_t recorder_pipeline_open(); 31 | void recorder_pipeline_run(recorder_pipeline_handle_t); 32 | void recorder_pipeline_close(recorder_pipeline_handle_t); 33 | int recorder_pipeline_get_default_read_size(recorder_pipeline_handle_t); 34 | int recorder_pipeline_read(recorder_pipeline_handle_t,char *buffer, int buf_size); 35 | 36 | struct player_pipeline_t; 37 | typedef struct player_pipeline_t player_pipeline_t,*player_pipeline_handle_t; 38 | player_pipeline_handle_t player_pipeline_open(); 39 | void player_pipeline_run(player_pipeline_handle_t); 40 | void player_pipeline_close(player_pipeline_handle_t); 41 | int player_pipeline_get_default_read_size(player_pipeline_handle_t); 42 | int player_pipeline_write(player_pipeline_handle_t,char *buffer, int buf_size); 43 | void player_pipeline_write_play_buffer_flag(player_pipeline_handle_t player_pipeline); 44 | 45 | #ifdef __cplusplus 46 | } 47 | #endif 48 | #endif 
--------------------------------------------------------------------------------
/client/espressif/esp32s3_demo/main/CMakeLists.txt:
--------------------------------------------------------------------------------
# Copyright (2025) Beijing Volcano Engine Technology Ltd.
# SPDX-License-Identifier: MIT

# "main" component of the demo. Sources are listed explicitly (one per line) so
# additions show up in diffs; headers live next to the sources.
idf_component_register(
    SRCS
        "VolcRTCDemo.c"
        "AudioPipeline.c"
        "RtcHttpUtils.c"
        "RtcBotUtils.c"
    INCLUDE_DIRS
        "."
)
--------------------------------------------------------------------------------
/client/espressif/esp32s3_demo/main/Kconfig.projbuild:
--------------------------------------------------------------------------------
menu "Example Configuration"

    config WIFI_SSID
        string "WiFi SSID"
        default "YOUR WIFI SSID"
        help
            SSID (network name) for the example to connect to.

    config WIFI_PASSWORD
        string "WiFi Password"
        default "YOUR WIFI PASSWORD"
        help
            WiFi password (WPA or WPA2) for the example to use.

            Can be left blank if the network has no security set.

    config RTC_APPID
        string "RTC_APPID"
        default "67582ac8******0174410bd1"
        help
            RTC application id. The demo appends it to the Authorization
            header of every request sent to the AIGC server.

    config AIGENT_SERVER_HOST
        string "AIGC SERVER IP:PORT"
        default "192.***.***.2:8080"
        help
            Host and port of the AIGC server, without scheme or path. The demo
            builds "http://<value>/startvoicechat" and similar URLs from it.

    # Exactly one entry is selected; each generates CONFIG_<symbol> in sdkconfig.h.
    choice AUDIO_CODEC_SUPPORT
        prompt "Audio Codec"
        default AUDIO_CODEC_TYPE_PCM

        config AUDIO_CODEC_TYPE_PCM
            bool "audio codec is pcm, use internal audio codec instead"

        config AUDIO_CODEC_TYPE_OPUS
            bool "audio codec is opus"

        config AUDIO_CODEC_TYPE_G711A
            bool "audio codec is g711a"

        config AUDIO_CODEC_TYPE_G722
            bool "audio codec is g722, not support yet"

        # Generates CONFIG_AUDIO_CODEC_TYPE_AACLC; C code has to test this exact
        # symbol name for the AAC option to take effect.
        config AUDIO_CODEC_TYPE_AACLC
            bool "audio codec is aaclc, not support yet"

    endchoice
endmenu
--------------------------------------------------------------------------------
/client/espressif/esp32s3_demo/main/RtcBotUtils.c:
--------------------------------------------------------------------------------
// Copyright (2025) Beijing Volcano Engine Technology Ltd.
2 | // SPDX-License-Identifier: MIT 3 | 4 | #include "RtcBotUtils.h" 5 | #include "RtcHttpUtils.h" 6 | #include "cJSON.h" 7 | #include "esp_log.h" 8 | #include "esp_heap_caps.h" 9 | #include 10 | 11 | static const char *TAG = "RTC_BOT_UTILS"; 12 | 13 | static void *impl_malloc_fn(size_t size) { 14 | uint32_t allocate_caps = 0; 15 | #if CONFIG_PSRAM 16 | allocate_caps = MALLOC_CAP_SPIRAM; 17 | #else 18 | allocate_caps = MALLOC_CAP_INTERNAL; 19 | #endif 20 | return heap_caps_malloc(size, allocate_caps); 21 | } 22 | 23 | static void impl_free_fn(void *ptr) { 24 | heap_caps_free(ptr); 25 | } 26 | 27 | const char* common_headers[] = { 28 | "Content-Type", "application/json", 29 | "Authorization", "af78e30" CONFIG_RTC_APPID, 30 | NULL 31 | }; 32 | 33 | int start_voice_bot(rtc_room_info_t* room_info) { 34 | char post_data[512]; 35 | cJSON *post_jobj = cJSON_CreateObject(); 36 | #ifdef CONFIG_AUDIO_CODEC_TYPE_OPUS 37 | cJSON_AddStringToObject(post_jobj, "audio_codec", "OPUS"); 38 | #elif defined(CONFIG_AUDIO_CODEC_TYPE_PCM) || defined(CONFIG_AUDIO_CODEC_TYPE_G711A) 39 | cJSON_AddStringToObject(post_jobj, "audio_codec", "G711A"); 40 | #elif defined(CONFIG_AUDIO_CODEC_TYPE_G722) 41 | cJSON_AddStringToObject(post_jobj, "audio_codec", "G722"); 42 | #elif defined(CONFIG_AUDIO_CODEC_TYPE_AAC) 43 | cJSON_AddStringToObject(post_jobj, "audio_codec", "AAC"); 44 | #endif 45 | const char* json_str = cJSON_Print(post_jobj); 46 | strcpy(post_data, json_str); 47 | cJSON_Delete(post_jobj); 48 | 49 | rtc_post_config_t post_config = { 50 | .uri = "http://" CONFIG_AIGENT_SERVER_HOST "/startvoicechat", 51 | .headers = common_headers, 52 | .post_data = post_data // 根据需要传入智能体id和音色id 53 | }; 54 | rtc_req_result_t post_result = rtc_http_post(&post_config); 55 | if (post_result.code == 200 && post_result.response != NULL) { 56 | // parse json 57 | cJSON* root = cJSON_Parse(post_result.response); 58 | rtc_request_free(&post_result); 59 | if (root == NULL) { 60 | ESP_LOGE(TAG, "Error parsing JSON"); 
61 | return -1; 62 | } 63 | 64 | cJSON* data = cJSON_GetObjectItem(root, "data"); 65 | 66 | if (data == NULL) { 67 | cJSON_Delete(root); 68 | ESP_LOGE(TAG, "Not found data object."); 69 | return -1; 70 | } 71 | 72 | cJSON* app_id_item = cJSON_GetObjectItem(data, "app_id"); 73 | const char* app_id = cJSON_GetStringValue(app_id_item); 74 | strcpy(room_info->app_id, app_id); 75 | 76 | cJSON* uid_item = cJSON_GetObjectItem(data, "uid"); 77 | const char* uid = cJSON_GetStringValue(uid_item); 78 | strcpy(room_info->uid, uid); 79 | 80 | cJSON* room_id_item = cJSON_GetObjectItem(data, "room_id"); 81 | const char* room_id = cJSON_GetStringValue(room_id_item); 82 | strcpy(room_info->room_id, room_id); 83 | 84 | cJSON* task_id_item = cJSON_GetObjectItem(data, "task_id"); 85 | const char* task_id = cJSON_GetStringValue(task_id_item); 86 | strcpy(room_info->task_id, task_id); 87 | 88 | cJSON* bot_uid_item = cJSON_GetObjectItem(data, "bot_uid"); 89 | const char* bot_uid = cJSON_GetStringValue(bot_uid_item); 90 | strcpy(room_info->bot_uid, bot_uid); 91 | 92 | cJSON* token_item = cJSON_GetObjectItem(data, "token"); 93 | const char* token = cJSON_GetStringValue(token_item); 94 | strcpy(room_info->token, token); 95 | 96 | cJSON_Delete(root); 97 | 98 | return 200; 99 | } else { 100 | cJSON* root = cJSON_Parse(post_result.response); 101 | if (root != NULL) { 102 | cJSON* message_item = cJSON_GetObjectItem(root, "message"); 103 | const char* message = cJSON_GetStringValue(message_item); 104 | ESP_LOGE(TAG, "Error: %s", message); 105 | cJSON_Delete(root); 106 | } 107 | return post_result.code; 108 | } 109 | } 110 | 111 | int stop_voice_bot(const rtc_room_info_t* room_info) { 112 | 113 | char post_data[512]; 114 | cJSON *post_jobj = cJSON_CreateObject(); 115 | cJSON_AddStringToObject(post_jobj, "app_id", room_info->app_id); 116 | cJSON_AddStringToObject(post_jobj, "room_id", room_info->room_id); 117 | cJSON_AddStringToObject(post_jobj, "task_id", room_info->task_id); 118 | 119 | const 
char* json_str = cJSON_Print(post_jobj); 120 | strcpy(post_data, json_str); 121 | cJSON_Delete(post_jobj); 122 | 123 | rtc_post_config_t post_config = { 124 | .uri = "http://" CONFIG_AIGENT_SERVER_HOST "/stopvoicechat", 125 | .headers = common_headers, 126 | .post_data = post_data 127 | }; 128 | rtc_req_result_t post_result = rtc_http_post(&post_config); 129 | if (post_result.code == 200 && post_result.response != NULL) { 130 | // parse json 131 | cJSON* root = cJSON_Parse(post_result.response); 132 | rtc_request_free(&post_result); 133 | if (root == NULL) { 134 | ESP_LOGE(TAG, "Error parsing JSON"); 135 | return -1; 136 | } 137 | 138 | cJSON* data = cJSON_GetObjectItem(root, "data"); 139 | 140 | if (data == NULL) { 141 | cJSON_Delete(root); 142 | ESP_LOGE(TAG, "Not found data object."); 143 | return -1; 144 | } 145 | 146 | 147 | cJSON_Delete(root); 148 | 149 | return 200; 150 | } else { 151 | cJSON* root = cJSON_Parse(post_result.response); 152 | if (root != NULL) { 153 | cJSON* message_item = cJSON_GetObjectItem(root, "message"); 154 | const char* message = cJSON_GetStringValue(message_item); 155 | ESP_LOGE(TAG, "Error: %s", message); 156 | cJSON_Delete(root); 157 | } 158 | return post_result.code; 159 | } 160 | } 161 | 162 | int update_voice_bot(const rtc_room_info_t* room_info, const char* command, const char* message) { 163 | char post_data[1024]; 164 | cJSON *post_jobj = cJSON_CreateObject(); 165 | cJSON_AddStringToObject(post_jobj, "app_id", room_info->app_id); 166 | cJSON_AddStringToObject(post_jobj, "room_id", room_info->room_id); 167 | cJSON_AddStringToObject(post_jobj, "task_id", room_info->task_id); 168 | cJSON_AddStringToObject(post_jobj, "command", command); 169 | if (message) { 170 | cJSON_AddStringToObject(post_jobj, "message", message); 171 | } 172 | 173 | const char* json_str = cJSON_Print(post_jobj); 174 | strcpy(post_data, json_str); 175 | cJSON_Delete(post_jobj); 176 | 177 | 178 | rtc_post_config_t post_config = { 179 | .uri = "http://" 
CONFIG_AIGENT_SERVER_HOST "/updatevoicechat", 180 | .headers = common_headers, 181 | .post_data = post_data 182 | }; 183 | rtc_req_result_t post_result = rtc_http_post(&post_config); 184 | if (post_result.code == 200 && post_result.response != NULL) { 185 | // parse json 186 | cJSON* root = cJSON_Parse(post_result.response); 187 | rtc_request_free(&post_result); 188 | if (root == NULL) { 189 | ESP_LOGE(TAG, "Error parsing JSON"); 190 | return -1; 191 | } 192 | 193 | cJSON* data = cJSON_GetObjectItem(root, "data"); 194 | 195 | if (data == NULL) { 196 | cJSON_Delete(root); 197 | ESP_LOGE(TAG, "Not found data object."); 198 | return -1; 199 | } 200 | 201 | cJSON_Delete(root); 202 | return 200; 203 | } else { 204 | cJSON* root = cJSON_Parse(post_result.response); 205 | if (root != NULL) { 206 | cJSON* message_item = cJSON_GetObjectItem(root, "message"); 207 | const char* message = cJSON_GetStringValue(message_item); 208 | ESP_LOGE(TAG, "Error: %s", message); 209 | cJSON_Delete(root); 210 | } 211 | return post_result.code; 212 | } 213 | 214 | } 215 | 216 | int interrupt_voice_bot(const rtc_room_info_t* room_info) { 217 | return update_voice_bot(room_info, "interrupt", NULL); 218 | } 219 | 220 | int voice_bot_function_calling(const rtc_room_info_t* room_info, const char* message) { 221 | return update_voice_bot(room_info, "function", message); 222 | } -------------------------------------------------------------------------------- /client/espressif/esp32s3_demo/main/RtcBotUtils.h: -------------------------------------------------------------------------------- 1 | // Copyright (2025) Beijing Volcano Engine Technology Ltd. 
2 | // SPDX-License-Identifier: MIT 3 | 4 | #ifndef __RTC_BOT_UTILS_H__ 5 | #define __RTC_BOT_UTILS_H__ 6 | typedef struct { 7 | char room_id[129]; 8 | char uid[129]; 9 | char app_id[25]; 10 | char task_id[129]; 11 | char bot_uid[129]; 12 | char token[257]; 13 | } rtc_room_info_t; 14 | 15 | int start_voice_bot(rtc_room_info_t* room_info); 16 | int stop_voice_bot(const rtc_room_info_t* room_info); 17 | int update_voice_bot(const rtc_room_info_t* room_info, const char* command, const char* message); 18 | int interrupt_voice_bot(const rtc_room_info_t* room_info); 19 | int voice_bot_function_calling(const rtc_room_info_t* room_info, const char* message); 20 | 21 | 22 | #endif // __RTC_BOT_UTILS_H__ -------------------------------------------------------------------------------- /client/espressif/esp32s3_demo/main/RtcHttpUtils.c: -------------------------------------------------------------------------------- 1 | // Copyright (2025) Beijing Volcano Engine Technology Ltd. 2 | // SPDX-License-Identifier: MIT 3 | 4 | #include "RtcHttpUtils.h" 5 | #include "esp_log.h" 6 | #include "freertos/FreeRTOS.h" 7 | #include "freertos/event_groups.h" 8 | #include "esp_http_client.h" 9 | #include 10 | 11 | #define HTTP_FINSH_BIT 1 12 | 13 | static const char *TAG = "RTC_HTTP_UTILS"; 14 | #if CONFIG_PSRAM 15 | static const unsigned int mem_flags = MALLOC_CAP_SPIRAM; 16 | #else 17 | static const unsigned int mem_flags = MALLOC_CAP_INTERNAL; 18 | #endif // CONFIG_PSRAM 19 | 20 | typedef struct { 21 | EventGroupHandle_t http_finish_event; 22 | int output_len; 23 | rtc_req_result_t result; 24 | } rtc_http_post_context_t; 25 | 26 | static esp_err_t _http_event_handler(esp_http_client_event_t *evt) 27 | { 28 | rtc_http_post_context_t *context = (rtc_http_post_context_t *) evt->user_data; 29 | 30 | switch(evt->event_id) { 31 | case HTTP_EVENT_ERROR: 32 | ESP_LOGD(TAG, "HTTP_EVENT_ERROR"); 33 | break; 34 | case HTTP_EVENT_ON_CONNECTED: 35 | ESP_LOGD(TAG, "HTTP_EVENT_ON_CONNECTED"); 36 | break; 
37 | case HTTP_EVENT_HEADER_SENT: 38 | ESP_LOGD(TAG, "HTTP_EVENT_HEADER_SENT"); 39 | break; 40 | case HTTP_EVENT_ON_HEADER: 41 | ESP_LOGD(TAG, "HTTP_EVENT_ON_HEADER, key=%s, value=%s", evt->header_key, evt->header_value); 42 | break; 43 | case HTTP_EVENT_ON_DATA: 44 | memcpy(context->result.response + context->output_len, evt->data, evt->data_len); 45 | context->output_len += evt->data_len; 46 | context->result.response[context->output_len] = 0; 47 | break; 48 | case HTTP_EVENT_ON_FINISH: 49 | ESP_LOGD(TAG, "HTTP_EVENT_ON_FINISH"); 50 | xEventGroupSetBits(context->http_finish_event, HTTP_FINSH_BIT); 51 | break; 52 | case HTTP_EVENT_DISCONNECTED: 53 | ESP_LOGD(TAG, "HTTP_EVENT_DISCONNECTED"); 54 | break; 55 | case HTTP_EVENT_REDIRECT: 56 | break; 57 | } 58 | return ESP_OK; 59 | } 60 | 61 | rtc_req_result_t rtc_http_post(rtc_post_config_t* config) { 62 | if (!config || !config->uri || !config->post_data) { 63 | ESP_LOGE(TAG, "Invalid parameters: config"); 64 | } 65 | 66 | rtc_http_post_context_t context = {0}; 67 | context.http_finish_event = xEventGroupCreate(); 68 | if (!context.http_finish_event) { 69 | ESP_LOGE(TAG, "http_finish_event create failed."); 70 | return context.result; 71 | } 72 | context.result.code = 0; 73 | context.result.response = heap_caps_malloc(2048, mem_flags); 74 | if (!context.result.response) { 75 | vEventGroupDelete(context.http_finish_event); 76 | ESP_LOGE(TAG, "http_finish_event create failed."); 77 | return context.result; 78 | } 79 | context.result.response[0] = 0; 80 | 81 | esp_http_client_config_t http_client_config = { 82 | .url = config->uri, 83 | .query = "", 84 | .event_handler = _http_event_handler, 85 | .user_data = &context, 86 | .disable_auto_redirect = true, 87 | }; 88 | 89 | esp_http_client_handle_t client = esp_http_client_init(&http_client_config); 90 | esp_http_client_set_method(client, HTTP_METHOD_POST); 91 | if (config->headers) { 92 | int header_index = 0; 93 | while(config->headers[header_index]) { 94 | 
esp_http_client_set_header(client, config->headers[header_index], config->headers[header_index + 1]); 95 | header_index += 2; 96 | } 97 | } 98 | esp_http_client_set_post_field(client, config->post_data, strlen(config->post_data)); 99 | esp_err_t err = esp_http_client_perform(client); 100 | if (err != ESP_OK) { 101 | ESP_LOGE(TAG, "request failed: %s", esp_err_to_name(err)); 102 | } 103 | 104 | EventBits_t ux_bits = xEventGroupWaitBits(context.http_finish_event, HTTP_FINSH_BIT , pdTRUE, pdFALSE, pdMS_TO_TICKS(10000)); // wait 10s 105 | if ((ux_bits & HTTP_FINSH_BIT) == 0) { 106 | ESP_LOGE(TAG, "request failed: %s", esp_err_to_name(err)); 107 | } 108 | 109 | context.result.code = esp_http_client_get_status_code(client); 110 | ESP_LOGI(TAG, "context.result.code: %d, context.result.response: %s", context.result.code, context.result.response); 111 | 112 | esp_http_client_cleanup(client); 113 | vEventGroupDelete(context.http_finish_event); 114 | return context.result; 115 | } 116 | 117 | void rtc_request_free(rtc_req_result_t *result) { 118 | if (result && result->response) { 119 | heap_caps_free(result->response); 120 | result->response = NULL; 121 | } 122 | } -------------------------------------------------------------------------------- /client/espressif/esp32s3_demo/main/RtcHttpUtils.h: -------------------------------------------------------------------------------- 1 | // Copyright (2025) Beijing Volcano Engine Technology Ltd. 
2 | // SPDX-License-Identifier: MIT 3 | 4 | #ifndef __RTC_HTTP_UTILS_H__ 5 | #define __RTC_HTTP_UTILS_H__ 6 | 7 | typedef struct { 8 | int code; 9 | char* response; 10 | } rtc_req_result_t; 11 | 12 | typedef struct { 13 | const char* uri; 14 | const char** headers; // key1,value1,key2,value2....keyn,valuen,NULL 15 | const char* post_data; 16 | } rtc_post_config_t; 17 | 18 | rtc_req_result_t rtc_http_post(rtc_post_config_t* config); 19 | void rtc_request_free(rtc_req_result_t *result); 20 | 21 | #endif // __RTC_HTTP_UTILS_H__ -------------------------------------------------------------------------------- /client/espressif/esp32s3_demo/main/VolcRTCDemo.c: -------------------------------------------------------------------------------- 1 | // Copyright (2025) Beijing Volcano Engine Technology Ltd. 2 | // SPDX-License-Identifier: MIT 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "esp_event.h" 13 | #include "esp_log.h" 14 | #include "esp_system.h" 15 | #include "nvs_flash.h" 16 | #include "esp_netif.h" 17 | #include "freertos/FreeRTOS.h" 18 | #include "freertos/task.h" 19 | #include "esp_heap_task_info.h" 20 | #include "esp_random.h" 21 | 22 | #include 23 | #include "freertos/semphr.h" 24 | #include "esp_err.h" 25 | #include "sdkconfig.h" 26 | #include "audio_element.h" 27 | #include "audio_pipeline.h" 28 | #include "audio_event_iface.h" 29 | #include "audio_common.h" 30 | #include "audio_sys.h" 31 | #include "board.h" 32 | #include "esp_peripherals.h" 33 | #include "periph_wifi.h" 34 | #include "fatfs_stream.h" 35 | #include "i2s_stream.h" 36 | #include "AudioPipeline.h" 37 | #include "RtcBotUtils.h" 38 | #include "cJSON.h" 39 | 40 | #define STATS_TASK_PRIO 5 41 | 42 | static const char* TAG = "VolcRTCDemo"; 43 | static bool joined = false; 44 | 45 | typedef struct { 46 | player_pipeline_handle_t player_pipeline; 47 | rtc_room_info_t* room_info; 48 | char remote_uid[128]; 49 | } engine_context_t; 50 
| // byte rtc lite callbacks 51 | static void byte_rtc_on_join_room_success(byte_rtc_engine_t engine, const char* channel, int elapsed_ms, bool rejoin) { 52 | ESP_LOGI(TAG, "join channel success %s elapsed %d ms now %d ms\n", channel, elapsed_ms, elapsed_ms); 53 | joined = true; 54 | }; 55 | 56 | static void byte_rtc_on_rejoin_room_success(byte_rtc_engine_t engine, const char* channel, int elapsed_ms){ 57 | // g_byte_rtc_data.channel_joined = TRUE; 58 | ESP_LOGI(TAG, "rejoin channel success %s\n", channel); 59 | }; 60 | 61 | static void byte_rtc_on_user_joined(byte_rtc_engine_t engine, const char* channel, const char* user_name, int elapsed_ms){ 62 | ESP_LOGI(TAG, "remote user joined %s:%s\n", channel, user_name); 63 | engine_context_t* context = (engine_context_t *) byte_rtc_get_user_data(engine); 64 | strcpy(context->remote_uid, user_name); 65 | }; 66 | 67 | static void byte_rtc_on_user_offline(byte_rtc_engine_t engine, const char* channel, const char* user_name, int reason){ 68 | ESP_LOGI(TAG, "remote user offline %s:%s\n", channel, user_name); 69 | }; 70 | 71 | static void byte_rtc_on_user_mute_audio(byte_rtc_engine_t engine, const char* channel, const char* user_name, int muted){ 72 | ESP_LOGI(TAG, "remote user mute audio %s:%s %d\n", channel, user_name, muted); 73 | }; 74 | 75 | static void byte_rtc_on_user_mute_video(byte_rtc_engine_t engine, const char* channel, const char* user_name, int muted){ 76 | ESP_LOGI(TAG, "remote user mute video %s:%s %d\n", channel, user_name, muted); 77 | }; 78 | 79 | static void byte_rtc_on_connection_lost(byte_rtc_engine_t engine, const char* channel){ 80 | ESP_LOGI(TAG, "connection Lost %s\n", channel); 81 | }; 82 | 83 | static void byte_rtc_on_room_error(byte_rtc_engine_t engine, const char* channel, int code, const char* msg){ 84 | ESP_LOGE(TAG, "error occur %s %d %s\n", channel, code, msg?msg:""); 85 | }; 86 | 87 | // remote audio 88 | static void byte_rtc_on_audio_data(byte_rtc_engine_t engine, const char* channel, const 
char* uid , uint16_t sent_ts, 89 | audio_data_type_e codec, const void* data_ptr, size_t data_len){ 90 | // ESP_LOGI(TAG, "byte_rtc_on_audio_data... len %d\n", data_len); 91 | engine_context_t* context = (engine_context_t *) byte_rtc_get_user_data(engine); 92 | #ifdef RTC_DEMO_AUDIO_PIPELINE_CODEC_OPUS 93 | static char opus_data_cache[1024]; 94 | opus_data_cache[0] = (data_len >> 8) & 0xFF; 95 | opus_data_cache[1] = data_len & 0xFF; 96 | memcpy(opus_data_cache + 2, data_ptr, data_len); 97 | player_pipeline_write(context->player_pipeline, opus_data_cache, data_len + 2); 98 | #endif 99 | player_pipeline_write(context->player_pipeline, data_ptr, data_len); 100 | } 101 | 102 | // remote video 103 | static void byte_rtc_on_video_data(byte_rtc_engine_t engine, const char* channel, const char* uid, uint16_t sent_ts, 104 | video_data_type_e codec, int is_key_frame, 105 | const void * data_ptr, size_t data_len){ 106 | ESP_LOGI(TAG, "byte_rtc_on_video_data... len %d\n", data_len); 107 | } 108 | 109 | // remote message 110 | // 字幕消息 参考https://www.volcengine.com/docs/6348/1337284 111 | static void on_subtitle_message_received(byte_rtc_engine_t engine, const cJSON* root) { 112 | /* 113 | { 114 | "data" : 115 | [ 116 | { 117 | "definite" : false, 118 | "language" : "zh", 119 | "mode" : 1, 120 | "paragraph" : false, 121 | "sequence" : 0, 122 | "text" : "\\u4f60\\u597d", 123 | "userId" : "voiceChat_xxxxx" 124 | } 125 | ], 126 | "type" : "subtitle" 127 | } 128 | */ 129 | cJSON * type_obj = cJSON_GetObjectItem(root, "type"); 130 | if (type_obj != NULL && strcmp("subtitle", cJSON_GetStringValue(type_obj)) == 0) { 131 | cJSON* data_obj_arr = cJSON_GetObjectItem(root, "data"); 132 | cJSON* obji = NULL; 133 | cJSON_ArrayForEach(obji, data_obj_arr) { 134 | cJSON* user_id_obj = cJSON_GetObjectItem(obji, "userId"); 135 | cJSON* text_obj = cJSON_GetObjectItem(obji, "text"); 136 | if (user_id_obj && text_obj) { 137 | ESP_LOGE(TAG, "subtitle:%s:%s", cJSON_GetStringValue(user_id_obj), 
cJSON_GetStringValue(text_obj)); 138 | } 139 | } 140 | } 141 | } 142 | 143 | // function calling 消息 参考 https://www.volcengine.com/docs/6348/1359441 144 | static void on_function_calling_message_received(byte_rtc_engine_t engine, const cJSON* root, const char* json_str) { 145 | /* 146 | { 147 | "subscriber_user_id" : "", 148 | "tool_calls" : 149 | [ 150 | { 151 | "function" : 152 | { 153 | "arguments" : "{\\"location\\": \\"\\u5317\\u4eac\\u5e02\\"}", 154 | "name" : "get_current_weather" 155 | }, 156 | "id" : "call_py400kek0e3pczrqdxgnb3lo", 157 | "type" : "function" 158 | } 159 | ] 160 | } 161 | */ 162 | // 收到function calling 消息,需要根据具体情况要在服务端处理还是客户端处理 163 | 164 | engine_context_t* context = (engine_context_t *) byte_rtc_get_user_data(engine); 165 | 166 | // 服务端处理: 167 | // voice_bot_function_calling(context->room_info, json_str); 168 | 169 | // 在客户端处理,通过byte_rtc_rts_send_message接口通知智能体 170 | /*cJSON* tool_obj_arr = cJSON_GetObjectItem(root, "tool_calls"); 171 | cJSON* obji = NULL; 172 | cJSON_ArrayForEach(obji, tool_obj_arr) { 173 | cJSON* id_obj = cJSON_GetObjectItem(obji, "id"); 174 | cJSON* function_obj = cJSON_GetObjectItem(obji, "function"); 175 | if (id_obj && function_obj) { 176 | cJSON* arguments_obj = cJSON_GetObjectItem(function_obj, "arguments"); 177 | cJSON* name_obj = cJSON_GetObjectItem(function_obj, "name"); 178 | cJSON* location_obj = cJSON_GetObjectItem(arguments_obj, "arguments"); 179 | const char* func_name = cJSON_GetStringValue(name_obj); 180 | const char* loction = cJSON_GetStringValue(location_obj); 181 | const char* func_id = cJSON_GetStringValue(id_obj); 182 | 183 | if (strcmp(func_name, "get_current_weather") == 0) { 184 | cJSON *fc_obj = cJSON_CreateObject(); 185 | cJSON_AddStringToObject(fc_obj, "ToolCallID", func_id); 186 | cJSON_AddStringToObject(fc_obj, "Content", "今天白天风和日丽,天气晴朗,晚上阵风二级。"); 187 | char *json_string = cJSON_Print(fc_obj); 188 | static char fc_message_buffer[256] = {'f', 'u', 'n', 'c'}; 189 | int json_str_len = 
strlen(json_string); 190 | fc_message_buffer[4] = (json_str_len >> 24) & 0xff; 191 | fc_message_buffer[5] = (json_str_len >> 16) & 0xff; 192 | fc_message_buffer[6] = (json_str_len >> 8) & 0xff; 193 | fc_message_buffer[7] = (json_str_len >> 0) & 0xff; 194 | memcpy(fc_message_buffer + 8, json_string, json_str_len); 195 | ESP_LOGE(TAG, "send message: %s", json_string); 196 | cJSON_Delete(fc_obj); 197 | 198 | byte_rtc_rts_send_message(engine, context->room_info->room_id, context->remote_uid, fc_message_buffer, json_str_len + 8, 1, RTS_MESSAGE_RELIABLE); 199 | } 200 | } 201 | }*/ 202 | 203 | } 204 | 205 | void on_message_received(byte_rtc_engine_t engine, const char* room, const char* uid, const uint8_t* message, int size, bool binary) { 206 | // 字幕消息,参考https://www.volcengine.com/docs/6348/1337284 207 | // subv|length(4)|json str 208 | // 209 | // function calling 消息,参考https://www.volcengine.com/docs/6348/1359441 210 | // tool|length(4)|json str 211 | 212 | static char message_buffer[4096]; 213 | if (size > 8) { 214 | memcpy(message_buffer, message, size); 215 | message_buffer[size] = 0; 216 | message_buffer[size + 1] = 0; 217 | cJSON *root = cJSON_Parse(message_buffer + 8); 218 | if (root != NULL) { 219 | if (message[0] == 's' && message[1] == 'u' && message[2] == 'b' && message[3] == 'v') { 220 | // 字幕消息 221 | on_subtitle_message_received(engine, root); 222 | } else if (message[0] == 't' && message[1] == 'o' && message[2] == 'o' && message[3] == 'l') { 223 | // function calling 消息 224 | on_function_calling_message_received(engine, root, message_buffer + 8); 225 | } else { 226 | ESP_LOGE(TAG, "unknown json message: %s", message_buffer + 8); 227 | } 228 | cJSON_Delete(root); 229 | } else { 230 | ESP_LOGE(TAG, "unknown message."); 231 | } 232 | } else { 233 | ESP_LOGE(TAG, "unknown message."); 234 | } 235 | } 236 | 237 | static void on_key_frame_gen_req(byte_rtc_engine_t engine, const char* channel, const char* uid) {} 238 | // byte rtc lite callbacks end. 
239 | 240 | 241 | static void byte_rtc_task(void *pvParameters) { 242 | rtc_room_info_t* room_info = heap_caps_malloc(sizeof(rtc_room_info_t), MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); 243 | // step 1: start ai agent & get room info 244 | int start_ret = start_voice_bot(room_info); 245 | if (start_ret != 200) { 246 | ESP_LOGE(TAG, "Bot start Failed, ret = %d", start_ret); 247 | return; 248 | } 249 | 250 | // step 2: start audio capture & play 251 | recorder_pipeline_handle_t pipeline = recorder_pipeline_open(); 252 | player_pipeline_handle_t player_pipeline = player_pipeline_open(); 253 | recorder_pipeline_run(pipeline); 254 | player_pipeline_run(player_pipeline); 255 | 256 | // step 3: start byte rtc engine 257 | byte_rtc_event_handler_t handler = { 258 | .on_join_room_success = byte_rtc_on_join_room_success, 259 | .on_room_error = byte_rtc_on_room_error, 260 | .on_user_joined = byte_rtc_on_user_joined, 261 | .on_user_offline = byte_rtc_on_user_offline, 262 | .on_user_mute_audio = byte_rtc_on_user_mute_audio, 263 | .on_user_mute_video = byte_rtc_on_user_mute_video, 264 | .on_audio_data = byte_rtc_on_audio_data, 265 | .on_video_data = byte_rtc_on_video_data, 266 | .on_key_frame_gen_req = on_key_frame_gen_req, 267 | .on_message_received = on_message_received, 268 | }; 269 | 270 | byte_rtc_engine_t engine = byte_rtc_create(room_info->app_id, &handler); 271 | byte_rtc_set_log_level(engine, BYTE_RTC_LOG_LEVEL_ERROR); 272 | byte_rtc_set_params(engine, "{\"debug\":{\"log_to_console\":1}}"); 273 | #ifdef RTC_DEMO_AUDIO_PIPELINE_CODEC_PCM 274 | byte_rtc_set_params(engine,"{\"audio\":{\"codec\":{\"internal\":{\"enable\":1}}}}"); 275 | #endif 276 | 277 | byte_rtc_init(engine); 278 | #ifdef CONFIG_AUDIO_CODEC_TYPE_OPUS 279 | byte_rtc_set_audio_codec(engine, AUDIO_CODEC_TYPE_OPUS); 280 | #elif defined(CONFIG_AUDIO_CODEC_TYPE_PCM) || defined(CONFIG_AUDIO_CODEC_TYPE_G711A) 281 | byte_rtc_set_audio_codec(engine, AUDIO_CODEC_TYPE_G711A); 282 | #elif 
defined(CONFIG_AUDIO_CODEC_TYPE_G722) 283 | byte_rtc_set_audio_codec(engine, AUDIO_CODEC_TYPE_G722); 284 | #elif defined(CONFIG_AUDIO_CODEC_TYPE_AAC) 285 | byte_rtc_set_audio_codec(engine, AUDIO_CODEC_TYPE_AACLC); 286 | #endif 287 | 288 | // byte_rtc_set_video_codec(engine, VIDEO_CODEC_TYPE_H264); // 需要视频功能时设置 289 | 290 | engine_context_t engine_context = { 291 | .player_pipeline = player_pipeline, 292 | .room_info = room_info 293 | }; 294 | byte_rtc_set_user_data(engine, &engine_context); 295 | 296 | // step 4: join room 297 | byte_rtc_room_options_t options; 298 | options.auto_subscribe_audio = 1; // 接收远端音频 299 | options.auto_subscribe_video = 0; // 不接收远端视频 300 | options.auto_publish_audio = 1; // 发送音频 301 | options.auto_publish_video = 0; // 发送视频 302 | byte_rtc_join_room(engine, room_info->room_id, room_info->uid, room_info->token, &options); 303 | 304 | const int DEFAULT_READ_SIZE = recorder_pipeline_get_default_read_size(pipeline); 305 | uint8_t *audio_buffer = heap_caps_malloc(DEFAULT_READ_SIZE, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); 306 | if (!audio_buffer) { 307 | ESP_LOGE(TAG, "Failed to alloc audio buffer!"); 308 | return; 309 | } 310 | 311 | // step 5: start sending audio data 312 | while (true) { 313 | int ret = recorder_pipeline_read(pipeline, (char*) audio_buffer, DEFAULT_READ_SIZE); 314 | if (ret == DEFAULT_READ_SIZE && joined) { 315 | // push_audio data 316 | #ifdef RTC_DEMO_AUDIO_PIPELINE_CODEC_PCM 317 | audio_frame_info_t audio_frame_info = {.data_type = AUDIO_DATA_TYPE_PCM}; 318 | #elif defined(CONFIG_AUDIO_CODEC_TYPE_G711A) 319 | audio_frame_info_t audio_frame_info = {.data_type = AUDIO_DATA_TYPE_PCMA}; 320 | #elif defined(CONFIG_AUDIO_CODEC_TYPE_G722) 321 | audio_frame_info_t audio_frame_info = {.data_type = AUDIO_DATA_TYPE_G722}; 322 | #elif defined(CONFIG_AUDIO_CODEC_TYPE_AAC) 323 | audio_frame_info_t audio_frame_info = {.data_type = AUDIO_DATA_TYPE_AAC}; 324 | #elif defined(CONFIG_AUDIO_CODEC_TYPE_OPUS) 325 | audio_frame_info_t 
audio_frame_info = {.data_type = AUDIO_DATA_TYPE_OPUS}; 326 | #endif 327 | byte_rtc_send_audio_data(engine, room_info->room_id, audio_buffer, DEFAULT_READ_SIZE, &audio_frame_info); 328 | } 329 | } 330 | 331 | // step 6: leave room and destroy engine 332 | byte_rtc_leave_room(engine, room_info->room_id); 333 | usleep(1000 * 1000); 334 | byte_rtc_fini(engine); 335 | usleep(1000 * 1000); 336 | byte_rtc_destory(engine); 337 | 338 | // step 7: stop ai agent or it will not stop until 3 minutes 339 | stop_voice_bot(room_info); 340 | heap_caps_free(room_info); 341 | 342 | // step 8: stop audio capture & play 343 | recorder_pipeline_close(pipeline); 344 | player_pipeline_close(player_pipeline); 345 | ESP_LOGI(TAG, "............. finished\n"); 346 | } 347 | 348 | void app_main(void) 349 | { 350 | ESP_ERROR_CHECK(nvs_flash_init() ); 351 | ESP_ERROR_CHECK(esp_netif_init()); 352 | 353 | esp_periph_config_t periph_cfg = DEFAULT_ESP_PERIPH_SET_CONFIG(); 354 | esp_periph_set_handle_t set = esp_periph_set_init(&periph_cfg); 355 | 356 | periph_wifi_cfg_t wifi_cfg = { 357 | .wifi_config.sta.ssid = CONFIG_WIFI_SSID, 358 | .wifi_config.sta.password = CONFIG_WIFI_PASSWORD, 359 | }; 360 | esp_periph_handle_t wifi_handle = periph_wifi_init(&wifi_cfg); 361 | esp_periph_start(set, wifi_handle); 362 | periph_wifi_wait_for_connected(wifi_handle, portMAX_DELAY); 363 | 364 | audio_board_handle_t board_handle = audio_board_init(); 365 | audio_hal_ctrl_codec(board_handle->audio_hal, AUDIO_HAL_CODEC_MODE_BOTH, AUDIO_HAL_CTRL_START); 366 | audio_hal_set_volume(board_handle->audio_hal, 80); 367 | ESP_LOGI(TAG, "Starting again!\n"); 368 | 369 | // Allow other core to finish initialization 370 | vTaskDelay(pdMS_TO_TICKS(100)); 371 | 372 | // Create and start stats task 373 | xTaskCreate(&byte_rtc_task, "byte_rtc_task", 8192, NULL, STATS_TASK_PRIO, NULL); 374 | } 375 | -------------------------------------------------------------------------------- /client/espressif/esp32s3_demo/partitions.csv: 
-------------------------------------------------------------------------------- 1 | # Name, Type, SubType, Offset, Size, Flags 2 | # Note: if you have increased the bootloader size, make sure to update the offsets to avoid overlap 3 | nvs, data, nvs, 0x9000, 0x6000, 4 | phy_init, data, phy, 0xf000, 0x1000, 5 | factory, app, factory, 0x10000, 5M, 6 | coredump, data, coredump,, 64K 7 | storage, data, littlefs, , 2M, 8 | -------------------------------------------------------------------------------- /client/espressif/esp32s3_demo/sdkconfig.defaults: -------------------------------------------------------------------------------- 1 | # 2 | # Serial flasher config 3 | # 4 | CONFIG_ESPTOOLPY_FLASHMODE_QIO=y 5 | CONFIG_ESPTOOLPY_FLASH_SAMPLE_MODE_STR=y 6 | CONFIG_ESPTOOLPY_FLASHFREQ_80M=y 7 | CONFIG_ESPTOOLPY_FLASHFREQ="80m" 8 | CONFIG_ESPTOOLPY_HEADER_FLASHSIZE_UPDATE=y 9 | CONFIG_ESPTOOLPY_FLASHSIZE_8MB=y 10 | # end of Serial flasher config 11 | 12 | 13 | # 14 | # Partition Table 15 | # 16 | # CONFIG_PARTITION_TABLE_SINGLE_APP is not set 17 | # CONFIG_PARTITION_TABLE_SINGLE_APP_LARGE is not set 18 | # CONFIG_PARTITION_TABLE_TWO_OTA is not set 19 | CONFIG_PARTITION_TABLE_CUSTOM=y 20 | CONFIG_PARTITION_TABLE_CUSTOM_FILENAME="partitions.csv" 21 | CONFIG_PARTITION_TABLE_FILENAME="partitions.csv" 22 | CONFIG_PARTITION_TABLE_MD5=y 23 | # end of Partition Table 24 | 25 | # 26 | # Compiler options 27 | # 28 | CONFIG_COMPILER_OPTIMIZATION_PERF=y 29 | 30 | # 31 | # ESP WDT CONFIG 32 | # 33 | # CONFIG_TASK_WDT_PANIC is not set 34 | CONFIG_TASK_WDT_TIMEOUT_S=10 35 | 36 | # 37 | # ESP System Settings 38 | # 39 | CONFIG_ESP_MAIN_TASK_STACK_SIZE=4096 40 | 41 | # 42 | # ESP-TLS 43 | # 44 | CONFIG_ESP_TLS_INSECURE=y 45 | CONFIG_ESP_TLS_SKIP_SERVER_CERT_VERIFY=y 46 | 47 | # 48 | # FREERTOS 49 | # 50 | CONFIG_FREERTOS_ENABLE_BACKWARD_COMPATIBILITY=y 51 | CONFIG_FREERTOS_TIMER_TASK_STACK_DEPTH=4096 52 | CONFIG_FREERTOS_GENERATE_RUN_TIME_STATS=y 53 | 
CONFIG_FREERTOS_VTASKLIST_INCLUDE_COREID=y 54 | CONFIG_FREERTOS_HZ=1000 55 | -------------------------------------------------------------------------------- /client/espressif/esp32s3_demo/sdkconfig.defaults.esp32s3: -------------------------------------------------------------------------------- 1 | CONFIG_IDF_CMAKE=y 2 | CONFIG_IDF_TARGET_ARCH_XTENSA=y 3 | CONFIG_IDF_TARGET="esp32s3" 4 | CONFIG_IDF_TARGET_ESP32S3=y 5 | CONFIG_IDF_FIRMWARE_CHIP_ID=0x0009 6 | 7 | # 8 | # Make experimental features visible 9 | # 10 | CONFIG_IDF_EXPERIMENTAL_FEATURES=y 11 | 12 | # 13 | # Serial flasher config 14 | # 15 | CONFIG_BOOTLOADER_FLASH_DC_AWARE=y 16 | CONFIG_ESPTOOLPY_FLASHFREQ_120M=y 17 | CONFIG_SPI_FLASH_HPM_ENA=y 18 | # end of Serial flasher config 19 | 20 | # 21 | # Audio HAL 22 | # 23 | CONFIG_ESP32_S3_KORVO2_V3_BOARD=y 24 | # end of Audio HAL 25 | 26 | # 27 | # Audio Recorder 28 | # 29 | CONFIG_AFE_MIC_NUM=2 30 | # end of Audio Recorder 31 | 32 | # 33 | # ESP Speech Recognition 34 | # 35 | CONFIG_MODEL_IN_FLASH=y 36 | CONFIG_AFE_INTERFACE_V1=y 37 | CONFIG_SR_WN_WN9_HILEXIN=n 38 | # end of ESP Speech Recognition 39 | 40 | # 41 | # Component config 42 | # 43 | 44 | # 45 | # Driver configurations 46 | # 47 | 48 | # 49 | # mbedTLS 50 | # 51 | # CONFIG_MBEDTLS_INTERNAL_MEM_ALLOC is not set 52 | CONFIG_MBEDTLS_EXTERNAL_MEM_ALLOC=y 53 | # CONFIG_MBEDTLS_HARDWARE_AES is not set 54 | # CONFIG_MBEDTLS_HARDWARE_SHA is not set 55 | # end of mbedTLS 56 | 57 | 58 | # 59 | # ESP32s3-PSRAM 60 | # 61 | CONFIG_SPIRAM_XIP_FROM_PSRAM=y 62 | 63 | # 64 | # ESP32S3-Specific 65 | # 66 | # CONFIG_ESP32S3_DEFAULT_CPU_FREQ_80 is not set 67 | # CONFIG_ESP32S3_DEFAULT_CPU_FREQ_160 is not set 68 | CONFIG_ESP32S3_DEFAULT_CPU_FREQ_240=y 69 | CONFIG_ESP32S3_DEFAULT_CPU_FREQ_MHZ=240 70 | 71 | # 72 | # Cache config 73 | # 74 | # CONFIG_ESP32S3_INSTRUCTION_CACHE_16KB is not set 75 | CONFIG_ESP32S3_INSTRUCTION_CACHE_32KB=y 76 | CONFIG_ESP32S3_INSTRUCTION_CACHE_SIZE=0x8000 77 | # 
CONFIG_ESP32S3_INSTRUCTION_CACHE_4WAYS is not set 78 | CONFIG_ESP32S3_INSTRUCTION_CACHE_8WAYS=y 79 | CONFIG_ESP32S3_ICACHE_ASSOCIATED_WAYS=8 80 | CONFIG_ESP32S3_INSTRUCTION_CACHE_LINE_32B=y 81 | CONFIG_ESP32S3_INSTRUCTION_CACHE_LINE_SIZE=32 82 | # CONFIG_ESP32S3_INSTRUCTION_CACHE_WRAP is not set 83 | # CONFIG_ESP32S3_DATA_CACHE_16KB is not set 84 | # CONFIG_ESP32S3_DATA_CACHE_32KB is not set 85 | CONFIG_ESP32S3_DATA_CACHE_64KB=y 86 | CONFIG_ESP32S3_DATA_CACHE_SIZE=0x10000 87 | # CONFIG_ESP32S3_DATA_CACHE_4WAYS is not set 88 | CONFIG_ESP32S3_DATA_CACHE_8WAYS=y 89 | CONFIG_ESP32S3_DCACHE_ASSOCIATED_WAYS=8 90 | # CONFIG_ESP32S3_DATA_CACHE_LINE_32B is not set 91 | CONFIG_ESP32S3_DATA_CACHE_LINE_64B=y 92 | CONFIG_ESP32S3_DATA_CACHE_LINE_SIZE=64 93 | # CONFIG_ESP32S3_DATA_CACHE_WRAP is not set 94 | # end of Cache config 95 | 96 | CONFIG_ESP32S3_SPIRAM_SUPPORT=y 97 | 98 | # 99 | # SPI RAM config 100 | # 101 | CONFIG_SPIRAM_MODE_OCT=y 102 | CONFIG_SPIRAM_TYPE_AUTO=y 103 | CONFIG_SPIRAM_SPEED_120M=y 104 | CONFIG_SPIRAM=y 105 | CONFIG_SPIRAM_BOOT_INIT=y 106 | CONFIG_SPIRAM_USE_MALLOC=y 107 | CONFIG_SPIRAM_MEMTEST=y 108 | CONFIG_SPIRAM_MALLOC_ALWAYSINTERNAL=4096 109 | CONFIG_SPIRAM_TRY_ALLOCATE_WIFI_LWIP=y 110 | CONFIG_SPIRAM_MALLOC_RESERVE_INTERNAL=32768 111 | # end of SPI RAM config 112 | # end of ESP32S3-Specific 113 | -------------------------------------------------------------------------------- /resource/image/tech_support.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/volcengine/rtc-aigc-embedded-demo/9076c6c76592bef14f33b019f9441875ffc5b69c/resource/image/tech_support.png -------------------------------------------------------------------------------- /server/src/AccessToken.py: -------------------------------------------------------------------------------- 1 | # Copyright (2025) Beijing Volcano Engine Technology Ltd. 
# SPDX-License-Identifier: MIT

# The token generation code comes from https://www.volcengine.com/docs/6348/70121
# Implementations in other languages are available from the same link.
import base64
import hmac
import random
import struct
import time
from hashlib import sha256
from collections import OrderedDict

VERSION = "001"
VERSION_LENGTH = 3

APP_ID_LENGTH = 24

PrivPublishStream = 0

# not exported, do not use directly
privPublishAudioStream = 1
privPublishVideoStream = 2
privPublishDataStream = 3

PrivSubscribeStream = 4

class AccessToken:
    """RTC access token: identifies an app/room/user and carries a set of
    privileges, each with its own expiry timestamp."""

    # Initializes token struct by required parameters.
    def __init__(self, app_id, app_key, room_id, user_id):
        self.app_id = app_id
        self.app_key = app_key
        self.room_id = room_id
        self.user_id = user_id
        self.issued_at = int(time.time())
        # Draw the nonce from the OS entropy source. Re-seeding the shared
        # `random` module with the clock on every construction gave identical
        # nonces to tokens created within one clock tick and reset the RNG
        # state of every other user of `random` in the process.
        self.nonce = random.SystemRandom().randint(1, 99999999)
        self.expire_at = 0
        self.privileges = {}

    # AddPrivilege adds permission for token with an expiration.
    def add_privilege(self, privilege, expire_ts):
        if self.privileges is None:
            self.privileges = {}

        self.privileges[privilege] = expire_ts
        if privilege == PrivPublishStream:
            # Publishing implies the video, audio and data sub-privileges.
            self.privileges[privPublishVideoStream] = expire_ts
            self.privileges[privPublishAudioStream] = expire_ts
            self.privileges[privPublishDataStream] = expire_ts

    # ExpireTime sets token expire time, won't expire by default.
    # The token will be invalid after expireTime no matter what privilege's expireTime is.
54 | def expire_time(self, expire_ts): 55 | self.expire_at = expire_ts 56 | 57 | def pack_msg(self): 58 | m = pack_uint32(self.nonce) 59 | m += pack_uint32(self.issued_at) 60 | m += pack_uint32(self.expire_at) 61 | m += pack_string(self.room_id) 62 | m += pack_string(self.user_id) 63 | m += pack_map_uint32(self.privileges) 64 | return m 65 | 66 | # Serialize generates the token string 67 | def serialize(self): 68 | m = self.pack_msg() 69 | signature = hmac.new(self.app_key.encode('utf-8'), m, sha256).digest() 70 | content = pack_bytes(m) + pack_bytes(signature) 71 | 72 | return VERSION + self.app_id + base64.b64encode(content).decode('utf-8') 73 | 74 | # Verify checks if this token valid, called by server side. 75 | def verify(self, key): 76 | if 0 < self.expire_at < int(time.time()): 77 | return False 78 | 79 | self.app_key = key 80 | return hmac.new(self.app_key.encode('utf-8'), self.pack_msg(), sha256).digest() == self.signature 81 | 82 | # Parse retrieves token information from raw string 83 | def parse(raw): 84 | try: 85 | if len(raw) <= VERSION_LENGTH: 86 | return 87 | if raw[:VERSION_LENGTH] != VERSION: 88 | return 89 | 90 | token = AccessToken("", "", "", "") 91 | token.app_id = raw[VERSION_LENGTH:VERSION_LENGTH + APP_ID_LENGTH] 92 | 93 | content_buf = base64.b64decode(raw[VERSION_LENGTH + APP_ID_LENGTH:]) 94 | readbuf = ReadByteBuffer(content_buf) 95 | 96 | msg = readbuf.unpack_bytes() 97 | token.signature = readbuf.unpack_bytes() 98 | 99 | msgbuf = ReadByteBuffer(msg) 100 | token.nonce = msgbuf.unpack_uint32() 101 | token.issued_at = msgbuf.unpack_uint32() 102 | token.expire_at = msgbuf.unpack_uint32() 103 | token.room_id = msgbuf.unpack_string() 104 | token.user_id = msgbuf.unpack_string() 105 | token.privileges = msgbuf.unpack_map_uint32() 106 | return token 107 | 108 | except Exception as e: 109 | print("parse error:", str(e)) 110 | return 111 | 112 | 113 | def pack_uint16(x): 114 | return struct.pack('= 200 and interrupt_speech_duration_client <= 
3000: 196 | interrupt_speech_duration = interrupt_speech_duration_client 197 | 198 | 199 | 200 | # 读取客户端传来的 vad_silence_time 201 | vad_silence_time = 600 202 | if "vad_silence_time" in json_obj: 203 | vad_silence_time = int(json_obj["vad_silence_time"]) 204 | if vad_silence_time < 500: 205 | vad_silence_time = 500 206 | elif vad_silence_time >= 3000: 207 | vad_silence_time = 2999 208 | 209 | # 读取客户端传来的 tts_is_bidirection 和 voice_type,设置 tts_provider_params 210 | tts_provider = "volcano" 211 | volcano_tts_config = { 212 | "app" : { 213 | "appid" : TTS_APP_ID, # 语音合成服务的app id 214 | "cluster" : "volcano_tts" # 具体语音合成服务对应的 Cluster ID 215 | }, 216 | "audio" : { 217 | "voice_type" : DEFAULT_VOICE_TYPE, # 音色 id 218 | "speed_ratio" : 1.0, # 语速。 219 | "volume_ratio" : 1.0, # 音量。 220 | "pitch_ratio" : 1.0 # 声调 221 | } 222 | } 223 | 224 | volcano_bi_tts_config = { 225 | "app" : { 226 | "appid" : TTS_APP_ID, # 语音合成服务的app id 227 | "token" : TTS_ACCESS_TOKEN # 语音合成服务的token 228 | }, 229 | "audio" : { 230 | "voice_type" : DEFAULT_VOICE_TYPE, # 音色 id 231 | "pitch_rate" : 0, # 音调 取值范围为 [-12,12]。默认值为 0 232 | "speech_rate" : 0 # 语速。取值范围为[-50,100],100代表2.0倍速,-50代表0.5倍速。默认值为 0 233 | }, 234 | "Additions" : { 235 | "enable_latex_tn" : True, # 是否可以播报 latex公式 236 | "disable_markdown_filter" : True, # 是否关闭 markdown 格式过滤。 237 | "enable_language_detector" : False # 是否自动识别语种。 238 | }, 239 | "ResourceId": "volc.service_type.10029" 240 | } 241 | tts_provider_params = volcano_tts_config 242 | if "tts_is_bidirection" in json_obj: 243 | if json_obj["tts_is_bidirection"] == True: 244 | tts_provider = "volcano_bidirection" 245 | tts_provider_params = volcano_bi_tts_config 246 | else: 247 | tts_provider = "volcano" 248 | tts_provider_params = volcano_tts_config 249 | 250 | if "voice_type" in json_obj: 251 | voice_type = str(json_obj["voice_type"]) 252 | else: 253 | voice_type = DEFAULT_VOICE_TYPE 254 | tts_provider_params["audio"]["voice_type"] = voice_type 255 | 256 | # 读取客户端传来的 llm_prefill 257 | 
llm_prefill = False 258 | if "llm_prefill" in json_obj and json_obj["llm_prefill"] == True: 259 | llm_prefill = True 260 | 261 | # 读取客户端传来的 disable_rts_subtitle 262 | disable_rts_subtitle = False 263 | if "disable_rts_subtitle" in json_obj and json_obj["disable_rts_subtitle"] == True: 264 | disable_rts_subtitle = True 265 | 266 | # 读取客户端传来的 enable_conversation_state_callback 267 | enable_conversation_state_callback = False 268 | if "enable_conversation_state_callback" in json_obj and json_obj["enable_conversation_state_callback"] == True: 269 | enable_conversation_state_callback = True 270 | 271 | fc_tools = None 272 | if "fc_tools" in json_obj: 273 | fc_tools = json_obj["fc_tools"] 274 | 275 | request_body = { 276 | "AppId" : room_info["app_id"], # RTC App id 277 | "RoomId" : room_info["room_id"], # RTC 房间 id 278 | "TaskId" : room_info["task_id"], # 智能体任务id,你必须对每个智能体任务设定 TaskId,且在后续进行任务更新和结束时也须使用该 TaskId。 279 | "Config" : { 280 | "ASRConfig" : { 281 | "Provider" : "volcano", # 语音识别服务提供商。volcano:火山引擎语音识别。 282 | "ProviderParams" : asr_provider_params, # 参考 VolcanoASRConfig 和 VolcanoLMASRConfig 283 | "VADConfig" : { 284 | "SilenceTime" : vad_silence_time # 人声检查判停时间。停顿时间若高于该值设定时间,则认为一句话结束。取值范围为 [500,3000),单位为 ms,默认值为 600 285 | }, 286 | "VolumeGain" : 0.3, # 音量增益值。降低采集音量,以减少噪音引起的 ASR 错误识别。默认值 1.0,推荐值 0.3 287 | "InterruptConfig" : { 288 | "InterruptSpeechDuration" : interrupt_speech_duration, # 自动打断触发阈值。房间内真人用户持续说话时间达到该参数设定值后,智能体自动停止输出。取值范围为0,[200,3000],单位为 ms,值越大智能体说话越不容易被打断。默认值为 0,表示用户发出声音且包含真实语义时即打断智能体输出。 289 | }, 290 | "TurnDetectionMode" : 0 # 新一轮对话的触发方式。0:服务端检测到完整的一句话后,自动触发新一轮对话。1:收到输入结束信令或说话字幕结果后,你自行决定是否触发新一轮会话。 291 | }, 292 | "TTSConfig" : { 293 | "IgnoreBracketText" : [1, 2, 3, 4, 5], # 非必填, 过滤大模型生成的文本中符号 1:"()" 2:"()", 3:"【】", 4:"[]", 5:"{}".默认不过滤 294 | "Provider" : tts_provider, # TTS 服务供应商 295 | "ProviderParams" : tts_provider_params 296 | }, 297 | "LLMConfig" : { 298 | "Mode" : "ArkV3", # 大模型名称,该参数固定取值: ArkV3 299 | "EndPointId" : end_point_id, # 
推理接入点。使用方舟大模型时必填。 300 | "MaxTokens" : 1024, # 非必填,输出文本的最大token数,默认 1024 301 | "Temperature" : 0.1, # 非必填,用于控制生成文本的随机性和创造性,值越大随机性越高。取值范围为(0,1],默认值为 0.1 302 | "TopP" : 0.3, # 非必填,用于控制输出tokens的多样性,值越大输出的tokens类型越丰富。取值范围为(0,1],默认值为 0.3 303 | "SystemMessages" : [ # 非必填,大模型 System 角色预设指令,可用于控制模型输出。 304 | "你是一个语言大模型,你只能接收文本数据。用户的语音通过语音识别服务转换成文本,发送给你。有时候语音识别服务会有错误,你可以根据具体语境判断用户的真实意图,遇到实在理解不了的错误语句,可以引导用户换种方式表达。同样的,你输出的文本会通过语音合成服务转换成音频,然后发送给用户。有多种方式可以打断合成的音频,如果你发现用户不记得你前面一句说的是什么,请不要疑惑。在和用户对话时,请牢记:你的名字是小宁,性格幽默又善解人意。你在表达时需简明扼要,有自己的观点。" 305 | ], 306 | "UserMessages" : [ # 非必填,大模型 User 角色预设 Prompt,可用于增强模型的回复质量,模型回复时会参考此处内容。 307 | "user:\"你是谁\"", 308 | "assistant:\"我是问答助手\"", 309 | "user:\"你能干什么\"", 310 | "user:\"我能回答问题\"" 311 | ], 312 | "Prefill" : llm_prefill, # 非必填, 将 ASR 中间结果提前送入大模型进行处理以降低延时。开启后会产生额外模型消耗。默认值 false 313 | "HistoryLength" : 3, # 非必填,大模型上下文长度,默认 3。 314 | # "Tools" : [...] # 非必填,使用 Function calling 功能时,模型可以调用的工具列表 参考:https://www.volcengine.com/docs/6348/1359441 315 | # "VisionConfig" : {} # 视觉理解能力配置。仅在推理点选择模型为 doubao-vision-pro 和 doubao-vision-lite 时生效。该功能使用说明参看 https://www.volcengine.com/docs/6348/1408245 316 | }, 317 | "SubtitleConfig" : { 318 | "DisableRTSSubtitle" : disable_rts_subtitle, # 非必填,是否关闭房间内字幕回调,默认 false 319 | # "ServerMessageUrl" : "Your url", # 非必填,用于服务端接收字幕回调 320 | # "ServerMessageSignature" : "", # 用于你的服务端字幕回调鉴权 321 | "SubtitleMode" : 0 # 字幕回调时是否需要对齐音频时间戳。0 对齐,1 不对齐。默认 0 322 | }, 323 | "InterruptMode" : 0 # 非必填,智能体对话打断模式。 0: 智能体语音可以被用户语音打断 1: 不能被用户语音打断 324 | # "FunctionCallingConfig" : { # 服务端接收 Function calling 函数工具调用的信息指令配置。 325 | # "ServerMessageUrl" : "Your URL", # 服务端接收 Function calling 函数工具调用的信息指令的 URL 地址。功能使用详情参看 https://www.volcengine.com/docs/6348/1359441#callingconfig 326 | # "ServerMessageSignature" : "" # 鉴权签名。 327 | # } 328 | }, 329 | "AgentConfig" : { 330 | "TargetUserId" : [room_info["uid"]], # 房间内客户端 SDK 用户对应的 UserId。仅支持传入一个 UserId。注意该值是一个数组 331 | "WelcomeMessage" : "你好,有什么可以帮到你的吗", # 智能体启动后的欢迎词。 332 | "UserId" : 
room_info["bot_uid"], # 智能体的user id 333 | "EnableConversationStateCallback" : enable_conversation_state_callback # 是否接收任务状态变化回调。默认值为 false 334 | } 335 | } 336 | if fc_tools != None and len(fc_tools) > 0 : 337 | request_body["Config"]["LLMConfig"]["Tools"] = fc_tools 338 | 339 | request_body_str = json.dumps(request_body) 340 | canonical_query_string = "Action=%s&Version=%s" % (RTC_API_START_VOICE_CHAT_ACTION, RTC_API_VERSION) 341 | code, response = RtcApiRequester.request_rtc_api(RTC_API_HOST, "POST", "/", canonical_query_string, None, request_body_str, AK, SK) 342 | print("request_rtc_api start code:", code) 343 | print("request_rtc_api start response:", response) 344 | if code == RESPONSE_CODE_SUCCESS: 345 | if "Result" in response and response["Result"] == "ok": 346 | return None 347 | else: 348 | return response["ResponseMetadata"]["Error"]["Message"] 349 | else: 350 | if response != None: 351 | return response["ResponseMetadata"]["Error"]["Message"] 352 | else: 353 | return "request rtc api response code " + str(code) 354 | return None 355 | 356 | ###################################### stop voice chat ####################################### 357 | def stop_voice_chat(self, json_obj): 358 | # 参考 https://www.volcengine.com/docs/6348/1404672 359 | if "room_id" not in json_obj or "task_id" not in json_obj or "app_id" not in json_obj: 360 | self.response_data(RESPONSE_CODE_REQUEST_ERROR, "stop_voice_chat: \"room_id\", \"task_id\", \"app_id\" must be in json") 361 | return 362 | 363 | ret = self.request_stop_voice_chat(json_obj) 364 | if ret == None: 365 | resp_obj = { 366 | "data" : json_obj 367 | } 368 | self.response_data(RESPONSE_CODE_SUCCESS, "", resp_obj) 369 | else: 370 | self.response_data(RESPONSE_CODE_SERVER_ERROR, ret) 371 | 372 | def request_stop_voice_chat(self, json_obj): 373 | # 参考 https://www.volcengine.com/docs/6348/1404672 374 | request_body = { 375 | "AppId" : json_obj["app_id"], # rtc app id 376 | "RoomId" : json_obj["room_id"], # rtc 房间 id 377 | 
"TaskId" : json_obj["task_id"] # rtc 客户端用户id 378 | } 379 | 380 | request_body_str = json.dumps(request_body) 381 | canonical_query_string = "Action=%s&Version=%s" % (RTC_API_STOP_VOICE_CHAT_ACTION, RTC_API_VERSION) 382 | code, response = RtcApiRequester.request_rtc_api(RTC_API_HOST, "POST", "/", canonical_query_string, None, request_body_str, AK, SK) 383 | print("request_rtc_api stop code:", code) 384 | print("request_rtc_api stop response:", response) 385 | if code == RESPONSE_CODE_SUCCESS: 386 | if "Result" in response and response["Result"] == "ok": 387 | return None 388 | else: 389 | return response["ResponseMetadata"]["Error"]["Message"] 390 | else: 391 | if response != None: 392 | return response["ResponseMetadata"]["Error"]["Message"] 393 | else: 394 | return "request rtc api response code " + str(code) 395 | return None 396 | 397 | ###################################### update voice chat ##################################### 398 | def update_voice_chat(self, json_obj): 399 | # 更新智能体详细信息请参考 https://www.volcengine.com/docs/6348/1404671 400 | if "room_id" not in json_obj or "task_id" not in json_obj or "app_id" not in json_obj or "command" not in json_obj: 401 | self.response_data(RESPONSE_CODE_REQUEST_ERROR, "update_voice_chat: \"room_id\", \"task_id\", \"app_id\", \"command\" must be in json") 402 | return 403 | update_commands = {"interrupt", "function", "external_text_to_speech", "external_prompts_for_llm", "external_text_to_llm", "finish_speech_recognition"} 404 | if json_obj["command"] not in update_commands: 405 | self.response_data(RESPONSE_CODE_REQUEST_ERROR, "update_voice_chat: your command == " + json_obj["command"] + ", command must be in " + str(update_commands)) 406 | return 407 | required_message_commands = {"function", "external_text_to_speech", "external_prompts_for_llm", "external_text_to_llm"} 408 | if json_obj["command"] in required_message_commands and "message" not in json_obj: 409 | self.response_data(RESPONSE_CODE_REQUEST_ERROR, 
"update_voice_chat: your command == " + json_obj["command"] + ", \"message\" must be in json") 410 | return 411 | 412 | required_interrupt_mode_commands = {"external_text_to_speech", "external_text_to_llm"} 413 | if json_obj["command"] in required_interrupt_mode_commands and "interrupt_mode" not in json_obj: 414 | self.response_data(RESPONSE_CODE_REQUEST_ERROR, "update_voice_chat: your command == " + json_obj["command"] + ", \"interrupt_mode\" must be in json, interrupt_mode == 1, 2, or 3") 415 | return 416 | if "interrupt_mode" in json_obj: 417 | if json_obj["interrupt_mode"] not in {1, 2, 3}: 418 | self.response_data(RESPONSE_CODE_REQUEST_ERROR, "update_voice_chat: your command == " + json_obj["command"] + ", \"interrupt_mode\" must be in json, interrupt_mode == 1, 2, or 3") 419 | return 420 | ret = self.request_update_voice_chat(json_obj) 421 | if ret == None: 422 | resp_obj = { 423 | "data" : json_obj 424 | } 425 | self.response_data(RESPONSE_CODE_SUCCESS, "", resp_obj) 426 | else: 427 | self.response_data(RESPONSE_CODE_SERVER_ERROR, ret) 428 | 429 | def request_update_voice_chat(self, json_obj): 430 | # 参考 https://www.volcengine.com/docs/6348/1404671 431 | update_commands_map = { 432 | "interrupt" : "Interrupt", 433 | "function" : "Function", 434 | "external_text_to_speech" : "ExternalTextToSpeech", 435 | "external_prompts_for_llm" : "ExternalPromptsForLLM", 436 | "external_text_to_llm" : "ExternalTextToLLM", 437 | "finish_speech_recognition" : "FinishSpeechRecognition" 438 | } 439 | parsed_command = update_commands_map[json_obj["command"]] 440 | request_body = { 441 | "AppId" : json_obj["app_id"], # rtc app id 442 | "RoomId" : json_obj["room_id"], # rtc 房间 id 443 | "TaskId" : json_obj["task_id"], # 创建智能体时用的TaskId 444 | "Command" : parsed_command, # 更新指令 interrupt: 打断智能体说话;function:传回工具调用信息指令。 445 | # "Message" : "..." # 工具调用信息指令,格式为 Json 转译字符串。Command 取值为 function时,Message必填。 446 | # "InterruptMode" : 1 # 打断模式。取值范围为 1, 2, 3. 
当 command 为 ExternalTextToSpeech 或 ExternalTextToLLM 时为该参数必填。 447 | } 448 | if "interrupt_mode" in json_obj: 449 | request_body["InterruptMode"] = json_obj["interrupt_mode"] 450 | if json_obj["command"] == "function": 451 | # function calling 数据, 参考 https://www.volcengine.com/docs/6348/1359441 452 | # 客户端传来的message数据是一个json字符串,内容如下: 453 | # { 454 | # "subscriber_user_id" : "", 455 | # "tool_calls" : 456 | # [ 457 | # { 458 | # "function" : 459 | # { 460 | # "arguments" : "{\\"location\\": \\"\\u5317\\u4eac\\u5e02\\"}", 461 | # "name" : "get_current_weather" 462 | # }, 463 | # "id" : "call_py400kek0e3pczrqdxgnb3lo", 464 | # "type" : "function" 465 | # } 466 | # ] 467 | # } 468 | 469 | print(json_obj["message"]) 470 | message_json_obj = parse_json(json_obj["message"]) 471 | if message_json_obj == None: 472 | self.response_data(RESPONSE_CODE_REQUEST_ERROR, "Post data is not a json string.") 473 | return 474 | # 下面代码只是示例,要根据实际情况,解析函数名称和参数,做出真实的响应 475 | if "tool_calls" not in message_json_obj or len(message_json_obj["tool_calls"]) <= 0 or "id" not in message_json_obj["tool_calls"][0]: 476 | self.response_data(RESPONSE_CODE_REQUEST_ERROR, "function calling message is error.") 477 | return 478 | message_body = { 479 | "ToolCallID" : message_json_obj["tool_calls"][0]["id"], 480 | "Content" : "今天天气很好,阳光明媚,偶尔有微风。" 481 | } 482 | 483 | request_body["Message"] = json.dumps(message_body) 484 | elif "message" in json_obj: 485 | request_body["Message"] = json_obj["message"] 486 | 487 | request_body_str = json.dumps(request_body) 488 | canonical_query_string = "Action=%s&Version=%s" % (RTC_API_UPDATE_VOICE_CHAT_ACTION, RTC_API_VERSION) 489 | code, response = RtcApiRequester.request_rtc_api(RTC_API_HOST, "POST", "/", canonical_query_string, None, request_body_str, AK, SK) 490 | print("request_rtc_api update code:", code) 491 | print("request_rtc_api update response:", response) 492 | if code == RESPONSE_CODE_SUCCESS: 493 | if "Result" in response and response["Result"] == "ok": 494 
| return None 495 | else: 496 | return response["ResponseMetadata"]["Error"]["Message"] 497 | else: 498 | if response != None: 499 | return response["ResponseMetadata"]["Error"]["Message"] 500 | else: 501 | return "request rtc api response code " + str(code) 502 | return None 503 | 504 | 505 | ############################################################################################## 506 | def response_data(self, code, msg, extra_data = None): 507 | self.send_response(code) 508 | self.send_header('Content-type', 'application/json') 509 | self.end_headers() 510 | ret_data = { 511 | "code": code, 512 | "msg" : msg 513 | } 514 | 515 | if extra_data != None: 516 | for k, v in extra_data.items(): 517 | ret_data[k] = v 518 | self.wfile.write(json.dumps(ret_data).encode()) 519 | 520 | 521 | def parse_post_data(self): 522 | # check headers 523 | content_type = self.headers.get("Content-Type") 524 | authorization = self.headers.get("Authorization") 525 | if content_type != "application/json": 526 | self.response_data(RESPONSE_CODE_REQUEST_ERROR, "header Content-Type error, must be application/json.") 527 | return None 528 | if authorization == None or authorization == "": 529 | self.response_data(RESPONSE_CODE_REQUEST_ERROR, "header Authorization error, Authorization not be set.") 530 | return None 531 | if authorization != ("af78e30" + RTC_APP_ID): 532 | self.response_data(RESPONSE_CODE_REQUEST_ERROR, "header Authorization error, Bad Authorization.") 533 | return None 534 | 535 | # check post_data is json 536 | content_length = int(self.headers['Content-Length']) 537 | post_data = self.rfile.read(content_length).decode('utf-8') 538 | json_obj = None 539 | try: 540 | json_obj = json.loads(post_data) 541 | except Exception as e: 542 | self.response_data(RESPONSE_CODE_REQUEST_ERROR, "post data is not json string.") 543 | return None 544 | return json_obj 545 | 546 | 547 | 548 | # 启动服务 549 | with socketserver.TCPServer(("", PORT), RtcAigcHTTPRequestHandler) as httpd: 550 | 
print("serving at port", PORT) 551 | httpd.serve_forever() 552 | -------------------------------------------------------------------------------- /server/src/RtcApiRequester.py: -------------------------------------------------------------------------------- 1 | # Copyright (2025) Beijing Volcano Engine Technology Ltd. 2 | # SPDX-License-Identifier: MIT 3 | 4 | import datetime 5 | import hashlib 6 | import hmac 7 | import requests 8 | 9 | def hash_sha256(content): 10 | return hashlib.sha256(content.encode("utf-8")).hexdigest() 11 | 12 | def hmac_sha256(key, content): 13 | return hmac.new(key, content.encode("utf-8"), hashlib.sha256).digest() 14 | 15 | def request_rtc_api(http_host, http_request_method, canonical_uri, canonical_query_string, http_headers, http_body, AK, SK): 16 | now = datetime.datetime.utcnow() 17 | 18 | # 步骤1:创建规范请求 19 | x_content_sha256 = hash_sha256(http_body) 20 | x_date = now.strftime("%Y%m%dT%H%M%SZ") 21 | content_type = "application/json" 22 | signed_headers_vec = ( 23 | ("content-type", content_type), 24 | ("host", http_host), 25 | ("x-content-sha256", x_content_sha256), 26 | ("x-date", x_date) 27 | ) 28 | canonical_headers = "\n".join((":".join(x) for x in signed_headers_vec)) + "\n" 29 | signed_headers = ";".join((x[0] for x in signed_headers_vec)) 30 | canonical_request = http_request_method + "\n" + canonical_uri + "\n" + canonical_query_string + "\n" + canonical_headers + "\n" + signed_headers + "\n" + x_content_sha256 31 | 32 | # 步骤2:创建待签字符串 33 | credential_scope = x_date[0:8] + "/cn-north-1/rtc/request" 34 | string_to_sign = "HMAC-SHA256" + "\n" + x_date + "\n" + credential_scope + "\n" + hash_sha256(canonical_request) 35 | 36 | # 步骤3:构建签名 37 | hmac_contents = credential_scope.split("/") 38 | hmac_contents.append(string_to_sign) 39 | 40 | signature = SK.encode("utf-8") 41 | for hmac_content in hmac_contents: 42 | signature = hmac_sha256(signature, hmac_content) 43 | signature = signature.hex() 44 | 45 | # 步骤4:生成Authorization 46 | 
authorization = "HMAC-SHA256 Credential=%s/%s, SignedHeaders=%s, Signature=%s" % (AK, credential_scope, signed_headers, signature) 47 | 48 | # 步骤5:发起http请求 49 | if canonical_uri == "/": 50 | canonical_uri = "" 51 | url = 'https://' + http_host + canonical_uri + "?" + canonical_query_string 52 | headers = { 53 | "Content-Type" : content_type, 54 | "Host" : http_host, 55 | "X-Content-Sha256": x_content_sha256, 56 | "X-Date": x_date, 57 | "Authorization" : authorization 58 | } 59 | 60 | if http_headers != None: 61 | headers.update(http_headers) 62 | 63 | if http_request_method == "POST": 64 | response = requests.post(url, headers=headers, data=http_body) 65 | else: 66 | response = requests.get(url, headers=headers) 67 | 68 | return (response.status_code, response.json()) 69 | --------------------------------------------------------------------------------