├── .gitignore ├── esp32-m5stack-cores3 ├── src │ ├── idf_component.yml │ ├── rtvi_callbacks.cpp │ ├── CMakeLists.txt │ ├── main.cpp │ ├── main.h │ ├── wifi.cpp │ ├── webrtc.cpp │ ├── rtvi.cpp │ ├── media.cpp │ └── http.cpp ├── partitions.csv ├── sdkconfig.defaults ├── dependencies.lock └── CMakeLists.txt ├── esp32-m5stack-atoms3r ├── src │ ├── idf_component.yml │ ├── rtvi_callbacks.cpp │ ├── CMakeLists.txt │ ├── main.cpp │ ├── main.h │ ├── wifi.cpp │ ├── webrtc.cpp │ ├── rtvi.cpp │ ├── http.cpp │ └── media.cpp ├── partitions.csv ├── sdkconfig.defaults ├── dependencies.lock └── CMakeLists.txt ├── esp32-s3-box-3 ├── src │ ├── idf_component.yml │ ├── rtvi_callbacks.cpp │ ├── CMakeLists.txt │ ├── main.cpp │ ├── main.h │ ├── wifi.cpp │ ├── screen.cpp │ ├── webrtc.cpp │ ├── media.cpp │ ├── rtvi.cpp │ └── http.cpp ├── partitions.csv ├── components │ └── peer │ │ └── CMakeLists.txt ├── sdkconfig.defaults ├── CMakeLists.txt └── dependencies.lock ├── .clang-format ├── .github └── workflows │ ├── clang-format-check.yml │ └── build.yaml ├── .gitmodules ├── LICENSE └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | sdkconfig 3 | sdkconfig.old 4 | managed_components 5 | -------------------------------------------------------------------------------- /esp32-m5stack-cores3/src/idf_component.yml: -------------------------------------------------------------------------------- 1 | dependencies: 2 | idf: 3 | version: ">=4.1.0" 4 | m5stack/m5unified: "^0.2.4" 5 | -------------------------------------------------------------------------------- /esp32-m5stack-atoms3r/src/idf_component.yml: -------------------------------------------------------------------------------- 1 | dependencies: 2 | idf: 3 | version: '>=4.1.0' 4 | espressif/esp_codec_dev: ^1.3.5 5 | -------------------------------------------------------------------------------- /esp32-s3-box-3/src/idf_component.yml: 
-------------------------------------------------------------------------------- 1 | dependencies: 2 | idf: 3 | version: ">=4.1.0" 4 | espressif/esp-box-3: 5 | version: "^3.0.0~1" 6 | rules: 7 | - if: target in ["esp32s3"] 8 | -------------------------------------------------------------------------------- /esp32-s3-box-3/partitions.csv: -------------------------------------------------------------------------------- 1 | # ESP-IDF Partition Table 2 | # Name, Type, SubType, Offset, Size, Flags 3 | nvs, data, nvs, 0x9000, 0x6000, 4 | phy_init, data, phy, 0xf000, 0x1000, 5 | factory, app, factory, 0x10000, 0x180000, 6 | 7 | -------------------------------------------------------------------------------- /esp32-m5stack-atoms3r/partitions.csv: -------------------------------------------------------------------------------- 1 | # ESP-IDF Partition Table 2 | # Name, Type, SubType, Offset, Size, Flags 3 | nvs, data, nvs, 0x9000, 0x6000, 4 | phy_init, data, phy, 0xf000, 0x1000, 5 | factory, app, factory, 0x10000, 0x180000, 6 | 7 | -------------------------------------------------------------------------------- /esp32-m5stack-cores3/partitions.csv: -------------------------------------------------------------------------------- 1 | # ESP-IDF Partition Table 2 | # Name, Type, SubType, Offset, Size, Flags 3 | nvs, data, nvs, 0x9000, 0x6000, 4 | phy_init, data, phy, 0xf000, 0x1000, 5 | factory, app, factory, 0x10000, 0x180000, 6 | 7 | -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | BasedOnStyle: Google 2 | IndentWidth: 2 3 | ColumnLimit: 80 4 | AllowShortFunctionsOnASingleLine: Empty 5 | AllowShortIfStatementsOnASingleLine: false 6 | AllowShortLoopsOnASingleLine: false 7 | BreakBeforeBraces: Attach 8 | DerivePointerAlignment: false 9 | PointerAlignment: Right 10 | -------------------------------------------------------------------------------- 
/.github/workflows/clang-format-check.yml: -------------------------------------------------------------------------------- 1 | name: clang-format Check 2 | on: [push, pull_request] 3 | jobs: 4 | formatting-check: 5 | name: Formatting Check 6 | runs-on: ubuntu-latest 7 | steps: 8 | - uses: actions/checkout@v4 9 | - name: Run clang-format style check for C/C++/Protobuf programs. 10 | uses: jidicula/clang-format-action@v4.13.0 11 | with: 12 | clang-format-version: '17' 13 | check-path: 'src' 14 | -------------------------------------------------------------------------------- /esp32-m5stack-atoms3r/sdkconfig.defaults: -------------------------------------------------------------------------------- 1 | CONFIG_IDF_TARGET="esp32s3" 2 | 3 | # Enable PSRAM 4 | CONFIG_SPIRAM=y 5 | CONFIG_SPIRAM_MODE_QUAD=n 6 | CONFIG_SPIRAM_MODE_OCT=y 7 | 8 | # Performance 9 | CONFIG_ESP_DEFAULT_CPU_FREQ_MHZ_240=y 10 | CONFIG_FREERTOS_HZ=1000 11 | CONFIG_COMPILER_OPTIMIZATION_PERF=y 12 | 13 | # Enable DTLS 14 | CONFIG_MBEDTLS_SSL_PROTO_DTLS=y 15 | 16 | # Defaults to partitions.csv 17 | CONFIG_PARTITION_TABLE_CUSTOM=y 18 | 19 | # libpeer requires large stack allocations 20 | CONFIG_ESP_MAIN_TASK_STACK_SIZE=16384 21 | -------------------------------------------------------------------------------- /esp32-m5stack-cores3/sdkconfig.defaults: -------------------------------------------------------------------------------- 1 | CONFIG_IDF_TARGET="esp32s3" 2 | 3 | # Faster Flashing 4 | CONFIG_ESPTOOLPY_FLASHMODE_QIO=y 5 | CONFIG_ESPTOOLPY_FLASHSIZE_16MB=y 6 | 7 | # Enable PSRAM 8 | CONFIG_SPIRAM=y 9 | 10 | # Performance 11 | CONFIG_ESP_DEFAULT_CPU_FREQ_MHZ_240=y 12 | CONFIG_FREERTOS_HZ=1000 13 | CONFIG_COMPILER_OPTIMIZATION_PERF=y 14 | 15 | # Enable DTLS 16 | CONFIG_MBEDTLS_SSL_PROTO_DTLS=y 17 | 18 | # Defaults to partitions.csv 19 | CONFIG_PARTITION_TABLE_CUSTOM=y 20 | 21 | # libpeer requires large stack allocations 22 | CONFIG_ESP_MAIN_TASK_STACK_SIZE=16384 23 | 
-------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "esp32-s3-box-3/components/srtp"] 2 | path = esp32-s3-box-3/components/srtp 3 | url = https://git@github.com/sepfy/esp_ports 4 | [submodule "esp32-s3-box-3/deps/libpeer"] 5 | path = esp32-s3-box-3/deps/libpeer 6 | url = https://github.com/aconchillo/libpeer.git 7 | [submodule "esp32-s3-box-3/components/esp-libopus"] 8 | path = esp32-s3-box-3/components/esp-libopus 9 | url = https://github.com/XasWorks/esp-libopus.git 10 | [submodule "esp32-s3-box-3/components/esp-protocols"] 11 | path = esp32-s3-box-3/components/esp-protocols 12 | url = https://github.com/espressif/esp-protocols.git 13 | -------------------------------------------------------------------------------- /esp32-s3-box-3/src/rtvi_callbacks.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "main.h" 4 | 5 | static void on_bot_started_speaking() { 6 | // pipecat_screen_new_log(); 7 | } 8 | 9 | static void on_bot_stopped_speaking() { 10 | // pipecat_screen_log("\n"); 11 | } 12 | 13 | static void on_bot_tts_text(const char *text) { 14 | // pipecat_screen_log(text); 15 | // pipecat_screen_log(" "); 16 | } 17 | 18 | rtvi_callbacks_t pipecat_rtvi_callbacks = { 19 | .on_bot_started_speaking = on_bot_started_speaking, 20 | .on_bot_stopped_speaking = on_bot_stopped_speaking, 21 | .on_bot_tts_text = on_bot_tts_text, 22 | }; 23 | -------------------------------------------------------------------------------- /esp32-m5stack-atoms3r/src/rtvi_callbacks.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "main.h" 4 | 5 | static void on_bot_started_speaking() { 6 | // pipecat_screen_new_log(); 7 | } 8 | 9 | static void on_bot_stopped_speaking() { 10 | // pipecat_screen_log("\n"); 11 | } 12 | 13 | static 
void on_bot_tts_text(const char *text) { 14 | // pipecat_screen_log(text); 15 | // pipecat_screen_log(" "); 16 | } 17 | 18 | rtvi_callbacks_t pipecat_rtvi_callbacks = { 19 | .on_bot_started_speaking = on_bot_started_speaking, 20 | .on_bot_stopped_speaking = on_bot_stopped_speaking, 21 | .on_bot_tts_text = on_bot_tts_text, 22 | }; 23 | -------------------------------------------------------------------------------- /esp32-m5stack-cores3/src/rtvi_callbacks.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "main.h" 4 | 5 | static void on_bot_started_speaking() { 6 | // pipecat_screen_new_log(); 7 | } 8 | 9 | static void on_bot_stopped_speaking() { 10 | // pipecat_screen_log("\n"); 11 | } 12 | 13 | static void on_bot_tts_text(const char *text) { 14 | // pipecat_screen_log(text); 15 | // pipecat_screen_log(" "); 16 | } 17 | 18 | rtvi_callbacks_t pipecat_rtvi_callbacks = { 19 | .on_bot_started_speaking = on_bot_started_speaking, 20 | .on_bot_stopped_speaking = on_bot_stopped_speaking, 21 | .on_bot_tts_text = on_bot_tts_text, 22 | }; 23 | -------------------------------------------------------------------------------- /esp32-m5stack-atoms3r/dependencies.lock: -------------------------------------------------------------------------------- 1 | dependencies: 2 | espressif/esp_codec_dev: 3 | component_hash: c71e2d13dad6fc41561590dd88dbc45c79e3f4ef48d5ee3575c60e8b6c8e79d5 4 | dependencies: 5 | - name: idf 6 | require: private 7 | version: '>=4.0' 8 | source: 9 | registry_url: https://components.espressif.com/ 10 | type: service 11 | version: 1.3.5 12 | idf: 13 | source: 14 | type: idf 15 | version: 5.4.1 16 | direct_dependencies: 17 | - espressif/esp_codec_dev 18 | - idf 19 | manifest_hash: c74d03622862bf2a85301c12f34d445e0d5bfc643f05f1b14820503605dd0b55 20 | target: esp32s3 21 | version: 2.0.0 22 | -------------------------------------------------------------------------------- /.github/workflows/build.yaml: 
-------------------------------------------------------------------------------- 1 | name: Build 2 | on: 3 | push: 4 | branches: 5 | - master 6 | pull_request: 7 | 8 | jobs: 9 | build: 10 | strategy: 11 | matrix: 12 | target: [esp32s3, linux] 13 | 14 | runs-on: ubuntu-latest 15 | 16 | steps: 17 | - name: Checkout repo 18 | uses: actions/checkout@v4 19 | with: 20 | submodules: 'recursive' 21 | # Build from esp32-s3-box-3 (the repo root has no CMakeLists.txt); its CMakeLists.txt requires PIPECAT_SMALLWEBRTC_URL. 22 | - name: Build 23 | run: | 24 | docker run -v $PWD:/project -w /project/esp32-s3-box-3 -u 0 \ 25 | -e HOME=/tmp -e WIFI_SSID=A -e WIFI_PASSWORD=B -e PIPECAT_SMALLWEBRTC_URL=X \ 26 | espressif/idf:latest \ 27 | /bin/bash -c 'idf.py --preview set-target ${{ matrix.target }} && idf.py build' 28 | shell: bash 29 | -------------------------------------------------------------------------------- /esp32-s3-box-3/components/peer/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(PEER_PROJECT_PATH "../../deps/libpeer") 2 | file(GLOB CODES "${PEER_PROJECT_PATH}/src/*.c") 3 | 4 | idf_component_register( 5 | SRCS ${CODES} 6 | INCLUDE_DIRS "${PEER_PROJECT_PATH}/src" 7 | REQUIRES mbedtls srtp json esp_netif 8 | ) 9 | 10 | if(NOT IDF_TARGET STREQUAL linux) 11 | add_definitions("-DESP32 -DCONFIG_USE_LWIP=1 -DCONFIG_AUDIO_BUFFER_SIZE=8096 -DCONFIG_DATA_BUFFER_SIZE=102400 -D__BYTE_ORDER=__LITTLE_ENDIAN") 12 | endif() 13 | 14 | # Force ECDSA, because aiortc doesn't want to use RSA. 
15 | # Disable KeepAlives 16 | add_definitions("-DHTTP_DO_NOT_USE_CUSTOM_CONFIG -DMQTT_DO_NOT_USE_CUSTOM_CONFIG -DCONFIG_DTLS_USE_ECDSA=1 -DCONFIG_USE_USRSCTP=0 -DDISABLE_PEER_SIGNALING=0 -DCONFIG_KEEPALIVE_TIMEOUT=0") 17 | -------------------------------------------------------------------------------- /esp32-m5stack-cores3/dependencies.lock: -------------------------------------------------------------------------------- 1 | dependencies: 2 | idf: 3 | source: 4 | type: idf 5 | version: 5.4.1 6 | m5stack/m5gfx: 7 | component_hash: a44875eb6d6577bddcf1669ad5dec35e1afa425b7ca8e99af6b38505f37840e6 8 | dependencies: [] 9 | source: 10 | registry_url: https://components.espressif.com 11 | type: service 12 | version: 0.2.9 13 | m5stack/m5unified: 14 | component_hash: 9851fefc25bc05f8cdeadd4c6118c3300cfefcebf7e8c748dc9ccc2811100de6 15 | dependencies: 16 | - name: m5stack/m5gfx 17 | registry_url: https://components.espressif.com 18 | require: private 19 | version: ^0.2.6 20 | source: 21 | registry_url: https://components.espressif.com/ 22 | type: service 23 | version: 0.2.4 24 | direct_dependencies: 25 | - idf 26 | - m5stack/m5unified 27 | manifest_hash: 6c856b0c15014cd213a8417ee6353debbf5a3f71b049e54dff1b99b6268cff03 28 | target: esp32s3 29 | version: 2.0.0 30 | -------------------------------------------------------------------------------- /esp32-m5stack-atoms3r/src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(COMMON_SRC "webrtc.cpp" "main.cpp" "http.cpp") 2 | 3 | if(IDF_TARGET STREQUAL linux) 4 | idf_component_register( 5 | SRCS ${COMMON_SRC} 6 | REQUIRES peer esp-libopus esp_http_client json) 7 | else() 8 | idf_component_register( 9 | SRCS ${COMMON_SRC} "wifi.cpp" "media.cpp" "rtvi.cpp" "rtvi_callbacks.cpp" 10 | REQUIRES driver esp_wifi nvs_flash peer esp_psram esp-libopus esp_http_client json) 11 | endif() 12 | 13 | idf_component_get_property(lib peer COMPONENT_LIB) 14 | target_compile_options(${lib} PRIVATE 
-Wno-error=restrict) 15 | target_compile_options(${lib} PRIVATE -Wno-error=stringop-truncation) 16 | 17 | idf_component_get_property(lib srtp COMPONENT_LIB) 18 | target_compile_options(${lib} PRIVATE -Wno-error=incompatible-pointer-types) 19 | 20 | idf_component_get_property(lib esp-libopus COMPONENT_LIB) 21 | target_compile_options(${lib} PRIVATE -Wno-error=maybe-uninitialized) 22 | target_compile_options(${lib} PRIVATE -Wno-error=stringop-overread) 23 | -------------------------------------------------------------------------------- /esp32-m5stack-cores3/src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(COMMON_SRC "webrtc.cpp" "main.cpp" "http.cpp") 2 | 3 | if(IDF_TARGET STREQUAL linux) 4 | idf_component_register( 5 | SRCS ${COMMON_SRC} 6 | REQUIRES peer esp-libopus esp_http_client json) 7 | else() 8 | idf_component_register( 9 | SRCS ${COMMON_SRC} "wifi.cpp" "media.cpp" "rtvi.cpp" "rtvi_callbacks.cpp" 10 | REQUIRES driver esp_wifi nvs_flash peer esp_psram esp-libopus esp_http_client json) 11 | endif() 12 | 13 | idf_component_get_property(lib peer COMPONENT_LIB) 14 | target_compile_options(${lib} PRIVATE -Wno-error=restrict) 15 | target_compile_options(${lib} PRIVATE -Wno-error=stringop-truncation) 16 | 17 | idf_component_get_property(lib srtp COMPONENT_LIB) 18 | target_compile_options(${lib} PRIVATE -Wno-error=incompatible-pointer-types) 19 | 20 | idf_component_get_property(lib esp-libopus COMPONENT_LIB) 21 | target_compile_options(${lib} PRIVATE -Wno-error=maybe-uninitialized) 22 | target_compile_options(${lib} PRIVATE -Wno-error=stringop-overread) 23 | -------------------------------------------------------------------------------- /esp32-s3-box-3/sdkconfig.defaults: -------------------------------------------------------------------------------- 1 | CONFIG_IDF_TARGET="esp32s3" 2 | 3 | # ESP Event Loop on Linux 4 | CONFIG_ESP_EVENT_POST_FROM_ISR=n 5 | CONFIG_ESP_EVENT_POST_FROM_IRAM_ISR=n 6 | 7 | # 
Disable TLS verification 8 | # Production needs to include specific cert chain you care about 9 | CONFIG_ESP_TLS_INSECURE=y 10 | CONFIG_ESP_TLS_SKIP_SERVER_CERT_VERIFY=y 11 | 12 | # Enable DTLS-SRTP 13 | CONFIG_MBEDTLS_SSL_PROTO_DTLS=y 14 | 15 | # libpeer requires large stack allocations 16 | CONFIG_ESP_MAIN_TASK_STACK_SIZE=16384 17 | 18 | # Defaults to partitions.csv 19 | CONFIG_PARTITION_TABLE_CUSTOM=y 20 | 21 | # Set highest CPU Freq 22 | CONFIG_ESP_DEFAULT_CPU_FREQ_MHZ_240=y 23 | 24 | CONFIG_SPIRAM=y 25 | CONFIG_SPIRAM_MODE_OCT=y 26 | 27 | # Disable Watchdog 28 | # CONFIG_ESP_INT_WDT is not set 29 | # CONFIG_ESP_TASK_WDT_EN is not set 30 | 31 | # Enable Compiler Optimization 32 | CONFIG_COMPILER_OPTIMIZATION_PERF=y 33 | CONFIG_COMPILER_OPTIMIZATION_ASSERTIONS_DISABLE=y 34 | 35 | CONFIG_CODEC_I2C_BACKWARD_COMPATIBLE=n 36 | -------------------------------------------------------------------------------- /esp32-s3-box-3/src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(COMMON_SRC "webrtc.cpp" "main.cpp" "http.cpp") 2 | 3 | if(IDF_TARGET STREQUAL linux) 4 | idf_component_register( 5 | SRCS ${COMMON_SRC} 6 | REQUIRES peer esp-libopus esp_http_client json) 7 | else() 8 | idf_component_register( 9 | SRCS ${COMMON_SRC} "wifi.cpp" "media.cpp" "rtvi.cpp" "rtvi_callbacks.cpp" "screen.cpp" 10 | REQUIRES driver esp_wifi nvs_flash peer esp_psram esp-libopus esp_http_client json lvgl) 11 | endif() 12 | 13 | idf_component_get_property(lib peer COMPONENT_LIB) 14 | target_compile_options(${lib} PRIVATE -Wno-error=restrict) 15 | target_compile_options(${lib} PRIVATE -Wno-error=stringop-truncation) 16 | 17 | idf_component_get_property(lib srtp COMPONENT_LIB) 18 | target_compile_options(${lib} PRIVATE -Wno-error=incompatible-pointer-types) 19 | 20 | idf_component_get_property(lib esp-libopus COMPONENT_LIB) 21 | target_compile_options(${lib} PRIVATE -Wno-error=maybe-uninitialized) 22 | target_compile_options(${lib} PRIVATE 
-Wno-error=stringop-overread) 23 | -------------------------------------------------------------------------------- /esp32-m5stack-atoms3r/src/main.cpp: -------------------------------------------------------------------------------- 1 | #include "main.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #ifndef LINUX_BUILD 8 | #include "nvs_flash.h" 9 | 10 | extern "C" void app_main(void) { 11 | esp_err_t ret = nvs_flash_init(); 12 | if (ret == ESP_ERR_NVS_NO_FREE_PAGES || 13 | ret == ESP_ERR_NVS_NEW_VERSION_FOUND) { 14 | ESP_ERROR_CHECK(nvs_flash_erase()); 15 | ret = nvs_flash_init(); 16 | } 17 | ESP_ERROR_CHECK(ret); 18 | 19 | ESP_ERROR_CHECK(esp_event_loop_create_default()); 20 | peer_init(); 21 | pipecat_init_audio_capture(); 22 | pipecat_init_audio_decoder(); 23 | pipecat_init_wifi(); 24 | pipecat_init_webrtc(); 25 | 26 | while (1) { 27 | pipecat_webrtc_loop(); 28 | vTaskDelay(pdMS_TO_TICKS(TICK_INTERVAL)); 29 | } 30 | } 31 | #else 32 | int main(void) { 33 | ESP_ERROR_CHECK(esp_event_loop_create_default()); 34 | peer_init(); 35 | pipecat_webrtc(); 36 | 37 | while (1) { 38 | pipecat_webrtc_loop(); 39 | vTaskDelay(pdMS_TO_TICKS(TICK_INTERVAL)); 40 | } 41 | } 42 | #endif 43 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Daily 4 | Copyright (c) 2024 OpenAI 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or substantial 
portions of the Software. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 11 | -------------------------------------------------------------------------------- /esp32-s3-box-3/src/main.cpp: -------------------------------------------------------------------------------- 1 | #include "main.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #ifndef LINUX_BUILD 8 | #include "nvs_flash.h" 9 | // Firmware entry point: init NVS (erase and retry on no-free-pages / new-version errors), then the event loop, screen, peer library, audio, Wi-Fi and WebRTC; afterwards service WebRTC every TICK_INTERVAL ms. 10 | extern "C" void app_main(void) { 11 | esp_err_t ret = nvs_flash_init(); 12 | if (ret == ESP_ERR_NVS_NO_FREE_PAGES || 13 | ret == ESP_ERR_NVS_NEW_VERSION_FOUND) { 14 | ESP_ERROR_CHECK(nvs_flash_erase()); 15 | ret = nvs_flash_init(); 16 | } 17 | ESP_ERROR_CHECK(ret); 18 | 19 | ESP_ERROR_CHECK(esp_event_loop_create_default()); 20 | pipecat_init_screen(); 21 | peer_init(); 22 | pipecat_init_audio_capture(); 23 | pipecat_init_audio_decoder(); 24 | pipecat_init_wifi(); 25 | pipecat_init_webrtc(); 26 | 27 | pipecat_screen_system_log("Pipecat ESP32 client initialized\n"); 28 | 29 | while (1) { 30 | pipecat_webrtc_loop(); 31 | vTaskDelay(pdMS_TO_TICKS(TICK_INTERVAL)); 32 | } 33 | } 34 | #else // LINUX_BUILD: host entry point, no NVS, screen, audio or Wi-Fi setup 35 | int main(void) { 36 | ESP_ERROR_CHECK(esp_event_loop_create_default()); 37 | peer_init(); 38 | pipecat_init_webrtc(); 39 | 40 | while (1) { 41 | pipecat_webrtc_loop(); 42 | vTaskDelay(pdMS_TO_TICKS(TICK_INTERVAL)); 43 | } 44 | } 45 | #endif 46 | -------------------------------------------------------------------------------- /esp32-m5stack-cores3/src/main.cpp: -------------------------------------------------------------------------------- 1 | #include "main.h" 2 | 3 | #include 4 
| #include 5 | #include 6 | 7 | #ifndef LINUX_BUILD 8 | #include "nvs_flash.h" 9 | // Firmware entry point: init NVS (erase and retry on no-free-pages / new-version errors), bring up the M5 display, then the event loop, peer library, audio, Wi-Fi and WebRTC; afterwards service WebRTC every TICK_INTERVAL ms. 10 | extern "C" void app_main(void) { 11 | esp_err_t ret = nvs_flash_init(); 12 | if (ret == ESP_ERR_NVS_NO_FREE_PAGES || 13 | ret == ESP_ERR_NVS_NEW_VERSION_FOUND) { 14 | ESP_ERROR_CHECK(nvs_flash_erase()); 15 | ret = nvs_flash_init(); 16 | } 17 | ESP_ERROR_CHECK(ret); 18 | 19 | auto cfg = M5.config(); 20 | M5.begin(cfg); 21 | 22 | M5.Display.setBrightness(70); 23 | M5.Display.setTextSize(1.5); 24 | M5.Display.fillScreen(BLACK); 25 | M5.Display.println("Pipecat ESP32 client initialized\n"); 26 | 27 | ESP_ERROR_CHECK(esp_event_loop_create_default()); 28 | peer_init(); 29 | pipecat_init_audio_capture(); 30 | pipecat_init_audio_decoder(); 31 | pipecat_init_wifi(); 32 | pipecat_init_webrtc(); 33 | 34 | while (1) { 35 | pipecat_webrtc_loop(); 36 | vTaskDelay(pdMS_TO_TICKS(TICK_INTERVAL)); 37 | } 38 | } 39 | #else // LINUX_BUILD: host entry point, no NVS, display, audio or Wi-Fi setup 40 | int main(void) { 41 | ESP_ERROR_CHECK(esp_event_loop_create_default()); 42 | peer_init(); 43 | pipecat_init_webrtc(); 44 | 45 | while (1) { 46 | pipecat_webrtc_loop(); 47 | vTaskDelay(pdMS_TO_TICKS(TICK_INTERVAL)); 48 | } 49 | } 50 | #endif 51 | -------------------------------------------------------------------------------- /esp32-s3-box-3/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.19) 2 | 3 | if(NOT IDF_TARGET STREQUAL linux) 4 | if(NOT DEFINED ENV{WIFI_SSID} OR NOT DEFINED ENV{WIFI_PASSWORD}) 5 | message(FATAL_ERROR "Env variables WIFI_SSID and WIFI_PASSWORD must be set") 6 | endif() 7 | 8 | add_compile_definitions(WIFI_SSID="$ENV{WIFI_SSID}") 9 | add_compile_definitions(WIFI_PASSWORD="$ENV{WIFI_PASSWORD}") 10 | endif() 11 | 12 | if(NOT DEFINED ENV{PIPECAT_SMALLWEBRTC_URL}) 13 | message(FATAL_ERROR "Env variable PIPECAT_SMALLWEBRTC_URL must be set") 14 | endif() 15 | 16 | if(DEFINED ENV{LOG_DATACHANNEL_MESSAGES}) 17 | add_compile_definitions(LOG_DATACHANNEL_MESSAGES="1") 18 | endif() 19 
| 20 | add_compile_definitions(PIPECAT_SMALLWEBRTC_URL="$ENV{PIPECAT_SMALLWEBRTC_URL}") 21 | 22 | set(COMPONENTS src) 23 | set(EXTRA_COMPONENT_DIRS "src" "components/srtp" "components/peer" "components/esp-libopus") 24 | 25 | if(IDF_TARGET STREQUAL linux) 26 | add_compile_definitions(LINUX_BUILD=1) 27 | list(APPEND EXTRA_COMPONENT_DIRS 28 | $ENV{IDF_PATH}/examples/protocols/linux_stubs/esp_stubs 29 | "components/esp-protocols/common_components/linux_compat/esp_timer" 30 | "components/esp-protocols/common_components/linux_compat/freertos" 31 | ) 32 | endif() 33 | 34 | include($ENV{IDF_PATH}/tools/cmake/project.cmake) 35 | project(src) 36 | -------------------------------------------------------------------------------- /esp32-s3-box-3/src/main.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define LOG_TAG "pipecat" 4 | #define MAX_HTTP_OUTPUT_BUFFER 4096 5 | #define HTTP_TIMEOUT_MS 10000 6 | #define TICK_INTERVAL 15 7 | 8 | // Wifi 9 | extern void pipecat_init_wifi(); 10 | 11 | // WebRTC / Media 12 | extern void pipecat_init_audio_capture(); 13 | extern void pipecat_init_audio_decoder(); 14 | extern void pipecat_init_audio_encoder(); 15 | extern void pipecat_send_audio(PeerConnection *peer_connection); 16 | extern void pipecat_audio_decode(uint8_t *data, size_t size); 17 | 18 | // WebRTC / Signalling 19 | extern void pipecat_init_webrtc(); 20 | extern void pipecat_webrtc_loop(); 21 | extern void pipecat_http_request(char *offer, char *answer); 22 | 23 | // RTVI 24 | typedef struct { 25 | void (*on_bot_started_speaking)(); 26 | void (*on_bot_stopped_speaking)(); 27 | void (*on_bot_tts_text)(const char *text); 28 | } rtvi_callbacks_t; 29 | 30 | extern rtvi_callbacks_t pipecat_rtvi_callbacks; 31 | 32 | extern void pipecat_init_rtvi(PeerConnection *peer_connection, rtvi_callbacks_t *callbacks); 33 | extern void pipecat_rtvi_send_client_ready(); 34 | extern void pipecat_rtvi_handle_message(const char* msg); 35 | 
36 | // Screen 37 | extern void pipecat_init_screen(); 38 | extern void pipecat_screen_system_log(const char *text); 39 | extern void pipecat_screen_new_log(); 40 | extern void pipecat_screen_log(const char *text); 41 | -------------------------------------------------------------------------------- /esp32-m5stack-atoms3r/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.19) 2 | 3 | if(NOT IDF_TARGET STREQUAL linux) 4 | if(NOT DEFINED ENV{WIFI_SSID} OR NOT DEFINED ENV{WIFI_PASSWORD}) 5 | message(FATAL_ERROR "Env variables WIFI_SSID and WIFI_PASSWORD must be set") 6 | endif() 7 | 8 | add_compile_definitions(WIFI_SSID="$ENV{WIFI_SSID}") 9 | add_compile_definitions(WIFI_PASSWORD="$ENV{WIFI_PASSWORD}") 10 | endif() 11 | 12 | if(NOT DEFINED ENV{PIPECAT_SMALLWEBRTC_URL}) 13 | message(FATAL_ERROR "Env variable PIPECAT_SMALLWEBRTC_URL must be set") 14 | endif() 15 | 16 | if(DEFINED ENV{LOG_DATACHANNEL_MESSAGES}) 17 | add_compile_definitions(LOG_DATACHANNEL_MESSAGES="1") 18 | endif() 19 | 20 | add_compile_definitions(PIPECAT_SMALLWEBRTC_URL="$ENV{PIPECAT_SMALLWEBRTC_URL}") 21 | 22 | set(COMPONENTS src) 23 | set(EXTRA_COMPONENT_DIRS "src" "../esp32-s3-box-3/components/srtp" "../esp32-s3-box-3/components/peer" "../esp32-s3-box-3/components/esp-libopus") 24 | # Shared components, including the esp-protocols submodule used for the Linux build, live under ../esp32-s3-box-3/components (see .gitmodules). 25 | if(IDF_TARGET STREQUAL linux) 26 | add_compile_definitions(LINUX_BUILD=1) 27 | list(APPEND EXTRA_COMPONENT_DIRS 28 | $ENV{IDF_PATH}/examples/protocols/linux_stubs/esp_stubs 29 | "../esp32-s3-box-3/components/esp-protocols/common_components/linux_compat/esp_timer" 30 | "../esp32-s3-box-3/components/esp-protocols/common_components/linux_compat/freertos" 31 | ) 32 | endif() 33 | 34 | include($ENV{IDF_PATH}/tools/cmake/project.cmake) 35 | project(src) 36 | -------------------------------------------------------------------------------- /esp32-m5stack-atoms3r/src/main.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | 
#define LOG_TAG "pipecat" 4 | #define MAX_HTTP_OUTPUT_BUFFER 4096 5 | #define HTTP_TIMEOUT_MS 10000 6 | #define TICK_INTERVAL 15 7 | 8 | // Wifi 9 | extern void pipecat_init_wifi(); 10 | 11 | // WebRTC / Media 12 | extern void pipecat_init_audio_capture(); 13 | extern void pipecat_init_audio_decoder(); 14 | extern void pipecat_init_audio_encoder(); 15 | extern void pipecat_send_audio(PeerConnection *peer_connection); 16 | extern void pipecat_audio_decode(uint8_t *data, size_t size); 17 | 18 | // WebRTC / Signalling 19 | extern void pipecat_init_webrtc(); 20 | extern void pipecat_webrtc_loop(); 21 | extern void pipecat_http_request(char *offer, char *answer); 22 | 23 | // RTVI 24 | typedef struct { 25 | void (*on_bot_started_speaking)(); 26 | void (*on_bot_stopped_speaking)(); 27 | void (*on_bot_tts_text)(const char *text); 28 | } rtvi_callbacks_t; 29 | 30 | extern rtvi_callbacks_t pipecat_rtvi_callbacks; 31 | 32 | extern void pipecat_init_rtvi(PeerConnection *peer_connection, rtvi_callbacks_t *callbacks); 33 | extern void pipecat_rtvi_send_client_ready(); 34 | extern void pipecat_rtvi_handle_message(const char* msg); 35 | 36 | // Screen 37 | extern void pipecat_init_screen(); 38 | extern void pipecat_screen_system_log(const char *text); 39 | extern void pipecat_screen_new_log(); 40 | extern void pipecat_screen_log(const char *text); 41 | -------------------------------------------------------------------------------- /esp32-m5stack-cores3/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.19) 2 | 3 | if(NOT IDF_TARGET STREQUAL linux) 4 | if(NOT DEFINED ENV{WIFI_SSID} OR NOT DEFINED ENV{WIFI_PASSWORD}) 5 | message(FATAL_ERROR "Env variables WIFI_SSID and WIFI_PASSWORD must be set") 6 | endif() 7 | 8 | add_compile_definitions(WIFI_SSID="$ENV{WIFI_SSID}") 9 | add_compile_definitions(WIFI_PASSWORD="$ENV{WIFI_PASSWORD}") 10 | endif() 11 | 12 | if(NOT DEFINED ENV{PIPECAT_SMALLWEBRTC_URL}) 
13 | message(FATAL_ERROR "Env variable PIPECAT_SMALLWEBRTC_URL must be set") 14 | endif() 15 | 16 | if(DEFINED ENV{LOG_DATACHANNEL_MESSAGES}) 17 | add_compile_definitions(LOG_DATACHANNEL_MESSAGES="1") 18 | endif() 19 | 20 | add_compile_definitions(PIPECAT_SMALLWEBRTC_URL="$ENV{PIPECAT_SMALLWEBRTC_URL}") 21 | 22 | set(COMPONENTS src) 23 | set(EXTRA_COMPONENT_DIRS "src" "../esp32-s3-box-3/components/srtp" "../esp32-s3-box-3/components/peer" "../esp32-s3-box-3/components/esp-libopus") 24 | 25 | if(IDF_TARGET STREQUAL linux) 26 | add_compile_definitions(LINUX_BUILD=1) 27 | list(APPEND EXTRA_COMPONENT_DIRS 28 | $ENV{IDF_PATH}/examples/protocols/linux_stubs/esp_stubs 29 | "components/esp-protocols/common_components/linux_compat/esp_timer" 30 | "components/esp-protocols/common_components/linux_compat/freertos" 31 | ) 32 | endif() 33 | 34 | include($ENV{IDF_PATH}/tools/cmake/project.cmake) 35 | project(src) 36 | -------------------------------------------------------------------------------- /esp32-m5stack-cores3/src/main.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifndef LINUX_BUILD 4 | #include 5 | #endif 6 | 7 | #define LOG_TAG "pipecat" 8 | #define MAX_HTTP_OUTPUT_BUFFER 4096 9 | #define HTTP_TIMEOUT_MS 10000 10 | #define TICK_INTERVAL 15 11 | 12 | // Wifi 13 | extern void pipecat_init_wifi(); 14 | 15 | // WebRTC / Media 16 | extern void pipecat_init_audio_capture(); 17 | extern void pipecat_init_audio_decoder(); 18 | extern void pipecat_init_audio_encoder(); 19 | extern void pipecat_send_audio(PeerConnection *peer_connection); 20 | extern void pipecat_audio_decode(uint8_t *data, size_t size); 21 | 22 | // WebRTC / Signalling 23 | extern void pipecat_init_webrtc(); 24 | extern void pipecat_webrtc_loop(); 25 | extern void pipecat_http_request(char *offer, char *answer); 26 | 27 | // RTVI 28 | typedef struct { 29 | void (*on_bot_started_speaking)(); 30 | void (*on_bot_stopped_speaking)(); 31 | void 
(*on_bot_tts_text)(const char *text); 32 | } rtvi_callbacks_t; 33 | 34 | extern rtvi_callbacks_t pipecat_rtvi_callbacks; 35 | 36 | extern void pipecat_init_rtvi(PeerConnection *peer_connection, rtvi_callbacks_t *callbacks); 37 | extern void pipecat_rtvi_send_client_ready(); 38 | extern void pipecat_rtvi_handle_message(const char* msg); 39 | 40 | // Screen 41 | extern void pipecat_init_screen(); 42 | extern void pipecat_screen_system_log(const char *text); 43 | extern void pipecat_screen_new_log(); 44 | extern void pipecat_screen_log(const char *text); 45 | -------------------------------------------------------------------------------- /esp32-s3-box-3/src/wifi.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "main.h" 10 | 11 | static bool g_wifi_connected = false; 12 | 13 | static void pipecat_event_handler(void *arg, esp_event_base_t event_base, 14 | int32_t event_id, void *event_data) { 15 | static int s_retry_num = 0; 16 | if (event_base == WIFI_EVENT && event_id == WIFI_EVENT_STA_DISCONNECTED) { 17 | if (s_retry_num < 5) { 18 | esp_wifi_connect(); 19 | s_retry_num++; 20 | ESP_LOGI(LOG_TAG, "retry to connect to the AP"); 21 | } 22 | ESP_LOGI(LOG_TAG, "connect to the AP fail"); 23 | } else if (event_base == IP_EVENT && event_id == IP_EVENT_STA_GOT_IP) { 24 | ip_event_got_ip_t *event = (ip_event_got_ip_t *)event_data; 25 | ESP_LOGI(LOG_TAG, "got ip:" IPSTR, IP2STR(&event->ip_info.ip)); 26 | g_wifi_connected = true; 27 | } 28 | } 29 | 30 | void pipecat_init_wifi() { 31 | ESP_ERROR_CHECK(esp_event_handler_register(WIFI_EVENT, ESP_EVENT_ANY_ID, 32 | &pipecat_event_handler, NULL)); 33 | ESP_ERROR_CHECK(esp_event_handler_register(IP_EVENT, IP_EVENT_STA_GOT_IP, 34 | &pipecat_event_handler, NULL)); 35 | 36 | ESP_ERROR_CHECK(esp_netif_init()); 37 | esp_netif_t *sta_netif = esp_netif_create_default_wifi_sta(); 38 | 
assert(sta_netif); 39 | 40 | wifi_init_config_t cfg = WIFI_INIT_CONFIG_DEFAULT(); 41 | ESP_ERROR_CHECK(esp_wifi_init(&cfg)); 42 | ESP_ERROR_CHECK(esp_wifi_set_mode(WIFI_MODE_STA)); 43 | ESP_ERROR_CHECK(esp_wifi_start()); 44 | 45 | ESP_LOGI(LOG_TAG, "Connecting to WiFi SSID: %s", WIFI_SSID); 46 | wifi_config_t wifi_config; 47 | memset(&wifi_config, 0, sizeof(wifi_config)); 48 | strncpy((char *)wifi_config.sta.ssid, (char *)WIFI_SSID, 49 | sizeof(wifi_config.sta.ssid)); 50 | strncpy((char *)wifi_config.sta.password, (char *)WIFI_PASSWORD, 51 | sizeof(wifi_config.sta.password)); 52 | 53 | ESP_ERROR_CHECK(esp_wifi_set_config( 54 | static_cast(ESP_IF_WIFI_STA), &wifi_config)); 55 | ESP_ERROR_CHECK(esp_wifi_connect()); 56 | 57 | // block until we get an IP address 58 | while (!g_wifi_connected) { 59 | vTaskDelay(pdMS_TO_TICKS(200)); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /esp32-m5stack-atoms3r/src/wifi.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "main.h" 10 | 11 | static bool g_wifi_connected = false; 12 | 13 | static void pipecat_event_handler(void *arg, esp_event_base_t event_base, 14 | int32_t event_id, void *event_data) { 15 | static int s_retry_num = 0; 16 | if (event_base == WIFI_EVENT && event_id == WIFI_EVENT_STA_DISCONNECTED) { 17 | if (s_retry_num < 5) { 18 | esp_wifi_connect(); 19 | s_retry_num++; 20 | ESP_LOGI(LOG_TAG, "retry to connect to the AP"); 21 | } 22 | ESP_LOGI(LOG_TAG, "connect to the AP fail"); 23 | } else if (event_base == IP_EVENT && event_id == IP_EVENT_STA_GOT_IP) { 24 | ip_event_got_ip_t *event = (ip_event_got_ip_t *)event_data; 25 | ESP_LOGI(LOG_TAG, "got ip:" IPSTR, IP2STR(&event->ip_info.ip)); 26 | g_wifi_connected = true; 27 | } 28 | } 29 | 30 | void pipecat_init_wifi() { 31 | ESP_ERROR_CHECK(esp_event_handler_register(WIFI_EVENT, 
ESP_EVENT_ANY_ID, 32 | &pipecat_event_handler, NULL)); 33 | ESP_ERROR_CHECK(esp_event_handler_register(IP_EVENT, IP_EVENT_STA_GOT_IP, 34 | &pipecat_event_handler, NULL)); 35 | 36 | ESP_ERROR_CHECK(esp_netif_init()); 37 | esp_netif_t *sta_netif = esp_netif_create_default_wifi_sta(); 38 | assert(sta_netif); 39 | 40 | wifi_init_config_t cfg = WIFI_INIT_CONFIG_DEFAULT(); 41 | ESP_ERROR_CHECK(esp_wifi_init(&cfg)); 42 | ESP_ERROR_CHECK(esp_wifi_set_mode(WIFI_MODE_STA)); 43 | ESP_ERROR_CHECK(esp_wifi_start()); 44 | 45 | ESP_LOGI(LOG_TAG, "Connecting to WiFi SSID: %s", WIFI_SSID); 46 | wifi_config_t wifi_config; 47 | memset(&wifi_config, 0, sizeof(wifi_config)); 48 | strncpy((char *)wifi_config.sta.ssid, (char *)WIFI_SSID, 49 | sizeof(wifi_config.sta.ssid)); 50 | strncpy((char *)wifi_config.sta.password, (char *)WIFI_PASSWORD, 51 | sizeof(wifi_config.sta.password)); 52 | 53 | ESP_ERROR_CHECK(esp_wifi_set_config( 54 | static_cast(ESP_IF_WIFI_STA), &wifi_config)); 55 | ESP_ERROR_CHECK(esp_wifi_connect()); 56 | 57 | // block until we get an IP address 58 | while (!g_wifi_connected) { 59 | vTaskDelay(pdMS_TO_TICKS(200)); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /esp32-m5stack-cores3/src/wifi.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include "main.h" 10 | 11 | static bool g_wifi_connected = false; 12 | 13 | static void pipecat_event_handler(void *arg, esp_event_base_t event_base, 14 | int32_t event_id, void *event_data) { 15 | static int s_retry_num = 0; 16 | if (event_base == WIFI_EVENT && event_id == WIFI_EVENT_STA_DISCONNECTED) { 17 | if (s_retry_num < 5) { 18 | esp_wifi_connect(); 19 | s_retry_num++; 20 | ESP_LOGI(LOG_TAG, "retry to connect to the AP"); 21 | } 22 | ESP_LOGI(LOG_TAG, "connect to the AP fail"); 23 | } else if (event_base == IP_EVENT && event_id == 
IP_EVENT_STA_GOT_IP) { 24 | ip_event_got_ip_t *event = (ip_event_got_ip_t *)event_data; 25 | ESP_LOGI(LOG_TAG, "got ip:" IPSTR, IP2STR(&event->ip_info.ip)); 26 | g_wifi_connected = true; 27 | } 28 | } 29 | 30 | void pipecat_init_wifi() { 31 | ESP_ERROR_CHECK(esp_event_handler_register(WIFI_EVENT, ESP_EVENT_ANY_ID, 32 | &pipecat_event_handler, NULL)); 33 | ESP_ERROR_CHECK(esp_event_handler_register(IP_EVENT, IP_EVENT_STA_GOT_IP, 34 | &pipecat_event_handler, NULL)); 35 | 36 | ESP_ERROR_CHECK(esp_netif_init()); 37 | esp_netif_t *sta_netif = esp_netif_create_default_wifi_sta(); 38 | assert(sta_netif); 39 | 40 | wifi_init_config_t cfg = WIFI_INIT_CONFIG_DEFAULT(); 41 | ESP_ERROR_CHECK(esp_wifi_init(&cfg)); 42 | ESP_ERROR_CHECK(esp_wifi_set_mode(WIFI_MODE_STA)); 43 | ESP_ERROR_CHECK(esp_wifi_start()); 44 | 45 | ESP_LOGI(LOG_TAG, "Connecting to WiFi SSID: %s", WIFI_SSID); 46 | wifi_config_t wifi_config; 47 | memset(&wifi_config, 0, sizeof(wifi_config)); 48 | strncpy((char *)wifi_config.sta.ssid, (char *)WIFI_SSID, 49 | sizeof(wifi_config.sta.ssid)); 50 | strncpy((char *)wifi_config.sta.password, (char *)WIFI_PASSWORD, 51 | sizeof(wifi_config.sta.password)); 52 | 53 | ESP_ERROR_CHECK(esp_wifi_set_config( 54 | static_cast(ESP_IF_WIFI_STA), &wifi_config)); 55 | ESP_ERROR_CHECK(esp_wifi_connect()); 56 | 57 | // block until we get an IP address 58 | while (!g_wifi_connected) { 59 | vTaskDelay(pdMS_TO_TICKS(200)); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pipecat ESP32 Client SDK 2 | 3 | ## 💻 Platform/Device Support 4 | 5 | This SDK has been developed and tested on a `esp32s3` and `linux`. You don't 6 | need any physical hardware to run this SDK, but it will be very limited 7 | (e.g. you won't hear an audio on Linux). 8 | 9 | To use it on hardware purchase any of these microcontrollers. 
Others may
10 | work, but this is what has been developed against.
11 |
12 | * [Espressif - ESP32-S3-BOX-3](https://www.digikey.com/short/fb2vjrpn)
13 | * [M5Stack - CoreS3 ESP32S3 IoT Development Kit](https://shop.m5stack.com/products/m5stack-cores3-esp32s3-lotdevelopment-kit)
14 |
15 | ## 📋 Pre-requisites
16 |
17 | Clone this repository:
18 |
19 | ```
20 | git clone --recursive https://github.com/pipecat-ai/pipecat-esp32.git
21 | ```
22 |
23 | Install the ESP-IDF toolchain following these
24 | [instructions](https://docs.espressif.com/projects/esp-idf/en/stable/esp32/get-started/linux-macos-setup.html).
25 |
26 | After that, just open a terminal and load ESP-IDF tools:
27 |
28 | ```
29 | source PATH_TO_ESP_IDF/export.sh
30 | ```
31 |
32 | We also need to set a few environment variables before we can build:
33 |
34 | ```
35 | export WIFI_SSID=foo
36 | export WIFI_PASSWORD=bar
37 | export PIPECAT_SMALLWEBRTC_URL=URL  # e.g. http://192.168.1.10:7860/api/offer
38 | ```
39 |
40 | where `WIFI_SSID` and `WIFI_PASSWORD` are just needed to connect the device to
41 | the network. `PIPECAT_SMALLWEBRTC_URL` is the URL endpoint to connect to your
42 | Pipecat bot.
43 |
44 | ## 🛠️ Build
45 |
46 | Go inside the directory for your board (for example, `esp32-s3-box-3`).
47 |
48 | The first thing to do is to set the desired target, for example:
49 |
50 | ```
51 | idf.py --preview set-target esp32s3
52 | ```
53 |
54 | You can also set `linux` instead of `esp32s3`.
55 |
56 | Then, just build:
57 |
58 | ```
59 | idf.py build
60 | ```
61 |
62 | If you built for `linux` you can run the binary directly:
63 |
64 | ```
65 | ./build/src.elf
66 | ```
67 |
68 | ## 🔌 Flash the device
69 |
70 | If you built for `esp32s3` you can flash your device using the following commands:
71 |
72 | ### Linux
73 |
74 | ```
75 | idf.py -p /dev/ttyACM0 flash
76 | ```
77 |
78 | where `/dev/ttyACM0` is the serial device your ESP32 is connected to. You can
79 | run `sudo dmesg` to find the device name on your system.
80 | 81 | On Debian systems, you will want to add your user to the `dialout` group so you 82 | don't need root access. 83 | 84 | ### macOS 85 | 86 | ``` 87 | idf.py flash 88 | ``` 89 | 90 | ## ▶️ Usage 91 | 92 | Currently, you can try `pipecat-esp32` with one of the Pipecat foundational 93 | examples. For example, from the Pipecat repository you can run: 94 | 95 | ``` 96 | python examples/foundational/07-interruptible.py --host IP --esp32 97 | ``` 98 | 99 | where `IP` is just your machine IP address (e.g. 192.168.1.10). Then, you would 100 | set the environment variable `PIPECAT_SMALLWEBRTC_URL` as explained above. 101 | -------------------------------------------------------------------------------- /esp32-s3-box-3/src/screen.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "main.h" 6 | 7 | #define SCREEN_TICK_INTERVAL 50 8 | #define MAX_LOG_LINES 10 9 | 10 | static lv_obj_t *screen = NULL; 11 | static lv_obj_t *log_container = NULL; 12 | static lv_obj_t *log_label = NULL; 13 | static lv_obj_t *log_labels[MAX_LOG_LINES]; 14 | static int log_line_count = 0; 15 | 16 | static lv_style_t STYLE_BLUE; 17 | static lv_style_t STYLE_GREEN; 18 | static lv_style_t STYLE_DEFAULT; 19 | 20 | static void init_styles() { 21 | lv_style_init(&STYLE_BLUE); 22 | lv_style_set_text_color(&STYLE_BLUE, lv_color_hex(0x3333FF)); 23 | 24 | lv_style_init(&STYLE_GREEN); 25 | lv_style_set_text_color(&STYLE_GREEN, lv_color_hex(0x33FF33)); 26 | 27 | lv_style_init(&STYLE_DEFAULT); 28 | lv_style_set_text_color(&STYLE_DEFAULT, lv_color_hex(0x333333)); 29 | } 30 | 31 | static lv_obj_t *create_scrollable_log(lv_obj_t *parent) { 32 | lv_obj_t *container = lv_obj_create(parent); 33 | lv_obj_set_size(container, LV_PCT(100), LV_PCT(100)); 34 | lv_obj_align(container, LV_ALIGN_CENTER, 0, 0); 35 | 36 | // Enable vertical scrolling 37 | lv_obj_set_scroll_dir(container, LV_DIR_VER); 38 | // Enable flex layout 39 | 
lv_obj_set_layout(container, LV_LAYOUT_FLEX); 40 | // Vertical stacking 41 | lv_obj_set_flex_flow(container, LV_FLEX_FLOW_COLUMN); 42 | // Space between rows 43 | lv_obj_set_style_pad_row(container, 4, 0); 44 | return container; 45 | } 46 | 47 | static lv_obj_t *create_label(lv_obj_t *container, lv_style_t *style) { 48 | lv_obj_t *label = lv_label_create(container); 49 | lv_obj_add_style(label, style, 0); 50 | lv_obj_set_width(label, LV_PCT(100)); 51 | lv_label_set_long_mode(label, LV_LABEL_LONG_WRAP); 52 | lv_label_set_text(label, ""); 53 | return label; 54 | } 55 | 56 | static void create_current_label(lv_obj_t *container, lv_style_t *style) { 57 | // Delete oldest labels. 58 | if (log_line_count >= MAX_LOG_LINES) { 59 | lv_obj_del(log_labels[0]); 60 | 61 | // Shift remaining labels 62 | for (int i = 1; i < MAX_LOG_LINES; ++i) { 63 | log_labels[i - 1] = log_labels[i]; 64 | } 65 | log_line_count--; 66 | } 67 | 68 | log_label = create_label(container, style); 69 | log_labels[log_line_count++] = log_label; 70 | } 71 | 72 | static void screen_task(void *pvParameter) { 73 | while (1) { 74 | lv_timer_handler(); 75 | vTaskDelay(pdMS_TO_TICKS(SCREEN_TICK_INTERVAL)); 76 | } 77 | } 78 | 79 | void pipecat_init_screen() { 80 | bsp_display_start(); 81 | 82 | bsp_display_backlight_on(); 83 | 84 | bsp_display_lock(0); 85 | 86 | screen = lv_scr_act(); 87 | 88 | init_styles(); 89 | 90 | log_container = create_scrollable_log(screen); 91 | 92 | xTaskCreatePinnedToCore(screen_task, "Screen Task", 8192, NULL, 1, NULL, 1); 93 | 94 | ESP_LOGI(LOG_TAG, "Display initialized"); 95 | } 96 | 97 | void pipecat_screen_system_log(const char *text) { 98 | if (!log_container) { 99 | return; 100 | } 101 | 102 | create_current_label(log_container, &STYLE_DEFAULT); 103 | pipecat_screen_log(text); 104 | } 105 | 106 | void pipecat_screen_new_log() { 107 | if (!log_container) { 108 | return; 109 | } 110 | 111 | create_current_label(log_container, &STYLE_BLUE); 112 | } 113 | 114 | void 
pipecat_screen_log(const char *text) { 115 | if (!log_container) { 116 | return; 117 | } 118 | 119 | const char *current_text = lv_label_get_text(log_label); 120 | size_t current_len = strlen(current_text); 121 | size_t new_len = current_len + strlen(text) + 1; 122 | 123 | char *combined = (char *)malloc(new_len); 124 | if (!combined) { 125 | ESP_LOGW(LOG_TAG, "Failed to allocate memory for log text."); 126 | return; 127 | } 128 | 129 | strcpy(combined, current_text); 130 | strcat(combined, text); 131 | 132 | lv_label_set_text(log_label, combined); 133 | free(combined); 134 | 135 | lv_obj_scroll_to_view_recursive(log_label, LV_ANIM_OFF); 136 | } 137 | -------------------------------------------------------------------------------- /esp32-s3-box-3/src/webrtc.cpp: -------------------------------------------------------------------------------- 1 | #ifndef LINUX_BUILD 2 | #include 3 | #include 4 | #endif 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include "main.h" 11 | 12 | static PeerConnection *peer_connection = NULL; 13 | 14 | #ifndef LINUX_BUILD 15 | StaticTask_t task_buffer; 16 | void pipecat_send_audio_task(void *user_data) { 17 | pipecat_init_audio_encoder(); 18 | 19 | while (1) { 20 | pipecat_send_audio(peer_connection); 21 | vTaskDelay(pdMS_TO_TICKS(TICK_INTERVAL)); 22 | } 23 | } 24 | #endif 25 | 26 | static void pipecat_ondatachannel_onmessage_task(char *msg, size_t len, 27 | void *userdata, uint16_t sid) { 28 | #ifdef LOG_DATACHANNEL_MESSAGES 29 | ESP_LOGI(LOG_TAG, "DataChannel Message: %s", msg); 30 | #endif 31 | pipecat_rtvi_handle_message(msg); 32 | } 33 | 34 | static void pipecat_ondatachannel_onopen_task(void *userdata) { 35 | if (peer_connection_create_datachannel(peer_connection, DATA_CHANNEL_RELIABLE, 36 | 0, 0, (char *)"rtvi-ai", 37 | (char *)"") != -1) { 38 | ESP_LOGI(LOG_TAG, "DataChannel created"); 39 | } else { 40 | ESP_LOGE(LOG_TAG, "Failed to create DataChannel"); 41 | } 42 | } 43 | 44 | static void 
pipecat_onconnectionstatechange_task(PeerConnectionState state, 45 | void *user_data) { 46 | ESP_LOGI(LOG_TAG, "PeerConnectionState: %s", 47 | peer_connection_state_to_string(state)); 48 | 49 | if (state == PEER_CONNECTION_DISCONNECTED || 50 | state == PEER_CONNECTION_CLOSED) { 51 | #ifndef LINUX_BUILD 52 | esp_restart(); 53 | #endif 54 | } else if (state == PEER_CONNECTION_CONNECTED) { 55 | #ifndef LINUX_BUILD 56 | StackType_t *stack_memory = (StackType_t *)heap_caps_malloc( 57 | 30000 * sizeof(StackType_t), MALLOC_CAP_SPIRAM); 58 | xTaskCreateStaticPinnedToCore(pipecat_send_audio_task, "audio_publisher", 59 | 30000, NULL, 7, stack_memory, &task_buffer, 60 | 0); 61 | pipecat_init_rtvi(peer_connection, &pipecat_rtvi_callbacks); 62 | #endif 63 | } 64 | } 65 | 66 | static void pipecat_on_icecandidate_task(char *description, void *user_data) { 67 | char *local_buffer = (char *)malloc(MAX_HTTP_OUTPUT_BUFFER + 1); 68 | memset(local_buffer, 0, MAX_HTTP_OUTPUT_BUFFER + 1); 69 | pipecat_http_request(description, local_buffer); 70 | peer_connection_set_remote_description(peer_connection, local_buffer, 71 | SDP_TYPE_ANSWER); 72 | free(local_buffer); 73 | } 74 | 75 | void pipecat_init_webrtc() { 76 | PeerConfiguration peer_connection_config = { 77 | .ice_servers = {}, 78 | .audio_codec = CODEC_OPUS, 79 | .video_codec = CODEC_NONE, 80 | .datachannel = DATA_CHANNEL_STRING, 81 | .onaudiotrack = [](uint8_t *data, size_t size, void *userdata) -> void { 82 | #ifndef LINUX_BUILD 83 | pipecat_audio_decode(data, size); 84 | #endif 85 | }, 86 | .onvideotrack = NULL, 87 | .on_request_keyframe = NULL, 88 | .user_data = NULL, 89 | }; 90 | 91 | peer_connection = peer_connection_create(&peer_connection_config); 92 | if (peer_connection == NULL) { 93 | ESP_LOGE(LOG_TAG, "Failed to create peer connection"); 94 | #ifndef LINUX_BUILD 95 | esp_restart(); 96 | #endif 97 | } 98 | 99 | peer_connection_oniceconnectionstatechange( 100 | peer_connection, pipecat_onconnectionstatechange_task); 101 | 
peer_connection_onicecandidate(peer_connection, pipecat_on_icecandidate_task); 102 | peer_connection_ondatachannel(peer_connection, 103 | pipecat_ondatachannel_onmessage_task, 104 | pipecat_ondatachannel_onopen_task, NULL); 105 | 106 | peer_connection_create_offer(peer_connection); 107 | } 108 | 109 | void pipecat_webrtc_loop() { 110 | peer_connection_loop(peer_connection); 111 | } 112 | -------------------------------------------------------------------------------- /esp32-m5stack-cores3/src/webrtc.cpp: -------------------------------------------------------------------------------- 1 | #ifndef LINUX_BUILD 2 | #include 3 | #include 4 | #endif 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include "main.h" 11 | 12 | static PeerConnection *peer_connection = NULL; 13 | 14 | #ifndef LINUX_BUILD 15 | StaticTask_t task_buffer; 16 | void pipecat_send_audio_task(void *user_data) { 17 | pipecat_init_audio_encoder(); 18 | 19 | while (1) { 20 | pipecat_send_audio(peer_connection); 21 | vTaskDelay(pdMS_TO_TICKS(TICK_INTERVAL)); 22 | } 23 | } 24 | #endif 25 | 26 | static void pipecat_ondatachannel_onmessage_task(char *msg, size_t len, 27 | void *userdata, uint16_t sid) { 28 | #ifdef LOG_DATACHANNEL_MESSAGES 29 | ESP_LOGI(LOG_TAG, "DataChannel Message: %s", msg); 30 | #endif 31 | pipecat_rtvi_handle_message(msg); 32 | } 33 | 34 | static void pipecat_ondatachannel_onopen_task(void *userdata) { 35 | if (peer_connection_create_datachannel(peer_connection, DATA_CHANNEL_RELIABLE, 36 | 0, 0, (char *)"rtvi-ai", 37 | (char *)"") != -1) { 38 | ESP_LOGI(LOG_TAG, "DataChannel created"); 39 | } else { 40 | ESP_LOGE(LOG_TAG, "Failed to create DataChannel"); 41 | } 42 | } 43 | 44 | static void pipecat_onconnectionstatechange_task(PeerConnectionState state, 45 | void *user_data) { 46 | ESP_LOGI(LOG_TAG, "PeerConnectionState: %s", 47 | peer_connection_state_to_string(state)); 48 | 49 | if (state == PEER_CONNECTION_DISCONNECTED || 50 | state == PEER_CONNECTION_CLOSED) { 51 | #ifndef 
LINUX_BUILD 52 | esp_restart(); 53 | #endif 54 | } else if (state == PEER_CONNECTION_CONNECTED) { 55 | #ifndef LINUX_BUILD 56 | StackType_t *stack_memory = (StackType_t *)heap_caps_malloc( 57 | 30000 * sizeof(StackType_t), MALLOC_CAP_SPIRAM); 58 | xTaskCreateStaticPinnedToCore(pipecat_send_audio_task, "audio_publisher", 59 | 30000, NULL, 7, stack_memory, &task_buffer, 60 | 0); 61 | pipecat_init_rtvi(peer_connection, &pipecat_rtvi_callbacks); 62 | #endif 63 | } 64 | } 65 | 66 | static void pipecat_on_icecandidate_task(char *description, void *user_data) { 67 | char *local_buffer = (char *)malloc(MAX_HTTP_OUTPUT_BUFFER + 1); 68 | memset(local_buffer, 0, MAX_HTTP_OUTPUT_BUFFER + 1); 69 | pipecat_http_request(description, local_buffer); 70 | peer_connection_set_remote_description(peer_connection, local_buffer, 71 | SDP_TYPE_ANSWER); 72 | free(local_buffer); 73 | } 74 | 75 | void pipecat_init_webrtc() { 76 | PeerConfiguration peer_connection_config = { 77 | .ice_servers = {}, 78 | .audio_codec = CODEC_OPUS, 79 | .video_codec = CODEC_NONE, 80 | .datachannel = DATA_CHANNEL_STRING, 81 | .onaudiotrack = [](uint8_t *data, size_t size, void *userdata) -> void { 82 | #ifndef LINUX_BUILD 83 | pipecat_audio_decode(data, size); 84 | #endif 85 | }, 86 | .onvideotrack = NULL, 87 | .on_request_keyframe = NULL, 88 | .user_data = NULL, 89 | }; 90 | 91 | peer_connection = peer_connection_create(&peer_connection_config); 92 | if (peer_connection == NULL) { 93 | ESP_LOGE(LOG_TAG, "Failed to create peer connection"); 94 | #ifndef LINUX_BUILD 95 | esp_restart(); 96 | #endif 97 | } 98 | 99 | peer_connection_oniceconnectionstatechange( 100 | peer_connection, pipecat_onconnectionstatechange_task); 101 | peer_connection_onicecandidate(peer_connection, pipecat_on_icecandidate_task); 102 | peer_connection_ondatachannel(peer_connection, 103 | pipecat_ondatachannel_onmessage_task, 104 | pipecat_ondatachannel_onopen_task, NULL); 105 | 106 | peer_connection_create_offer(peer_connection); 107 | } 108 
| 109 | void pipecat_webrtc_loop() { 110 | peer_connection_loop(peer_connection); 111 | } 112 | -------------------------------------------------------------------------------- /esp32-m5stack-atoms3r/src/webrtc.cpp: -------------------------------------------------------------------------------- 1 | #ifndef LINUX_BUILD 2 | #include 3 | #include 4 | #endif 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include "main.h" 11 | 12 | static PeerConnection *peer_connection = NULL; 13 | 14 | #ifndef LINUX_BUILD 15 | StaticTask_t task_buffer; 16 | void pipecat_send_audio_task(void *user_data) { 17 | pipecat_init_audio_encoder(); 18 | 19 | while (1) { 20 | pipecat_send_audio(peer_connection); 21 | vTaskDelay(pdMS_TO_TICKS(TICK_INTERVAL)); 22 | } 23 | } 24 | #endif 25 | 26 | static void pipecat_ondatachannel_onmessage_task(char *msg, size_t len, 27 | void *userdata, uint16_t sid) { 28 | #ifdef LOG_DATACHANNEL_MESSAGES 29 | ESP_LOGI(LOG_TAG, "DataChannel Message: %s", msg); 30 | #endif 31 | pipecat_rtvi_handle_message(msg); 32 | } 33 | 34 | static void pipecat_ondatachannel_onopen_task(void *userdata) { 35 | if (peer_connection_create_datachannel(peer_connection, DATA_CHANNEL_RELIABLE, 36 | 0, 0, (char *)"rtvi-ai", 37 | (char *)"") != -1) { 38 | ESP_LOGI(LOG_TAG, "DataChannel created"); 39 | } else { 40 | ESP_LOGE(LOG_TAG, "Failed to create DataChannel"); 41 | } 42 | } 43 | 44 | static void pipecat_onconnectionstatechange_task(PeerConnectionState state, 45 | void *user_data) { 46 | ESP_LOGI(LOG_TAG, "PeerConnectionState: %s", 47 | peer_connection_state_to_string(state)); 48 | 49 | if (state == PEER_CONNECTION_DISCONNECTED || 50 | state == PEER_CONNECTION_CLOSED) { 51 | #ifndef LINUX_BUILD 52 | esp_restart(); 53 | #endif 54 | } else if (state == PEER_CONNECTION_CONNECTED) { 55 | #ifndef LINUX_BUILD 56 | StackType_t *stack_memory = (StackType_t *)heap_caps_malloc( 57 | 30000 * sizeof(StackType_t), MALLOC_CAP_SPIRAM); 58 | 
xTaskCreateStaticPinnedToCore(pipecat_send_audio_task, "audio_publisher", 59 | 30000, NULL, 7, stack_memory, &task_buffer, 60 | 0); 61 | pipecat_init_rtvi(peer_connection, &pipecat_rtvi_callbacks); 62 | #endif 63 | } 64 | } 65 | 66 | static void pipecat_on_icecandidate_task(char *description, void *user_data) { 67 | char *local_buffer = (char *)malloc(MAX_HTTP_OUTPUT_BUFFER + 1); 68 | memset(local_buffer, 0, MAX_HTTP_OUTPUT_BUFFER + 1); 69 | pipecat_http_request(description, local_buffer); 70 | peer_connection_set_remote_description(peer_connection, local_buffer, 71 | SDP_TYPE_ANSWER); 72 | free(local_buffer); 73 | } 74 | 75 | void pipecat_init_webrtc() { 76 | PeerConfiguration peer_connection_config = { 77 | .ice_servers = {}, 78 | .audio_codec = CODEC_OPUS, 79 | .video_codec = CODEC_NONE, 80 | .datachannel = DATA_CHANNEL_STRING, 81 | .onaudiotrack = [](uint8_t *data, size_t size, void *userdata) -> void { 82 | #ifndef LINUX_BUILD 83 | pipecat_audio_decode(data, size); 84 | #endif 85 | }, 86 | .onvideotrack = NULL, 87 | .on_request_keyframe = NULL, 88 | .user_data = NULL, 89 | }; 90 | 91 | peer_connection = peer_connection_create(&peer_connection_config); 92 | if (peer_connection == NULL) { 93 | ESP_LOGE(LOG_TAG, "Failed to create peer connection"); 94 | #ifndef LINUX_BUILD 95 | esp_restart(); 96 | #endif 97 | } 98 | 99 | peer_connection_oniceconnectionstatechange( 100 | peer_connection, pipecat_onconnectionstatechange_task); 101 | peer_connection_onicecandidate(peer_connection, pipecat_on_icecandidate_task); 102 | peer_connection_ondatachannel(peer_connection, 103 | pipecat_ondatachannel_onmessage_task, 104 | pipecat_ondatachannel_onopen_task, NULL); 105 | 106 | peer_connection_create_offer(peer_connection); 107 | } 108 | 109 | void pipecat_webrtc_loop() { 110 | peer_connection_loop(peer_connection); 111 | } 112 | -------------------------------------------------------------------------------- /esp32-s3-box-3/src/media.cpp: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | #include 6 | 7 | #include "esp_log.h" 8 | #include "main.h" 9 | 10 | #define SAMPLE_RATE (16000) 11 | 12 | #define OPUS_BUFFER_SIZE 1276 // 1276 bytes is recommended by opus_encode 13 | #define PCM_BUFFER_SIZE 640 14 | 15 | #define OPUS_ENCODER_BITRATE 30000 16 | #define OPUS_ENCODER_COMPLEXITY 0 17 | 18 | std::atomic is_playing = false; 19 | void set_is_playing(int16_t *in_buf, size_t in_samples) { 20 | bool any_set = false; 21 | for (size_t i = 0; i < in_samples; i++) { 22 | if (in_buf[i] != -1 && in_buf[i] != 0 && in_buf[i] != 1) { 23 | any_set = true; 24 | } 25 | } 26 | is_playing = any_set; 27 | } 28 | 29 | esp_codec_dev_handle_t mic_codec_dev = NULL; 30 | esp_codec_dev_handle_t spk_codec_dev = NULL; 31 | 32 | void pipecat_init_audio_capture() { 33 | mic_codec_dev = bsp_audio_codec_microphone_init(); 34 | spk_codec_dev = bsp_audio_codec_speaker_init(); 35 | 36 | esp_codec_dev_set_in_gain(mic_codec_dev, 42.0); 37 | esp_codec_dev_set_out_vol(spk_codec_dev, 255); 38 | 39 | esp_codec_dev_sample_info_t fs = { 40 | .bits_per_sample = 16, 41 | .channel = 1, 42 | .sample_rate = SAMPLE_RATE, 43 | }; 44 | esp_codec_dev_open(mic_codec_dev, &fs); 45 | esp_codec_dev_open(spk_codec_dev, &fs); 46 | } 47 | 48 | opus_int16 *decoder_buffer = NULL; 49 | OpusDecoder *opus_decoder = NULL; 50 | 51 | void pipecat_init_audio_decoder() { 52 | int decoder_error = 0; 53 | opus_decoder = opus_decoder_create(SAMPLE_RATE, 1, &decoder_error); 54 | if (decoder_error != OPUS_OK) { 55 | printf("Failed to create OPUS decoder"); 56 | return; 57 | } 58 | 59 | decoder_buffer = (opus_int16 *)malloc(PCM_BUFFER_SIZE); 60 | } 61 | 62 | void pipecat_audio_decode(uint8_t *data, size_t size) { 63 | esp_err_t ret; 64 | int decoded_size = 65 | opus_decode(opus_decoder, data, size, decoder_buffer, PCM_BUFFER_SIZE, 0); 66 | 67 | if (decoded_size > 0) { 68 | 
set_is_playing(decoder_buffer, decoded_size); 69 | if ((ret = esp_codec_dev_write(spk_codec_dev, decoder_buffer, 70 | decoded_size * sizeof(uint16_t))) != 71 | ESP_OK) { 72 | ESP_LOGE(LOG_TAG, "esp_codec_dev_write failed: %s", esp_err_to_name(ret)); 73 | } 74 | } 75 | } 76 | 77 | OpusEncoder *opus_encoder = NULL; 78 | uint8_t *encoder_output_buffer = NULL; 79 | uint8_t *read_buffer = NULL; 80 | 81 | void pipecat_init_audio_encoder() { 82 | int encoder_error; 83 | opus_encoder = opus_encoder_create(SAMPLE_RATE, 1, OPUS_APPLICATION_VOIP, 84 | &encoder_error); 85 | if (encoder_error != OPUS_OK) { 86 | printf("Failed to create OPUS encoder"); 87 | return; 88 | } 89 | 90 | if (opus_encoder_init(opus_encoder, SAMPLE_RATE, 1, OPUS_APPLICATION_VOIP) != 91 | OPUS_OK) { 92 | printf("Failed to initialize OPUS encoder"); 93 | return; 94 | } 95 | 96 | opus_encoder_ctl(opus_encoder, OPUS_SET_BITRATE(OPUS_ENCODER_BITRATE)); 97 | opus_encoder_ctl(opus_encoder, OPUS_SET_COMPLEXITY(OPUS_ENCODER_COMPLEXITY)); 98 | opus_encoder_ctl(opus_encoder, OPUS_SET_SIGNAL(OPUS_SIGNAL_VOICE)); 99 | 100 | read_buffer = 101 | (uint8_t *)heap_caps_malloc(PCM_BUFFER_SIZE, MALLOC_CAP_DEFAULT); 102 | encoder_output_buffer = (uint8_t *)malloc(OPUS_BUFFER_SIZE); 103 | } 104 | 105 | void pipecat_send_audio(PeerConnection *peer_connection) { 106 | if (esp_codec_dev_read(mic_codec_dev, read_buffer, PCM_BUFFER_SIZE) != 107 | ESP_OK) { 108 | printf("esp_codec_dev_read failed"); 109 | return; 110 | } 111 | 112 | if (is_playing) { 113 | memset(read_buffer, 0, PCM_BUFFER_SIZE); 114 | } 115 | 116 | auto encoded_size = opus_encode(opus_encoder, (const opus_int16 *)read_buffer, 117 | PCM_BUFFER_SIZE / sizeof(uint16_t), 118 | encoder_output_buffer, OPUS_BUFFER_SIZE); 119 | peer_connection_send_audio(peer_connection, encoder_output_buffer, 120 | encoded_size); 121 | } 122 | -------------------------------------------------------------------------------- /esp32-s3-box-3/src/rtvi.cpp: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "main.h" 9 | 10 | #define MAX_TYPE_LEN 32 11 | #define MAX_ID_LEN 64 12 | 13 | static int rtvi_id = 0; 14 | static QueueHandle_t rtvi_queue; 15 | static PeerConnection *peer_connection = NULL; 16 | static rtvi_callbacks_t *rtvi_callbacks = NULL; 17 | 18 | typedef struct { 19 | cJSON *msg; 20 | } rtvi_msg_t; 21 | 22 | // Simple hashing function so we can fake pattern matching and switch on strings 23 | // as a constexpr so it gets evaluated in compile time for static strings 24 | static constexpr unsigned int hash(const char *s, int off = 0) { 25 | return !s[off] ? 5381 : (hash(s, off + 1) * 33) ^ s[off]; 26 | } 27 | 28 | static rtvi_msg_t *create_rtvi_message(const char *type) { 29 | cJSON *j_msg = cJSON_CreateObject(); 30 | 31 | if (j_msg == NULL) { 32 | ESP_LOGE(LOG_TAG, "Unable to create RTVI message"); 33 | return NULL; 34 | } 35 | if (cJSON_AddStringToObject(j_msg, "label", "rtvi-ai") == NULL) { 36 | cJSON_Delete(j_msg); 37 | ESP_LOGE(LOG_TAG, "Unable to create RTVI message"); 38 | return NULL; 39 | } 40 | if (cJSON_AddStringToObject(j_msg, "type", type) == NULL) { 41 | cJSON_Delete(j_msg); 42 | ESP_LOGE(LOG_TAG, "Unable to create RTVI message"); 43 | return NULL; 44 | } 45 | 46 | char id[MAX_ID_LEN]; 47 | sprintf(id, "%d", rtvi_id++); 48 | if (cJSON_AddStringToObject(j_msg, "id", id) == NULL) { 49 | cJSON_Delete(j_msg); 50 | ESP_LOGE(LOG_TAG, "Unable to create RTVI message"); 51 | return NULL; 52 | } 53 | 54 | rtvi_msg_t *msg = (rtvi_msg_t *)malloc(sizeof(rtvi_msg_t)); 55 | msg->msg = j_msg; 56 | 57 | return msg; 58 | } 59 | 60 | static void destroy_rtvi_message(rtvi_msg_t *msg) { 61 | cJSON_Delete(msg->msg); 62 | free(msg); 63 | } 64 | 65 | static char *rtvi_message_to_string(rtvi_msg_t *msg) { 66 | if (msg == NULL || msg->msg == NULL) { 67 | return NULL; 68 | } 69 | 70 | char *msg_str 
= cJSON_Print(msg->msg); 71 | 72 | return msg_str; 73 | } 74 | 75 | static void rtvi_handle_message(const rtvi_msg_t *msg) { 76 | cJSON *j_type = cJSON_GetObjectItem(msg->msg, "type"); 77 | if (j_type == NULL) { 78 | ESP_LOGE(LOG_TAG, "Unable to find `type` field in RTVI message"); 79 | return; 80 | } 81 | 82 | switch (hash(j_type->valuestring)) { 83 | case hash("bot-started-speaking"): 84 | rtvi_callbacks->on_bot_started_speaking(); 85 | break; 86 | case hash("bot-stopped-speaking"): 87 | rtvi_callbacks->on_bot_stopped_speaking(); 88 | break; 89 | case hash("bot-tts-text"): { 90 | cJSON *j_data = cJSON_GetObjectItem(msg->msg, "data"); 91 | cJSON *j_text = cJSON_GetObjectItem(j_data, "text"); 92 | rtvi_callbacks->on_bot_tts_text(j_text->valuestring); 93 | break; 94 | } 95 | default: 96 | break; 97 | } 98 | } 99 | 100 | static void rtvi_task(void *pvParameter) { 101 | rtvi_msg_t msg; 102 | 103 | while (1) { 104 | if (xQueueReceive(rtvi_queue, &msg, portMAX_DELAY)) { 105 | rtvi_handle_message(&msg); 106 | cJSON_Delete(msg.msg); 107 | } 108 | } 109 | } 110 | 111 | void pipecat_init_rtvi(PeerConnection *connection, 112 | rtvi_callbacks_t *callbacks) { 113 | peer_connection = connection; 114 | rtvi_callbacks = callbacks; 115 | 116 | rtvi_queue = xQueueCreate(10, sizeof(rtvi_msg_t)); 117 | xTaskCreatePinnedToCore(rtvi_task, "RTVI Task", 4096, NULL, 2, NULL, 1); 118 | } 119 | 120 | void pipecat_rtvi_send_client_ready() { 121 | rtvi_msg_t *msg = create_rtvi_message("client-ready"); 122 | 123 | char *msg_str = rtvi_message_to_string(msg); 124 | 125 | peer_connection_datachannel_send(peer_connection, msg_str, strlen(msg_str)); 126 | 127 | cJSON_free(msg_str); 128 | 129 | destroy_rtvi_message(msg); 130 | } 131 | 132 | void pipecat_rtvi_handle_message(const char *msg) { 133 | cJSON *j_msg = cJSON_Parse(msg); 134 | if (j_msg == NULL) { 135 | ESP_LOGE(LOG_TAG, "Error parsing RTVI message"); 136 | return; 137 | } 138 | 139 | rtvi_msg_t rtvi_msg = {.msg = j_msg}; 140 | 141 | 
xQueueSend(rtvi_queue, &rtvi_msg, portMAX_DELAY); 142 | } 143 | -------------------------------------------------------------------------------- /esp32-m5stack-atoms3r/src/rtvi.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "freertos/FreeRTOS.h" 7 | #include "freertos/task.h" 8 | 9 | #include "main.h" 10 | 11 | #define MAX_TYPE_LEN 32 12 | #define MAX_ID_LEN 64 13 | 14 | static int rtvi_id = 0; 15 | static QueueHandle_t rtvi_queue; 16 | static PeerConnection *peer_connection = NULL; 17 | static rtvi_callbacks_t *rtvi_callbacks = NULL; 18 | 19 | typedef struct { 20 | cJSON *msg; 21 | } rtvi_msg_t; 22 | 23 | // Simple hashing function so we can fake pattern matching and switch on strings 24 | // as a constexpr so it gets evaluated in compile time for static strings 25 | static constexpr unsigned int hash(const char *s, int off = 0) { 26 | return !s[off] ? 5381 : (hash(s, off + 1) * 33) ^ s[off]; 27 | } 28 | 29 | static rtvi_msg_t *create_rtvi_message(const char *type) { 30 | cJSON *j_msg = cJSON_CreateObject(); 31 | 32 | if (j_msg == NULL) { 33 | ESP_LOGE(LOG_TAG, "Unable to create RTVI message"); 34 | return NULL; 35 | } 36 | if (cJSON_AddStringToObject(j_msg, "label", "rtvi-ai") == NULL) { 37 | cJSON_Delete(j_msg); 38 | ESP_LOGE(LOG_TAG, "Unable to create RTVI message"); 39 | return NULL; 40 | } 41 | if (cJSON_AddStringToObject(j_msg, "type", type) == NULL) { 42 | cJSON_Delete(j_msg); 43 | ESP_LOGE(LOG_TAG, "Unable to create RTVI message"); 44 | return NULL; 45 | } 46 | 47 | char id[MAX_ID_LEN]; 48 | sprintf(id, "%d", rtvi_id++); 49 | if (cJSON_AddStringToObject(j_msg, "id", id) == NULL) { 50 | cJSON_Delete(j_msg); 51 | ESP_LOGE(LOG_TAG, "Unable to create RTVI message"); 52 | return NULL; 53 | } 54 | 55 | rtvi_msg_t *msg = (rtvi_msg_t *)malloc(sizeof(rtvi_msg_t)); 56 | msg->msg = j_msg; 57 | 58 | return msg; 59 | } 60 | 61 | static void 
destroy_rtvi_message(rtvi_msg_t *msg) { 62 | cJSON_Delete(msg->msg); 63 | free(msg); 64 | } 65 | 66 | static char *rtvi_message_to_string(rtvi_msg_t *msg) { 67 | if (msg == NULL || msg->msg == NULL) { 68 | return NULL; 69 | } 70 | 71 | char *msg_str = cJSON_Print(msg->msg); 72 | 73 | return msg_str; 74 | } 75 | 76 | static void rtvi_handle_message(const rtvi_msg_t *msg) { 77 | cJSON *j_type = cJSON_GetObjectItem(msg->msg, "type"); 78 | if (j_type == NULL) { 79 | ESP_LOGE(LOG_TAG, "Unable to find `type` field in RTVI message"); 80 | return; 81 | } 82 | 83 | switch (hash(j_type->valuestring)) { 84 | case hash("bot-started-speaking"): 85 | rtvi_callbacks->on_bot_started_speaking(); 86 | break; 87 | case hash("bot-stopped-speaking"): 88 | rtvi_callbacks->on_bot_stopped_speaking(); 89 | break; 90 | case hash("bot-tts-text"): { 91 | cJSON *j_data = cJSON_GetObjectItem(msg->msg, "data"); 92 | cJSON *j_text = cJSON_GetObjectItem(j_data, "text"); 93 | rtvi_callbacks->on_bot_tts_text(j_text->valuestring); 94 | break; 95 | } 96 | default: 97 | break; 98 | } 99 | } 100 | 101 | static void rtvi_task(void *pvParameter) { 102 | rtvi_msg_t msg; 103 | 104 | while (1) { 105 | if (xQueueReceive(rtvi_queue, &msg, portMAX_DELAY)) { 106 | rtvi_handle_message(&msg); 107 | cJSON_Delete(msg.msg); 108 | } 109 | } 110 | } 111 | 112 | void pipecat_init_rtvi(PeerConnection *connection, 113 | rtvi_callbacks_t *callbacks) { 114 | peer_connection = connection; 115 | rtvi_callbacks = callbacks; 116 | 117 | rtvi_queue = xQueueCreate(10, sizeof(rtvi_msg_t)); 118 | xTaskCreatePinnedToCore(rtvi_task, "RTVI Task", 4096, NULL, 2, NULL, 1); 119 | } 120 | 121 | void pipecat_rtvi_send_client_ready() { 122 | rtvi_msg_t *msg = create_rtvi_message("client-ready"); 123 | 124 | char *msg_str = rtvi_message_to_string(msg); 125 | 126 | peer_connection_datachannel_send(peer_connection, msg_str, strlen(msg_str)); 127 | 128 | cJSON_free(msg_str); 129 | 130 | destroy_rtvi_message(msg); 131 | } 132 | 133 | void 
pipecat_rtvi_handle_message(const char *msg) { 134 | cJSON *j_msg = cJSON_Parse(msg); 135 | if (j_msg == NULL) { 136 | ESP_LOGE(LOG_TAG, "Error parsing RTVI message"); 137 | return; 138 | } 139 | 140 | rtvi_msg_t rtvi_msg = {.msg = j_msg}; 141 | 142 | xQueueSend(rtvi_queue, &rtvi_msg, portMAX_DELAY); 143 | } 144 | -------------------------------------------------------------------------------- /esp32-m5stack-cores3/src/rtvi.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "freertos/FreeRTOS.h" 7 | #include "freertos/task.h" 8 | 9 | #include "main.h" 10 | 11 | #define MAX_TYPE_LEN 32 12 | #define MAX_ID_LEN 64 13 | 14 | static int rtvi_id = 0; 15 | static QueueHandle_t rtvi_queue; 16 | static PeerConnection *peer_connection = NULL; 17 | static rtvi_callbacks_t *rtvi_callbacks = NULL; 18 | 19 | typedef struct { 20 | cJSON *msg; 21 | } rtvi_msg_t; 22 | 23 | // Simple hashing function so we can fake pattern matching and switch on strings 24 | // as a constexpr so it gets evaluated in compile time for static strings 25 | static constexpr unsigned int hash(const char *s, int off = 0) { 26 | return !s[off] ? 
5381 : (hash(s, off + 1) * 33) ^ s[off]; 27 | } 28 | 29 | static rtvi_msg_t *create_rtvi_message(const char *type) { 30 | cJSON *j_msg = cJSON_CreateObject(); 31 | 32 | if (j_msg == NULL) { 33 | ESP_LOGE(LOG_TAG, "Unable to create RTVI message"); 34 | return NULL; 35 | } 36 | if (cJSON_AddStringToObject(j_msg, "label", "rtvi-ai") == NULL) { 37 | cJSON_Delete(j_msg); 38 | ESP_LOGE(LOG_TAG, "Unable to create RTVI message"); 39 | return NULL; 40 | } 41 | if (cJSON_AddStringToObject(j_msg, "type", type) == NULL) { 42 | cJSON_Delete(j_msg); 43 | ESP_LOGE(LOG_TAG, "Unable to create RTVI message"); 44 | return NULL; 45 | } 46 | 47 | char id[MAX_ID_LEN]; 48 | sprintf(id, "%d", rtvi_id++); 49 | if (cJSON_AddStringToObject(j_msg, "id", id) == NULL) { 50 | cJSON_Delete(j_msg); 51 | ESP_LOGE(LOG_TAG, "Unable to create RTVI message"); 52 | return NULL; 53 | } 54 | 55 | rtvi_msg_t *msg = (rtvi_msg_t *)malloc(sizeof(rtvi_msg_t)); 56 | msg->msg = j_msg; 57 | 58 | return msg; 59 | } 60 | 61 | static void destroy_rtvi_message(rtvi_msg_t *msg) { 62 | cJSON_Delete(msg->msg); 63 | free(msg); 64 | } 65 | 66 | static char *rtvi_message_to_string(rtvi_msg_t *msg) { 67 | if (msg == NULL || msg->msg == NULL) { 68 | return NULL; 69 | } 70 | 71 | char *msg_str = cJSON_Print(msg->msg); 72 | 73 | return msg_str; 74 | } 75 | 76 | static void rtvi_handle_message(const rtvi_msg_t *msg) { 77 | cJSON *j_type = cJSON_GetObjectItem(msg->msg, "type"); 78 | if (j_type == NULL) { 79 | ESP_LOGE(LOG_TAG, "Unable to find `type` field in RTVI message"); 80 | return; 81 | } 82 | 83 | switch (hash(j_type->valuestring)) { 84 | case hash("bot-started-speaking"): 85 | rtvi_callbacks->on_bot_started_speaking(); 86 | break; 87 | case hash("bot-stopped-speaking"): 88 | rtvi_callbacks->on_bot_stopped_speaking(); 89 | break; 90 | case hash("bot-tts-text"): { 91 | cJSON *j_data = cJSON_GetObjectItem(msg->msg, "data"); 92 | cJSON *j_text = cJSON_GetObjectItem(j_data, "text"); 93 | 
rtvi_callbacks->on_bot_tts_text(j_text->valuestring); 94 | break; 95 | } 96 | default: 97 | break; 98 | } 99 | } 100 | 101 | static void rtvi_task(void *pvParameter) { 102 | rtvi_msg_t msg; 103 | 104 | while (1) { 105 | if (xQueueReceive(rtvi_queue, &msg, portMAX_DELAY)) { 106 | rtvi_handle_message(&msg); 107 | cJSON_Delete(msg.msg); 108 | } 109 | } 110 | } 111 | 112 | void pipecat_init_rtvi(PeerConnection *connection, 113 | rtvi_callbacks_t *callbacks) { 114 | peer_connection = connection; 115 | rtvi_callbacks = callbacks; 116 | 117 | rtvi_queue = xQueueCreate(10, sizeof(rtvi_msg_t)); 118 | xTaskCreatePinnedToCore(rtvi_task, "RTVI Task", 4096, NULL, 2, NULL, 1); 119 | } 120 | 121 | void pipecat_rtvi_send_client_ready() { 122 | rtvi_msg_t *msg = create_rtvi_message("client-ready"); 123 | 124 | char *msg_str = rtvi_message_to_string(msg); 125 | 126 | peer_connection_datachannel_send(peer_connection, msg_str, strlen(msg_str)); 127 | 128 | cJSON_free(msg_str); 129 | 130 | destroy_rtvi_message(msg); 131 | } 132 | 133 | void pipecat_rtvi_handle_message(const char *msg) { 134 | cJSON *j_msg = cJSON_Parse(msg); 135 | if (j_msg == NULL) { 136 | ESP_LOGE(LOG_TAG, "Error parsing RTVI message"); 137 | return; 138 | } 139 | 140 | rtvi_msg_t rtvi_msg = {.msg = j_msg}; 141 | 142 | xQueueSend(rtvi_queue, &rtvi_msg, portMAX_DELAY); 143 | } 144 | -------------------------------------------------------------------------------- /esp32-m5stack-cores3/src/media.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | #include "esp_check.h" 12 | #include "esp_log.h" 13 | #include "esp_heap_caps.h" 14 | #include "main.h" 15 | 16 | #define SAMPLE_RATE (16000) 17 | 18 | #define OPUS_BUFFER_SIZE 1276 // 1276 bytes is recommended by opus_encode 19 | #define PCM_BUFFER_SIZE 640 20 | 21 | #define OPUS_ENCODER_BITRATE 30000 22 | #define 
OPUS_ENCODER_COMPLEXITY 0 23 | 24 | std::atomic is_playing = false; 25 | unsigned int silence_count = 0; 26 | 27 | void set_is_playing(int16_t *in_buf, size_t in_samples) { 28 | bool any_set = false; 29 | for (size_t i = 0; i < in_samples; i++) { 30 | if (in_buf[i] != -1 && in_buf[i] != 0 && in_buf[i] != 1) { 31 | any_set = true; 32 | } 33 | } 34 | 35 | if (any_set) { 36 | silence_count = 0; 37 | } else { 38 | silence_count++; 39 | } 40 | 41 | if (silence_count >= 20 && is_playing) { 42 | M5.Speaker.end(); 43 | M5.Mic.begin(); 44 | is_playing = false; 45 | } else if (any_set && !is_playing) { 46 | M5.Mic.end(); 47 | M5.Speaker.begin(); 48 | is_playing = true; 49 | } 50 | } 51 | 52 | void pipecat_init_audio_capture() { 53 | M5.Speaker.setVolume(255); 54 | } 55 | 56 | opus_int16 *decoder_buffer = NULL; 57 | OpusDecoder *opus_decoder = NULL; 58 | 59 | void pipecat_init_audio_decoder() { 60 | int decoder_error = 0; 61 | opus_decoder = opus_decoder_create(SAMPLE_RATE, 1, &decoder_error); 62 | if (decoder_error != OPUS_OK) { 63 | printf("Failed to create OPUS decoder"); 64 | return; 65 | } 66 | 67 | decoder_buffer = (opus_int16 *)malloc(PCM_BUFFER_SIZE); 68 | } 69 | 70 | void double_volume(int16_t *samples, size_t num_samples) { 71 | for (size_t i = 0; i < num_samples; i++) { 72 | int32_t amplified = (int32_t)samples[i] * 2; 73 | 74 | // Clamp to 16-bit range 75 | if (amplified > INT16_MAX) { 76 | amplified = INT16_MAX; 77 | } else if (amplified < INT16_MIN) { 78 | amplified = INT16_MIN; 79 | } 80 | 81 | samples[i] = (int16_t)amplified; 82 | } 83 | } 84 | 85 | void pipecat_audio_decode(uint8_t *data, size_t size) { 86 | int decoded_size = 87 | opus_decode(opus_decoder, data, size, decoder_buffer, PCM_BUFFER_SIZE, 0); 88 | 89 | if (decoded_size > 0) { 90 | set_is_playing(decoder_buffer, decoded_size); 91 | if (is_playing) { 92 | double_volume(decoder_buffer, decoded_size); 93 | M5.Speaker.playRaw(decoder_buffer, decoded_size, SAMPLE_RATE); 94 | } 95 | } 96 | } 97 | 98 | 
OpusEncoder *opus_encoder = NULL; 99 | uint8_t *encoder_output_buffer = NULL; 100 | int16_t *read_buffer = NULL; 101 | 102 | void pipecat_init_audio_encoder() { 103 | int encoder_error; 104 | opus_encoder = opus_encoder_create(SAMPLE_RATE, 1, OPUS_APPLICATION_VOIP, 105 | &encoder_error); 106 | if (encoder_error != OPUS_OK) { 107 | printf("Failed to create OPUS encoder"); 108 | return; 109 | } 110 | 111 | if (opus_encoder_init(opus_encoder, SAMPLE_RATE, 1, OPUS_APPLICATION_VOIP) != 112 | OPUS_OK) { 113 | printf("Failed to initialize OPUS encoder"); 114 | return; 115 | } 116 | 117 | opus_encoder_ctl(opus_encoder, OPUS_SET_BITRATE(OPUS_ENCODER_BITRATE)); 118 | opus_encoder_ctl(opus_encoder, OPUS_SET_COMPLEXITY(OPUS_ENCODER_COMPLEXITY)); 119 | opus_encoder_ctl(opus_encoder, OPUS_SET_SIGNAL(OPUS_SIGNAL_VOICE)); 120 | 121 | read_buffer = (int16_t *)heap_caps_malloc(PCM_BUFFER_SIZE, MALLOC_CAP_DEFAULT); 122 | encoder_output_buffer = (uint8_t *)malloc(OPUS_BUFFER_SIZE); 123 | } 124 | 125 | void pipecat_send_audio(PeerConnection *peer_connection) { 126 | if (is_playing) { 127 | memset(read_buffer, 0, PCM_BUFFER_SIZE); 128 | vTaskDelay(pdMS_TO_TICKS(20)); 129 | } else { 130 | M5.Mic.record(read_buffer, PCM_BUFFER_SIZE / sizeof(uint16_t), SAMPLE_RATE); 131 | } 132 | 133 | auto encoded_size = opus_encode(opus_encoder, (const opus_int16 *)read_buffer, 134 | PCM_BUFFER_SIZE / sizeof(uint16_t), 135 | encoder_output_buffer, OPUS_BUFFER_SIZE); 136 | peer_connection_send_audio(peer_connection, encoder_output_buffer, 137 | encoded_size); 138 | } 139 | -------------------------------------------------------------------------------- /esp32-s3-box-3/src/http.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "main.h" 7 | 8 | #ifndef MIN 9 | #define MIN(a, b) (((a) < (b)) ? 
(a) : (b)) 10 | #endif 11 | 12 | static esp_err_t http_event_handler(esp_http_client_event_t *evt) { 13 | static int output_len; 14 | switch (evt->event_id) { 15 | case HTTP_EVENT_REDIRECT: 16 | ESP_LOGD(LOG_TAG, "HTTP_EVENT_REDIRECT"); 17 | esp_http_client_set_header(evt->client, "From", "user@example.com"); 18 | esp_http_client_set_header(evt->client, "Accept", "text/html"); 19 | esp_http_client_set_redirection(evt->client); 20 | break; 21 | case HTTP_EVENT_ERROR: 22 | ESP_LOGD(LOG_TAG, "HTTP_EVENT_ERROR"); 23 | break; 24 | case HTTP_EVENT_ON_CONNECTED: 25 | ESP_LOGD(LOG_TAG, "HTTP_EVENT_ON_CONNECTED"); 26 | break; 27 | case HTTP_EVENT_HEADER_SENT: 28 | ESP_LOGD(LOG_TAG, "HTTP_EVENT_HEADER_SENT"); 29 | break; 30 | case HTTP_EVENT_ON_HEADER: 31 | ESP_LOGD(LOG_TAG, "HTTP_EVENT_ON_HEADER, key=%s, value=%s", 32 | evt->header_key, evt->header_value); 33 | break; 34 | case HTTP_EVENT_ON_DATA: { 35 | ESP_LOGD(LOG_TAG, "HTTP_EVENT_ON_DATA, len=%d", evt->data_len); 36 | if (esp_http_client_is_chunked_response(evt->client)) { 37 | ESP_LOGE(LOG_TAG, "Chunked HTTP response not supported"); 38 | #ifndef LINUX_BUILD 39 | esp_restart(); 40 | #endif 41 | } 42 | 43 | if (output_len == 0 && evt->user_data) { 44 | memset(evt->user_data, 0, MAX_HTTP_OUTPUT_BUFFER); 45 | } 46 | 47 | // If user_data buffer is configured, copy the response into the buffer 48 | int copy_len = 0; 49 | if (evt->user_data) { 50 | // The last byte in evt->user_data is kept for the NULL character in 51 | // case of out-of-bound access. 
52 | copy_len = MIN(evt->data_len, (MAX_HTTP_OUTPUT_BUFFER - output_len)); 53 | if (copy_len) { 54 | memcpy(((char *)evt->user_data) + output_len, evt->data, copy_len); 55 | } 56 | } 57 | output_len += copy_len; 58 | 59 | break; 60 | } 61 | case HTTP_EVENT_ON_FINISH: 62 | ESP_LOGD(LOG_TAG, "HTTP_EVENT_ON_FINISH"); 63 | output_len = 0; 64 | break; 65 | case HTTP_EVENT_DISCONNECTED: 66 | ESP_LOGI(LOG_TAG, "HTTP_EVENT_DISCONNECTED"); 67 | output_len = 0; 68 | break; 69 | } 70 | return ESP_OK; 71 | } 72 | 73 | void pipecat_http_request(char *offer, char *answer) { 74 | esp_http_client_config_t config; 75 | memset(&config, 0, sizeof(esp_http_client_config_t)); 76 | 77 | config.url = PIPECAT_SMALLWEBRTC_URL; 78 | config.event_handler = http_event_handler; 79 | config.timeout_ms = HTTP_TIMEOUT_MS; 80 | config.user_data = answer; 81 | 82 | ESP_LOGI(LOG_TAG, "Connecting to %s", config.url); 83 | 84 | cJSON *j_offer = cJSON_CreateObject(); 85 | if (j_offer == NULL) { 86 | ESP_LOGE(LOG_TAG, "Unable to create JSON offer"); 87 | return; 88 | } 89 | if (cJSON_AddStringToObject(j_offer, "sdp", offer) == NULL) { 90 | cJSON_Delete(j_offer); 91 | ESP_LOGE(LOG_TAG, "Unable to create JSON offer"); 92 | return; 93 | } 94 | if (cJSON_AddStringToObject(j_offer, "type", "offer") == NULL) { 95 | cJSON_Delete(j_offer); 96 | ESP_LOGE(LOG_TAG, "Unable to create JSON offer"); 97 | return; 98 | } 99 | 100 | ESP_LOGD(LOG_TAG, "OFFER\n%s", offer); 101 | 102 | char *j_offer_str = cJSON_Print(j_offer); 103 | 104 | cJSON_Delete(j_offer); 105 | 106 | esp_http_client_handle_t client = esp_http_client_init(&config); 107 | esp_http_client_set_method(client, HTTP_METHOD_POST); 108 | esp_http_client_set_header(client, "Content-Type", "application/json"); 109 | esp_http_client_set_post_field(client, j_offer_str, strlen(j_offer_str)); 110 | 111 | esp_err_t err = esp_http_client_perform(client); 112 | int status_code = esp_http_client_get_status_code(client); 113 | if (err != ESP_OK || status_code != 200) { 
114 | ESP_LOGE(LOG_TAG, "Error perform http request %s (status %d)", 115 | esp_err_to_name(err), status_code); 116 | #ifndef LINUX_BUILD 117 | esp_restart(); 118 | #endif 119 | } 120 | 121 | cJSON *j_response = cJSON_Parse((const char *)answer); 122 | if (j_response == NULL) { 123 | ESP_LOGE(LOG_TAG, "Error parsing HTTP response"); 124 | #ifndef LINUX_BUILD 125 | esp_restart(); 126 | #endif 127 | } 128 | 129 | cJSON *j_answer = cJSON_GetObjectItem(j_response, "sdp"); 130 | if (j_answer == NULL) { 131 | ESP_LOGE(LOG_TAG, "Unable to find `sdp` field in response"); 132 | #ifndef LINUX_BUILD 133 | esp_restart(); 134 | #endif 135 | } 136 | 137 | memset(answer, 0, MAX_HTTP_OUTPUT_BUFFER + 1); 138 | memcpy(answer, j_answer->valuestring, strlen(j_answer->valuestring)); 139 | 140 | ESP_LOGD(LOG_TAG, "ANSWER\n%s", answer); 141 | 142 | cJSON_Delete(j_response); 143 | 144 | esp_http_client_cleanup(client); 145 | } 146 | -------------------------------------------------------------------------------- /esp32-m5stack-atoms3r/src/http.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "main.h" 7 | 8 | #ifndef MIN 9 | #define MIN(a, b) (((a) < (b)) ? 
(a) : (b)) 10 | #endif 11 | 12 | static esp_err_t http_event_handler(esp_http_client_event_t *evt) { 13 | static int output_len; 14 | switch (evt->event_id) { 15 | case HTTP_EVENT_REDIRECT: 16 | ESP_LOGD(LOG_TAG, "HTTP_EVENT_REDIRECT"); 17 | esp_http_client_set_header(evt->client, "From", "user@example.com"); 18 | esp_http_client_set_header(evt->client, "Accept", "text/html"); 19 | esp_http_client_set_redirection(evt->client); 20 | break; 21 | case HTTP_EVENT_ERROR: 22 | ESP_LOGD(LOG_TAG, "HTTP_EVENT_ERROR"); 23 | break; 24 | case HTTP_EVENT_ON_CONNECTED: 25 | ESP_LOGD(LOG_TAG, "HTTP_EVENT_ON_CONNECTED"); 26 | break; 27 | case HTTP_EVENT_HEADER_SENT: 28 | ESP_LOGD(LOG_TAG, "HTTP_EVENT_HEADER_SENT"); 29 | break; 30 | case HTTP_EVENT_ON_HEADER: 31 | ESP_LOGD(LOG_TAG, "HTTP_EVENT_ON_HEADER, key=%s, value=%s", 32 | evt->header_key, evt->header_value); 33 | break; 34 | case HTTP_EVENT_ON_DATA: { 35 | ESP_LOGD(LOG_TAG, "HTTP_EVENT_ON_DATA, len=%d", evt->data_len); 36 | if (esp_http_client_is_chunked_response(evt->client)) { 37 | ESP_LOGE(LOG_TAG, "Chunked HTTP response not supported"); 38 | #ifndef LINUX_BUILD 39 | esp_restart(); 40 | #endif 41 | } 42 | 43 | if (output_len == 0 && evt->user_data) { 44 | memset(evt->user_data, 0, MAX_HTTP_OUTPUT_BUFFER); 45 | } 46 | 47 | // If user_data buffer is configured, copy the response into the buffer 48 | int copy_len = 0; 49 | if (evt->user_data) { 50 | // The last byte in evt->user_data is kept for the NULL character in 51 | // case of out-of-bound access. 
52 | copy_len = MIN(evt->data_len, (MAX_HTTP_OUTPUT_BUFFER - output_len)); 53 | if (copy_len) { 54 | memcpy(((char *)evt->user_data) + output_len, evt->data, copy_len); 55 | } 56 | } 57 | output_len += copy_len; 58 | 59 | break; 60 | } 61 | case HTTP_EVENT_ON_FINISH: 62 | ESP_LOGD(LOG_TAG, "HTTP_EVENT_ON_FINISH"); 63 | output_len = 0; 64 | break; 65 | case HTTP_EVENT_DISCONNECTED: 66 | ESP_LOGI(LOG_TAG, "HTTP_EVENT_DISCONNECTED"); 67 | output_len = 0; 68 | break; 69 | } 70 | return ESP_OK; 71 | } 72 | 73 | void pipecat_http_request(char *offer, char *answer) { 74 | esp_http_client_config_t config; 75 | memset(&config, 0, sizeof(esp_http_client_config_t)); 76 | 77 | config.url = PIPECAT_SMALLWEBRTC_URL; 78 | config.event_handler = http_event_handler; 79 | config.timeout_ms = HTTP_TIMEOUT_MS; 80 | config.user_data = answer; 81 | 82 | ESP_LOGI(LOG_TAG, "Connecting to %s", config.url); 83 | 84 | cJSON *j_offer = cJSON_CreateObject(); 85 | if (j_offer == NULL) { 86 | ESP_LOGE(LOG_TAG, "Unable to create JSON offer"); 87 | return; 88 | } 89 | if (cJSON_AddStringToObject(j_offer, "sdp", offer) == NULL) { 90 | cJSON_Delete(j_offer); 91 | ESP_LOGE(LOG_TAG, "Unable to create JSON offer"); 92 | return; 93 | } 94 | if (cJSON_AddStringToObject(j_offer, "type", "offer") == NULL) { 95 | cJSON_Delete(j_offer); 96 | ESP_LOGE(LOG_TAG, "Unable to create JSON offer"); 97 | return; 98 | } 99 | 100 | ESP_LOGD(LOG_TAG, "OFFER\n%s", offer); 101 | 102 | char *j_offer_str = cJSON_Print(j_offer); 103 | 104 | cJSON_Delete(j_offer); 105 | 106 | esp_http_client_handle_t client = esp_http_client_init(&config); 107 | esp_http_client_set_method(client, HTTP_METHOD_POST); 108 | esp_http_client_set_header(client, "Content-Type", "application/json"); 109 | esp_http_client_set_post_field(client, j_offer_str, strlen(j_offer_str)); 110 | 111 | esp_err_t err = esp_http_client_perform(client); 112 | int status_code = esp_http_client_get_status_code(client); 113 | if (err != ESP_OK || status_code != 200) { 
114 | ESP_LOGE(LOG_TAG, "Error perform http request %s (status %d)", 115 | esp_err_to_name(err), status_code); 116 | #ifndef LINUX_BUILD 117 | esp_restart(); 118 | #endif 119 | } 120 | 121 | cJSON *j_response = cJSON_Parse((const char *)answer); 122 | if (j_response == NULL) { 123 | ESP_LOGE(LOG_TAG, "Error parsing HTTP response"); 124 | #ifndef LINUX_BUILD 125 | esp_restart(); 126 | #endif 127 | } 128 | 129 | cJSON *j_answer = cJSON_GetObjectItem(j_response, "sdp"); 130 | if (j_answer == NULL) { 131 | ESP_LOGE(LOG_TAG, "Unable to find `sdp` field in response"); 132 | #ifndef LINUX_BUILD 133 | esp_restart(); 134 | #endif 135 | } 136 | 137 | memset(answer, 0, MAX_HTTP_OUTPUT_BUFFER + 1); 138 | memcpy(answer, j_answer->valuestring, strlen(j_answer->valuestring)); 139 | 140 | ESP_LOGD(LOG_TAG, "ANSWER\n%s", answer); 141 | 142 | cJSON_Delete(j_response); 143 | 144 | esp_http_client_cleanup(client); 145 | } 146 | -------------------------------------------------------------------------------- /esp32-m5stack-cores3/src/http.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "main.h" 7 | 8 | #ifndef MIN 9 | #define MIN(a, b) (((a) < (b)) ? 
(a) : (b)) 10 | #endif 11 | 12 | static esp_err_t http_event_handler(esp_http_client_event_t *evt) { 13 | static int output_len; 14 | switch (evt->event_id) { 15 | case HTTP_EVENT_REDIRECT: 16 | ESP_LOGD(LOG_TAG, "HTTP_EVENT_REDIRECT"); 17 | esp_http_client_set_header(evt->client, "From", "user@example.com"); 18 | esp_http_client_set_header(evt->client, "Accept", "text/html"); 19 | esp_http_client_set_redirection(evt->client); 20 | break; 21 | case HTTP_EVENT_ERROR: 22 | ESP_LOGD(LOG_TAG, "HTTP_EVENT_ERROR"); 23 | break; 24 | case HTTP_EVENT_ON_CONNECTED: 25 | ESP_LOGD(LOG_TAG, "HTTP_EVENT_ON_CONNECTED"); 26 | break; 27 | case HTTP_EVENT_HEADER_SENT: 28 | ESP_LOGD(LOG_TAG, "HTTP_EVENT_HEADER_SENT"); 29 | break; 30 | case HTTP_EVENT_ON_HEADER: 31 | ESP_LOGD(LOG_TAG, "HTTP_EVENT_ON_HEADER, key=%s, value=%s", 32 | evt->header_key, evt->header_value); 33 | break; 34 | case HTTP_EVENT_ON_DATA: { 35 | ESP_LOGD(LOG_TAG, "HTTP_EVENT_ON_DATA, len=%d", evt->data_len); 36 | if (esp_http_client_is_chunked_response(evt->client)) { 37 | ESP_LOGE(LOG_TAG, "Chunked HTTP response not supported"); 38 | #ifndef LINUX_BUILD 39 | esp_restart(); 40 | #endif 41 | } 42 | 43 | if (output_len == 0 && evt->user_data) { 44 | memset(evt->user_data, 0, MAX_HTTP_OUTPUT_BUFFER); 45 | } 46 | 47 | // If user_data buffer is configured, copy the response into the buffer 48 | int copy_len = 0; 49 | if (evt->user_data) { 50 | // The last byte in evt->user_data is kept for the NULL character in 51 | // case of out-of-bound access. 
52 | copy_len = MIN(evt->data_len, (MAX_HTTP_OUTPUT_BUFFER - output_len)); 53 | if (copy_len) { 54 | memcpy(((char *)evt->user_data) + output_len, evt->data, copy_len); 55 | } 56 | } 57 | output_len += copy_len; 58 | 59 | break; 60 | } 61 | case HTTP_EVENT_ON_FINISH: 62 | ESP_LOGD(LOG_TAG, "HTTP_EVENT_ON_FINISH"); 63 | output_len = 0; 64 | break; 65 | case HTTP_EVENT_DISCONNECTED: 66 | ESP_LOGI(LOG_TAG, "HTTP_EVENT_DISCONNECTED"); 67 | output_len = 0; 68 | break; 69 | } 70 | return ESP_OK; 71 | } 72 | 73 | void pipecat_http_request(char *offer, char *answer) { 74 | esp_http_client_config_t config; 75 | memset(&config, 0, sizeof(esp_http_client_config_t)); 76 | 77 | config.url = PIPECAT_SMALLWEBRTC_URL; 78 | config.event_handler = http_event_handler; 79 | config.timeout_ms = HTTP_TIMEOUT_MS; 80 | config.user_data = answer; 81 | 82 | ESP_LOGI(LOG_TAG, "Connecting to %s", config.url); 83 | 84 | cJSON *j_offer = cJSON_CreateObject(); 85 | if (j_offer == NULL) { 86 | ESP_LOGE(LOG_TAG, "Unable to create JSON offer"); 87 | return; 88 | } 89 | if (cJSON_AddStringToObject(j_offer, "sdp", offer) == NULL) { 90 | cJSON_Delete(j_offer); 91 | ESP_LOGE(LOG_TAG, "Unable to create JSON offer"); 92 | return; 93 | } 94 | if (cJSON_AddStringToObject(j_offer, "type", "offer") == NULL) { 95 | cJSON_Delete(j_offer); 96 | ESP_LOGE(LOG_TAG, "Unable to create JSON offer"); 97 | return; 98 | } 99 | 100 | ESP_LOGD(LOG_TAG, "OFFER\n%s", offer); 101 | 102 | char *j_offer_str = cJSON_Print(j_offer); 103 | 104 | cJSON_Delete(j_offer); 105 | 106 | esp_http_client_handle_t client = esp_http_client_init(&config); 107 | esp_http_client_set_method(client, HTTP_METHOD_POST); 108 | esp_http_client_set_header(client, "Content-Type", "application/json"); 109 | esp_http_client_set_post_field(client, j_offer_str, strlen(j_offer_str)); 110 | 111 | esp_err_t err = esp_http_client_perform(client); 112 | int status_code = esp_http_client_get_status_code(client); 113 | if (err != ESP_OK || status_code != 200) { 
114 | ESP_LOGE(LOG_TAG, "Error perform http request %s (status %d)", 115 | esp_err_to_name(err), status_code); 116 | #ifndef LINUX_BUILD 117 | esp_restart(); 118 | #endif 119 | } 120 | 121 | cJSON *j_response = cJSON_Parse((const char *)answer); 122 | if (j_response == NULL) { 123 | ESP_LOGE(LOG_TAG, "Error parsing HTTP response"); 124 | #ifndef LINUX_BUILD 125 | esp_restart(); 126 | #endif 127 | } 128 | 129 | cJSON *j_answer = cJSON_GetObjectItem(j_response, "sdp"); 130 | if (j_answer == NULL) { 131 | ESP_LOGE(LOG_TAG, "Unable to find `sdp` field in response"); 132 | #ifndef LINUX_BUILD 133 | esp_restart(); 134 | #endif 135 | } 136 | 137 | memset(answer, 0, MAX_HTTP_OUTPUT_BUFFER + 1); 138 | memcpy(answer, j_answer->valuestring, strlen(j_answer->valuestring)); 139 | 140 | ESP_LOGD(LOG_TAG, "ANSWER\n%s", answer); 141 | 142 | cJSON_Delete(j_response); 143 | 144 | esp_http_client_cleanup(client); 145 | } 146 | -------------------------------------------------------------------------------- /esp32-s3-box-3/dependencies.lock: -------------------------------------------------------------------------------- 1 | dependencies: 2 | espressif/button: 3 | component_hash: f53face2ab21fa0ffaf4cf0f6e513d393f56df6586bb2ad1146120f03f19ee05 4 | dependencies: 5 | - name: espressif/cmake_utilities 6 | registry_url: https://components.espressif.com 7 | require: private 8 | version: '*' 9 | - name: idf 10 | require: private 11 | version: '>=4.0' 12 | source: 13 | registry_url: https://components.espressif.com 14 | type: service 15 | version: 4.1.3 16 | espressif/cmake_utilities: 17 | component_hash: 351350613ceafba240b761b4ea991e0f231ac7a9f59a9ee901f751bddc0bb18f 18 | dependencies: 19 | - name: idf 20 | require: private 21 | version: '>=4.1' 22 | source: 23 | registry_url: https://components.espressif.com 24 | type: service 25 | version: 0.5.3 26 | espressif/esp-box-3: 27 | component_hash: e064b82b6866f3d7c9d2165403bb1d0d426403b3f45e4be95c361f855f8843b4 28 | dependencies: 29 | - name: 
espressif/button 30 | registry_url: https://components.espressif.com 31 | require: public 32 | version: ^4 33 | - name: espressif/esp_codec_dev 34 | registry_url: https://components.espressif.com 35 | require: public 36 | version: ~1.3.1 37 | - name: espressif/esp_lcd_ili9341 38 | registry_url: https://components.espressif.com 39 | require: private 40 | version: ^1 41 | - name: espressif/esp_lcd_touch_gt911 42 | registry_url: https://components.espressif.com 43 | require: private 44 | version: ^1 45 | - name: espressif/esp_lcd_touch_tt21100 46 | registry_url: https://components.espressif.com 47 | require: private 48 | version: ^1 49 | - name: espressif/esp_lvgl_port 50 | registry_url: https://components.espressif.com 51 | require: public 52 | version: ^2 53 | - name: espressif/icm42670 54 | registry_url: https://components.espressif.com 55 | require: public 56 | version: ^2.0.1 57 | - name: idf 58 | require: private 59 | version: '>=5.3' 60 | source: 61 | registry_url: https://components.espressif.com/ 62 | type: service 63 | targets: 64 | - esp32s3 65 | version: 3.0.0~1 66 | espressif/esp_codec_dev: 67 | component_hash: c71e2d13dad6fc41561590dd88dbc45c79e3f4ef48d5ee3575c60e8b6c8e79d5 68 | dependencies: 69 | - name: idf 70 | require: private 71 | version: '>=4.0' 72 | source: 73 | registry_url: https://components.espressif.com 74 | type: service 75 | version: 1.3.5 76 | espressif/esp_lcd_ili9341: 77 | component_hash: 31f1b793aa2110dd2ae071c21ccbff0a4eb20d9a4ee40b6294c0dc0ad9552c4e 78 | dependencies: 79 | - name: idf 80 | require: private 81 | version: '>=4.4' 82 | - name: espressif/cmake_utilities 83 | registry_url: https://components.espressif.com 84 | require: private 85 | version: 0.* 86 | source: 87 | registry_url: https://components.espressif.com 88 | type: service 89 | version: 1.2.0 90 | espressif/esp_lcd_touch: 91 | component_hash: 779b4ba2464a3ae85681e4b860caa5fdc35801458c23f3039ee761bae7f442a4 92 | dependencies: 93 | - name: idf 94 | require: private 95 | 
version: '>=4.4.2' 96 | source: 97 | registry_url: https://components.espressif.com 98 | type: service 99 | version: 1.1.2 100 | espressif/esp_lcd_touch_gt911: 101 | component_hash: acc1c184358aa29ef72506f618c9c76a8cc2bf12af38a2bff3d44d84f3a08857 102 | dependencies: 103 | - name: espressif/esp_lcd_touch 104 | registry_url: https://components.espressif.com 105 | require: public 106 | version: ^1.1.0 107 | - name: idf 108 | require: private 109 | version: '>=4.4.2' 110 | source: 111 | registry_url: https://components.espressif.com 112 | type: service 113 | version: 1.1.3 114 | espressif/esp_lcd_touch_tt21100: 115 | component_hash: 31894c5572927cb2dc65e5087572c4bd71d572fda3ef7e3320cb735af228c423 116 | dependencies: 117 | - name: espressif/esp_lcd_touch 118 | registry_url: https://components.espressif.com 119 | require: public 120 | version: ^1.1.0 121 | - name: idf 122 | require: private 123 | version: '>=4.4.2' 124 | source: 125 | registry_url: https://components.espressif.com 126 | type: service 127 | version: 1.1.1 128 | espressif/esp_lvgl_port: 129 | component_hash: e720c95cf0667554a204591bb5fade4655fb2990465557041200fa44b5bc7556 130 | dependencies: 131 | - name: idf 132 | require: private 133 | version: '>=4.4' 134 | - name: lvgl/lvgl 135 | registry_url: https://components.espressif.com 136 | require: public 137 | version: '>=8,<10' 138 | source: 139 | registry_url: https://components.espressif.com 140 | type: service 141 | version: 2.6.0 142 | espressif/icm42670: 143 | component_hash: 28b56e174f75c70037f5208aaed6c3789f0d243500d975519584bf9dc8c0836c 144 | dependencies: 145 | - name: idf 146 | require: private 147 | version: '>=5.2' 148 | source: 149 | registry_url: https://components.espressif.com 150 | type: service 151 | version: 2.0.2 152 | idf: 153 | source: 154 | type: idf 155 | version: 5.4.2 156 | lvgl/lvgl: 157 | component_hash: b702d642e03e95928046d5c6726558e6444e112420c77efa5fdb6650b0a13c5d 158 | dependencies: [] 159 | source: 160 | registry_url: 
https://components.espressif.com 161 | type: service 162 | version: 9.3.0 163 | direct_dependencies: 164 | - espressif/esp-box-3 165 | - idf 166 | manifest_hash: 55f6a702498de8e87201da504a38c5c958e2094ec832c4f8d6d368369d5d0625 167 | target: esp32s3 168 | version: 2.0.0 169 | -------------------------------------------------------------------------------- /esp32-m5stack-atoms3r/src/media.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | #include "esp_check.h" 12 | #include "esp_log.h" 13 | #include "esp_heap_caps.h" 14 | 15 | #include 16 | #include 17 | #include 18 | 19 | #include "driver/i2c_master.h" 20 | #include 21 | #include "freertos/ringbuf.h" 22 | 23 | #include "main.h" 24 | 25 | #define SAMPLE_RATE (16000) 26 | 27 | #define OPUS_BUFFER_SIZE 1276 // 1276 bytes is recommended by opus_encode 28 | #define PCM_BUFFER_SIZE 640 29 | #define PLAY_BUFFER_SIZE 50 30 | 31 | #define OPUS_ENCODER_BITRATE 30000 32 | #define OPUS_ENCODER_COMPLEXITY 0 33 | 34 | i2c_master_bus_handle_t i2c_bus; 35 | esp_codec_dev_handle_t audio_dev; 36 | 37 | void configure_pi4ioe(void) { 38 | i2c_master_dev_handle_t i2c_device; 39 | i2c_device_config_t i2c_device_cfg = { 40 | .dev_addr_length = I2C_ADDR_BIT_LEN_7, 41 | .device_address = 0x43, // PI4IOE Address 42 | .scl_speed_hz = 400 * 1000, 43 | .scl_wait_us = 0, 44 | .flags = { 45 | .disable_ack_check = 0, 46 | }, 47 | }; 48 | ESP_ERROR_CHECK(i2c_master_bus_add_device(i2c_bus, &i2c_device_cfg, &i2c_device)); 49 | 50 | auto writeRegister = [=](uint8_t reg, uint8_t value) { 51 | uint8_t buffer[2] = {reg, value}; 52 | ESP_ERROR_CHECK(i2c_master_transmit(i2c_device, buffer, 2, 100)); 53 | }; 54 | 55 | writeRegister(0x07, 0x00); // Set to high-impedance 56 | writeRegister(0x0D, 0xFF); // Enable pull-up 57 | writeRegister(0x03, 0x6E); // Set input=0, output=1 58 | writeRegister(0x05, 0xFF); // Unmute 
speaker 59 | i2c_master_bus_rm_device(i2c_device); 60 | } 61 | 62 | void configure_es8311(void) { 63 | i2s_chan_config_t chan_cfg = { 64 | .id = I2S_NUM_0, 65 | .role = I2S_ROLE_MASTER, 66 | .dma_desc_num = 6, 67 | .dma_frame_num = 240, 68 | .auto_clear_after_cb = true, 69 | .auto_clear_before_cb = false, 70 | .intr_priority = 0, 71 | }; 72 | 73 | i2s_chan_handle_t tx_handle = nullptr, rx_handle = nullptr; 74 | ESP_ERROR_CHECK(i2s_new_channel(&chan_cfg, &tx_handle, &rx_handle)); 75 | 76 | i2s_std_config_t std_cfg = { 77 | .clk_cfg = { 78 | .sample_rate_hz = SAMPLE_RATE, 79 | .clk_src = I2S_CLK_SRC_DEFAULT, 80 | .ext_clk_freq_hz = 0, 81 | .mclk_multiple = I2S_MCLK_MULTIPLE_256, 82 | }, 83 | .slot_cfg = { 84 | .data_bit_width = I2S_DATA_BIT_WIDTH_16BIT, 85 | .slot_bit_width = I2S_SLOT_BIT_WIDTH_AUTO, 86 | .slot_mode = I2S_SLOT_MODE_STEREO, 87 | .slot_mask = I2S_STD_SLOT_BOTH, 88 | .ws_width = I2S_DATA_BIT_WIDTH_16BIT, 89 | .ws_pol = false, 90 | .bit_shift = true, 91 | .left_align = true, 92 | .big_endian = false, 93 | .bit_order_lsb = false 94 | }, 95 | .gpio_cfg = { 96 | .mclk = GPIO_NUM_NC, 97 | .bclk = GPIO_NUM_8, 98 | .ws = GPIO_NUM_6, 99 | .dout = GPIO_NUM_5, 100 | .din = GPIO_NUM_7, 101 | .invert_flags = { 102 | .mclk_inv = false, 103 | .bclk_inv = false, 104 | .ws_inv = false 105 | } 106 | } 107 | }; 108 | 109 | ESP_ERROR_CHECK(i2s_channel_init_std_mode(tx_handle, &std_cfg)); 110 | ESP_ERROR_CHECK(i2s_channel_init_std_mode(rx_handle, &std_cfg)); 111 | 112 | audio_codec_i2s_cfg_t i2s_cfg = { 113 | .port = I2S_NUM_0, 114 | .rx_handle = rx_handle, 115 | .tx_handle = tx_handle, 116 | }; 117 | audio_codec_i2c_cfg_t i2c_cfg = { 118 | .port = I2C_NUM_1, 119 | .addr = ES8311_CODEC_DEFAULT_ADDR, 120 | .bus_handle = i2c_bus, 121 | }; 122 | es8311_codec_cfg_t es8311_cfg = { 123 | .ctrl_if = audio_codec_new_i2c_ctrl(&i2c_cfg), 124 | .gpio_if = audio_codec_new_gpio(), 125 | .codec_mode = ESP_CODEC_DEV_WORK_MODE_BOTH, 126 | .pa_pin = GPIO_NUM_NC, 127 | .use_mclk = false, 
128 | .hw_gain = { 129 | .pa_voltage = 5.0, 130 | .codec_dac_voltage = 3.3 131 | } 132 | }; 133 | 134 | esp_codec_dev_cfg_t dev_cfg = { 135 | .dev_type = ESP_CODEC_DEV_TYPE_IN_OUT, 136 | .codec_if = es8311_codec_new(&es8311_cfg), 137 | .data_if = audio_codec_new_i2s_data(&i2s_cfg), 138 | }; 139 | audio_dev = esp_codec_dev_new(&dev_cfg); 140 | 141 | esp_codec_dev_sample_info_t fs = { 142 | .bits_per_sample = 16, 143 | .channel = 1, 144 | .channel_mask = 0, 145 | .sample_rate = SAMPLE_RATE, 146 | .mclk_multiple = 0, 147 | }; 148 | ESP_ERROR_CHECK(esp_codec_dev_open(audio_dev, &fs)); 149 | ESP_ERROR_CHECK(esp_codec_dev_set_in_gain(audio_dev, 30.0)); 150 | ESP_ERROR_CHECK(esp_codec_dev_set_out_vol(audio_dev, 100)); 151 | } 152 | 153 | int record_audio(void* dest, int size) { 154 | ESP_ERROR_CHECK_WITHOUT_ABORT(esp_codec_dev_read(audio_dev, (void*)dest, size)); 155 | return size; 156 | } 157 | 158 | void pipecat_init_audio_capture() { 159 | i2c_master_bus_config_t i2c_bus_cfg = { 160 | .i2c_port = I2C_NUM_1, 161 | .sda_io_num = GPIO_NUM_38, 162 | .scl_io_num = GPIO_NUM_39, 163 | .clk_source = I2C_CLK_SRC_DEFAULT, 164 | .glitch_ignore_cnt = 7, 165 | .intr_priority = 0, 166 | .trans_queue_depth = 0, 167 | .flags = { 168 | .enable_internal_pullup = 1, 169 | }, 170 | }; 171 | ESP_ERROR_CHECK(i2c_new_master_bus(&i2c_bus_cfg, &i2c_bus)); 172 | configure_pi4ioe(); 173 | configure_es8311(); 174 | } 175 | 176 | opus_int16 *decoder_buffer = NULL; 177 | RingbufHandle_t decoder_buffer_queue; 178 | std::atomic play_task_buffer_idx = 0; 179 | 180 | int play_audio(const void* data, int size) { 181 | ESP_ERROR_CHECK_WITHOUT_ABORT(esp_codec_dev_write(audio_dev, (void*)data, size)); 182 | return size; 183 | } 184 | 185 | static void play_task(void *arg) { 186 | size_t len; 187 | uint8_t *play_task_buffer[PLAY_BUFFER_SIZE + 1] = {0}; 188 | 189 | while (1) { 190 | auto audio_buffer = (uint8_t *) xRingbufferReceive(decoder_buffer_queue, &len, portMAX_DELAY); 191 | play_task_buffer_idx++; 
192 | 193 | if (play_task_buffer_idx < PLAY_BUFFER_SIZE) { 194 | play_task_buffer[play_task_buffer_idx] = audio_buffer; 195 | continue; 196 | } 197 | 198 | if (play_task_buffer_idx == PLAY_BUFFER_SIZE) { 199 | for (auto i = 1; i < PLAY_BUFFER_SIZE; i++) { 200 | play_audio(play_task_buffer[i], PCM_BUFFER_SIZE); 201 | vRingbufferReturnItem(decoder_buffer_queue, play_task_buffer[i]); 202 | } 203 | } 204 | 205 | play_audio(audio_buffer, PCM_BUFFER_SIZE); 206 | vRingbufferReturnItem(decoder_buffer_queue, audio_buffer); 207 | } 208 | } 209 | 210 | OpusDecoder *opus_decoder = NULL; 211 | StaticRingbuffer_t rb_struct; 212 | 213 | void pipecat_init_audio_decoder() { 214 | int decoder_error = 0; 215 | opus_decoder = opus_decoder_create(SAMPLE_RATE, 1, &decoder_error); 216 | if (decoder_error != OPUS_OK) { 217 | printf("Failed to create OPUS decoder"); 218 | return; 219 | } 220 | 221 | decoder_buffer = (opus_int16 *)malloc(PCM_BUFFER_SIZE); 222 | 223 | auto ring_buffer_size = PCM_BUFFER_SIZE * (PLAY_BUFFER_SIZE) + (PLAY_BUFFER_SIZE * 10); 224 | decoder_buffer_queue = xRingbufferCreateStatic(ring_buffer_size, RINGBUF_TYPE_NOSPLIT, (uint8_t *) malloc(ring_buffer_size), &rb_struct); 225 | xTaskCreate(play_task, "play_task", 4096, NULL, 5, NULL); 226 | } 227 | 228 | 229 | std::atomic is_playing = false; 230 | 231 | unsigned int silence_count = 0; 232 | 233 | void set_is_playing(int16_t *in_buf, size_t in_samples) { 234 | bool any_set = false; 235 | for (size_t i = 0; i < in_samples; i++) { 236 | if (in_buf[i] != -1 && in_buf[i] != 0 && in_buf[i] != 1) { 237 | any_set = true; 238 | } 239 | } 240 | 241 | if (any_set) { 242 | silence_count = 0; 243 | } else { 244 | silence_count++; 245 | } 246 | 247 | if (silence_count >= 20 && is_playing) { 248 | is_playing = false; 249 | play_task_buffer_idx = 0; 250 | } else if (any_set && !is_playing) { 251 | is_playing = true; 252 | } 253 | } 254 | 255 | void pipecat_audio_decode(uint8_t *data, size_t size) { 256 | int decoded_size = 
opus_decode(opus_decoder, data, size, decoder_buffer, PCM_BUFFER_SIZE, 0); 257 | 258 | if (decoded_size > 0) { 259 | set_is_playing(decoder_buffer, decoded_size); 260 | if (!is_playing) { 261 | return; 262 | } 263 | 264 | xRingbufferSend(decoder_buffer_queue, decoder_buffer, PCM_BUFFER_SIZE, 0); 265 | } 266 | } 267 | 268 | OpusEncoder *opus_encoder = NULL; 269 | uint8_t *encoder_output_buffer = NULL; 270 | int16_t *read_buffer = NULL; 271 | 272 | void pipecat_init_audio_encoder() { 273 | int encoder_error; 274 | opus_encoder = opus_encoder_create(SAMPLE_RATE, 1, OPUS_APPLICATION_VOIP, 275 | &encoder_error); 276 | if (encoder_error != OPUS_OK) { 277 | printf("Failed to create OPUS encoder"); 278 | return; 279 | } 280 | 281 | if (opus_encoder_init(opus_encoder, SAMPLE_RATE, 1, OPUS_APPLICATION_VOIP) != 282 | OPUS_OK) { 283 | printf("Failed to initialize OPUS encoder"); 284 | return; 285 | } 286 | 287 | opus_encoder_ctl(opus_encoder, OPUS_SET_BITRATE(OPUS_ENCODER_BITRATE)); 288 | opus_encoder_ctl(opus_encoder, OPUS_SET_COMPLEXITY(OPUS_ENCODER_COMPLEXITY)); 289 | opus_encoder_ctl(opus_encoder, OPUS_SET_SIGNAL(OPUS_SIGNAL_VOICE)); 290 | 291 | read_buffer = (int16_t *)heap_caps_malloc(PCM_BUFFER_SIZE, MALLOC_CAP_DEFAULT); 292 | encoder_output_buffer = (uint8_t *)malloc(OPUS_BUFFER_SIZE); 293 | } 294 | 295 | void pipecat_send_audio(PeerConnection *peer_connection) { 296 | if (is_playing) { 297 | memset(read_buffer, 0, PCM_BUFFER_SIZE); 298 | vTaskDelay(pdMS_TO_TICKS(20)); 299 | } else { 300 | record_audio(read_buffer, PCM_BUFFER_SIZE); 301 | } 302 | 303 | auto encoded_size = opus_encode(opus_encoder, (const opus_int16 *)read_buffer, 304 | PCM_BUFFER_SIZE / sizeof(uint16_t), 305 | encoder_output_buffer, OPUS_BUFFER_SIZE); 306 | peer_connection_send_audio(peer_connection, encoder_output_buffer, 307 | encoded_size); 308 | } 309 | --------------------------------------------------------------------------------