├── components └── peer │ ├── include │ ├── peer.h │ ├── peer_signaling.h │ └── peer_connection.h │ ├── CMakeLists.txt │ ├── Kconfig.projbuild │ └── config.h ├── .gitignore ├── src ├── bsp.h ├── idf_component.yml ├── main.h ├── CMakeLists.txt ├── bsp.cpp ├── main.cpp ├── index.html ├── http.cpp ├── webrtc.cpp ├── Kconfig.projbuild ├── media.cpp └── wifi.cpp ├── api_key_configuration.png ├── partitions.csv ├── .clang-format ├── .github └── workflows │ ├── clang-format-check.yml │ ├── build.yaml │ └── release.yaml ├── sdkconfig.defaults.cores3 ├── .gitmodules ├── sdkconfig.defaults.atoms3 ├── sdkconfig.defaults.atom_lite ├── dependencies.lock ├── LICENSE ├── CMakeLists.txt ├── sdkconfig.defaults ├── script └── pack_firmware.py └── README.md /components/peer/include/peer.h: -------------------------------------------------------------------------------- 1 | ../../../deps/libpeer/src/peer.h -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | sdkconfig 3 | sdkconfig.old 4 | managed_components 5 | -------------------------------------------------------------------------------- /components/peer/include/peer_signaling.h: -------------------------------------------------------------------------------- 1 | ../../../deps/libpeer/src/peer_signaling.h -------------------------------------------------------------------------------- /components/peer/include/peer_connection.h: -------------------------------------------------------------------------------- 1 | ../../../deps/libpeer/src/peer_connection.h -------------------------------------------------------------------------------- /src/bsp.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // @brief Reset WiFi and API key provisioning. 
4 | bool bsp_check_reset_provisioning(); -------------------------------------------------------------------------------- /api_key_configuration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ciniml/openai-realtime-embedded-sdk/HEAD/api_key_configuration.png -------------------------------------------------------------------------------- /src/idf_component.yml: -------------------------------------------------------------------------------- 1 | dependencies: 2 | idf: 3 | version: ">=4.1.0" 4 | qrcode: "^0.1.0" 5 | espressif/mdns: "^1.4.2" 6 | -------------------------------------------------------------------------------- /partitions.csv: -------------------------------------------------------------------------------- 1 | # ESP-IDF Partition Table 2 | # Name, Type, SubType, Offset, Size, Flags 3 | nvs, data, nvs, 0x9000, 0x6000, 4 | phy_init, data, phy, 0xf000, 0x1000, 5 | factory, app, factory, 0x10000, 0x180000, 6 | 7 | -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | BasedOnStyle: Google 2 | IndentWidth: 2 3 | ColumnLimit: 80 4 | AllowShortFunctionsOnASingleLine: Empty 5 | AllowShortIfStatementsOnASingleLine: false 6 | AllowShortLoopsOnASingleLine: false 7 | BreakBeforeBraces: Attach 8 | DerivePointerAlignment: false 9 | PointerAlignment: Right 10 | -------------------------------------------------------------------------------- /src/main.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define LOG_TAG "realtimeapi-sdk" 4 | #define MAX_HTTP_OUTPUT_BUFFER 2048 5 | 6 | void oai_wifi(void); 7 | void oai_init_audio_capture(void); 8 | void oai_init_audio_decoder(void); 9 | void oai_init_audio_encoder(); 10 | void oai_send_audio(PeerConnection *peer_connection); 11 | void oai_audio_decode(uint8_t *data, size_t 
size); 12 | void oai_webrtc(); 13 | void oai_http_request(char *offer, char *answer); 14 | -------------------------------------------------------------------------------- /.github/workflows/clang-format-check.yml: -------------------------------------------------------------------------------- 1 | name: clang-format Check 2 | on: [push, pull_request] 3 | jobs: 4 | formatting-check: 5 | name: Formatting Check 6 | runs-on: ubuntu-latest 7 | steps: 8 | - uses: actions/checkout@v4 9 | - name: Run clang-format style check for C/C++/Protobuf programs. 10 | uses: jidicula/clang-format-action@v4.13.0 11 | with: 12 | clang-format-version: '17' 13 | check-path: 'src' 14 | -------------------------------------------------------------------------------- /sdkconfig.defaults.cores3: -------------------------------------------------------------------------------- 1 | ## For CoreS3 2 | CONFIG_ESPTOOLPY_FLASHMODE_QIO=y 3 | CONFIG_ESPTOOLPY_FLASHSIZE_16MB=y 4 | CONFIG_SPIRAM_MODE_QUAD=y 5 | CONFIG_SPIRAM_SPEED_80M=y 6 | CONFIG_SPIRAM_SPEED=80 7 | CONFIG_FLASHMODE_QIO=y 8 | 9 | CONFIG_MEDIA_INIT_MICROPHONE_AND_SPEAKER=y 10 | CONFIG_MEDIA_I2S_RX_TX_SHARED=y 11 | CONFIG_MEDIA_I2S_RX_DATA_PIN=14 12 | CONFIG_MEDIA_I2S_TX_MCLK_PIN=0 13 | CONFIG_MEDIA_I2S_TX_BCLK_PIN=34 14 | CONFIG_MEDIA_I2S_TX_LRCLK_PIN=33 15 | CONFIG_MEDIA_I2S_TX_DATA_PIN=13 16 | CONFIG_MEDIA_I2S_TX_SLOT_LEFT_ONLY=y 17 | 18 | CONFIG_USE_WIFI_PROVISIONING_SOFTAP=y 19 | CONFIG_BSP_RESET_PROVISIONING_NONE=y -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "components/srtp"] 2 | path = components/srtp 3 | url = https://git@github.com/sepfy/esp_ports 4 | [submodule "deps/libpeer"] 5 | path = deps/libpeer 6 | url = https://github.com/ciniml/libpeer.git 7 | [submodule "components/esp-libopus"] 8 | path = components/esp-libopus 9 | url = https://github.com/XasWorks/esp-libopus.git 10 | 
[submodule "components/esp-protocols"] 11 | path = components/esp-protocols 12 | url = https://github.com/espressif/esp-protocols.git 13 | [submodule "components/M5Unified"] 14 | path = components/M5Unified 15 | url = https://github.com/m5stack/M5Unified.git 16 | [submodule "components/M5GFX"] 17 | path = components/M5GFX 18 | url = https://github.com/m5stack/M5GFX.git 19 | -------------------------------------------------------------------------------- /.github/workflows/build.yaml: -------------------------------------------------------------------------------- 1 | name: Build 2 | on: 3 | push: 4 | branches: 5 | - master 6 | pull_request: 7 | 8 | jobs: 9 | build: 10 | strategy: 11 | matrix: 12 | target: [esp32s3, linux] 13 | 14 | runs-on: ubuntu-latest 15 | 16 | steps: 17 | - name: Checkout repo 18 | uses: actions/checkout@v2 19 | with: 20 | submodules: 'recursive' 21 | 22 | - name: Build 23 | run: | 24 | docker run -v $PWD:/project -w /project -u 0 \ 25 | -e HOME=/tmp -e WIFI_SSID=A -e WIFI_PASSWORD=B -e OPENAI_API_KEY=X \ 26 | espressif/idf:latest \ 27 | /bin/bash -c 'idf.py --preview set-target ${{ matrix.target }} && idf.py build' 28 | shell: bash 29 | -------------------------------------------------------------------------------- /components/peer/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(PEER_PROJECT_PATH "../../deps/libpeer") 2 | file(GLOB CODES "${PEER_PROJECT_PATH}/src/*.c") 3 | 4 | idf_component_register( 5 | SRCS ${CODES} 6 | INCLUDE_DIRS include 7 | PRIV_INCLUDE_DIRS "${PEER_PROJECT_PATH}/src" 8 | REQUIRES mbedtls srtp json esp_netif 9 | ) 10 | 11 | # Overwrite config.h 12 | file(READ ${CMAKE_CURRENT_SOURCE_DIR}/config.h INPUT_CONTENT) 13 | file(WRITE ${CMAKE_CURRENT_SOURCE_DIR}/../../deps/libpeer/src/config.h ${INPUT_CONTENT}) 14 | 15 | if(NOT IDF_TARGET STREQUAL linux) 16 | add_definitions("-DESP32 -DCONFIG_USE_LWIP=1 -D__BYTE_ORDER=__LITTLE_ENDIAN") 17 | endif() 18 | 19 | 
add_definitions("-DHTTP_DO_NOT_USE_CUSTOM_CONFIG -DMQTT_DO_NOT_USE_CUSTOM_CONFIG -DCONFIG_USE_USRSCTP=0 -DDISABLE_PEER_SIGNALING=0") 20 | -------------------------------------------------------------------------------- /sdkconfig.defaults.atoms3: -------------------------------------------------------------------------------- 1 | ## For AtomS3 + Atomic Speaker + PDM Unit 2 | CONFIG_ESPTOOLPY_FLASHMODE_QIO=y 3 | CONFIG_ESPTOOLPY_FLASHSIZE_8MB=y 4 | 5 | CONFIG_MEDIA_I2S_RX_PDM=y 6 | CONFIG_MEDIA_I2S_RX_MCLK_PIN=-1 7 | CONFIG_MEDIA_I2S_RX_BCLK_PIN=-1 8 | CONFIG_MEDIA_I2S_RX_LRCLK_PIN=1 9 | CONFIG_MEDIA_I2S_RX_DATA_PIN=2 10 | CONFIG_MEDIA_I2S_TX_MCLK_PIN=-1 11 | CONFIG_MEDIA_I2S_TX_BCLK_PIN=5 12 | CONFIG_MEDIA_I2S_TX_LRCLK_PIN=39 13 | CONFIG_MEDIA_I2S_TX_DATA_PIN=38 14 | 15 | CONFIG_LIBPEER_VIDEO_BUFFER_SIZE_MTUS=32 16 | CONFIG_LIBPEER_AUDIO_BUFFER_SIZE_MTUS=32 17 | CONFIG_LIBPEER_DATA_BUFFER_SIZE_MTUS=8 18 | 19 | CONFIG_USE_WIFI_PROVISIONING_SOFTAP=y 20 | CONFIG_BSP_RESET_PROVISIONING_GPIO=y 21 | CONFIG_BSP_RESET_PROVISIONING_GPIO_NUM=41 22 | CONFIG_BSP_RESET_PROVISIONING_GPIO_LEVEL_LOW=y -------------------------------------------------------------------------------- /sdkconfig.defaults.atom_lite: -------------------------------------------------------------------------------- 1 | ## For Atom Lite + Atomic Speaker + PDM Unit 2 | CONFIG_ESPTOOLPY_FLASHMODE_QIO=y 3 | CONFIG_ESPTOOLPY_FLASHSIZE_4MB=y 4 | 5 | CONFIG_MEDIA_I2S_RX_PDM=y 6 | CONFIG_MEDIA_I2S_RX_MCLK_PIN=-1 7 | CONFIG_MEDIA_I2S_RX_BCLK_PIN=-1 8 | CONFIG_MEDIA_I2S_RX_LRCLK_PIN=32 9 | CONFIG_MEDIA_I2S_RX_DATA_PIN=26 10 | CONFIG_MEDIA_I2S_TX_MCLK_PIN=-1 11 | CONFIG_MEDIA_I2S_TX_BCLK_PIN=22 12 | CONFIG_MEDIA_I2S_TX_LRCLK_PIN=21 13 | CONFIG_MEDIA_I2S_TX_DATA_PIN=25 14 | 15 | CONFIG_LIBPEER_VIDEO_BUFFER_SIZE_MTUS=32 16 | CONFIG_LIBPEER_AUDIO_BUFFER_SIZE_MTUS=32 17 | CONFIG_LIBPEER_DATA_BUFFER_SIZE_MTUS=64 18 | 19 | CONFIG_USE_WIFI_PROVISIONING_SOFTAP=y 20 | CONFIG_BSP_RESET_PROVISIONING_GPIO=y 21 | 
CONFIG_BSP_RESET_PROVISIONING_GPIO_NUM=39 22 | CONFIG_BSP_RESET_PROVISIONING_GPIO_LEVEL_LOW=y -------------------------------------------------------------------------------- /dependencies.lock: -------------------------------------------------------------------------------- 1 | dependencies: 2 | espressif/mdns: 3 | component_hash: 3ec0af5f6bce310512e90f482388d21cc7c0e99668172d2f895356165fc6f7c5 4 | dependencies: 5 | - name: idf 6 | require: private 7 | version: '>=5.0' 8 | source: 9 | registry_url: https://components.espressif.com/ 10 | type: service 11 | version: 1.8.2 12 | espressif/qrcode: 13 | component_hash: 3b493771bc5d6ad30cbf87c25bf784aada8a08c941504355b55d6b75518ed7bc 14 | dependencies: [] 15 | source: 16 | registry_url: https://components.espressif.com/ 17 | type: service 18 | version: 0.1.0~2 19 | idf: 20 | source: 21 | type: idf 22 | version: 5.3.2 23 | direct_dependencies: 24 | - espressif/mdns 25 | - espressif/qrcode 26 | - idf 27 | manifest_hash: 293c615b22f7162bb2fa099ee58691e9c61c54a6b4b6d288ab4959021bbffddf 28 | target: esp32s3 29 | version: 2.0.0 30 | -------------------------------------------------------------------------------- /src/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | set(COMMON_SRC "webrtc.cpp" "main.cpp" "http.cpp" "bsp.cpp") 2 | 3 | if(IDF_TARGET STREQUAL linux) 4 | idf_component_register( 5 | SRCS ${COMMON_SRC} 6 | REQUIRES peer esp-libopus esp_http_client) 7 | else() 8 | idf_component_register( 9 | SRCS ${COMMON_SRC} "wifi.cpp" "media.cpp" 10 | REQUIRES driver esp_wifi nvs_flash peer esp_psram esp-libopus esp_http_client esp_timer esp_driver_gpio wifi_provisioning esp_http_server mdns M5Unified 11 | EMBED_FILES index.html) 12 | endif() 13 | 14 | idf_component_get_property(lib peer COMPONENT_LIB) 15 | target_compile_options(${lib} PRIVATE -Wno-error=restrict) 16 | target_compile_options(${lib} PRIVATE -Wno-error=stringop-truncation) 17 | 18 | idf_component_get_property(lib 
srtp COMPONENT_LIB) 19 | target_compile_options(${lib} PRIVATE -Wno-error=incompatible-pointer-types) 20 | 21 | idf_component_get_property(lib esp-libopus COMPONENT_LIB) 22 | target_compile_options(${lib} PRIVATE -Wno-error=maybe-uninitialized) 23 | target_compile_options(${lib} PRIVATE -Wno-error=stringop-overread) 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 OpenAI 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
10 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.19) 2 | 3 | if(IDF_TARGET STREQUAL linux) 4 | if(NOT DEFINED ENV{OPENAI_API_KEY}) 5 | message(FATAL_ERROR "Env variable OPENAI_API_KEY must be set") 6 | endif() 7 | add_compile_definitions(OPENAI_API_KEY="$ENV{OPENAI_API_KEY}") 8 | endif() 9 | 10 | add_compile_definitions(OPENAI_REALTIMEAPI="https://api.openai.com/v1/realtime?model=gpt-4o-mini-realtime-preview-2024-12-17") 11 | 12 | if(DEFINED ENV{LOG_DATACHANNEL_MESSAGES}) 13 | add_compile_definitions(LOG_DATACHANNEL_MESSAGES="1") 14 | endif() 15 | 16 | set(COMPONENTS src) 17 | set(EXTRA_COMPONENT_DIRS "src" "components/srtp" "components/peer" "components/esp-libopus") 18 | 19 | if(IDF_TARGET STREQUAL linux) 20 | add_compile_definitions(LINUX_BUILD=1) 21 | list(APPEND EXTRA_COMPONENT_DIRS 22 | $ENV{IDF_PATH}/examples/protocols/linux_stubs/esp_stubs 23 | "components/esp-protocols/common_components/linux_compat/esp_timer" 24 | "components/esp-protocols/common_components/linux_compat/freertos" 25 | ) 26 | endif() 27 | 28 | include($ENV{IDF_PATH}/tools/cmake/project.cmake) 29 | project(src) 30 | -------------------------------------------------------------------------------- /sdkconfig.defaults: -------------------------------------------------------------------------------- 1 | # ESP Event Loop on Linux 2 | CONFIG_ESP_EVENT_POST_FROM_ISR=n 3 | CONFIG_ESP_EVENT_POST_FROM_IRAM_ISR=n 4 | 5 | # Disable TLS verification 6 | # Production needs to include specific cert chain you care about 7 | CONFIG_ESP_TLS_INSECURE=y 8 | CONFIG_ESP_TLS_SKIP_SERVER_CERT_VERIFY=y 9 | 10 | # Enable DTLS-SRTP 11 | CONFIG_MBEDTLS_SSL_PROTO_DTLS=y 12 | 13 | # libpeer requires large stack allocations 14 | CONFIG_ESP_MAIN_TASK_STACK_SIZE=10240 15 | 16 | # Defaults to partitions.csv 17 | CONFIG_PARTITION_TABLE_CUSTOM=y 18 | 19 | # 
Set highest CPU Freq 20 | CONFIG_ESP_DEFAULT_CPU_FREQ_MHZ_240=y 21 | 22 | # No PSRAM by default. 23 | # CONFIG_SPIRAM=y 24 | # CONFIG_SPIRAM_MODE_OCT=y 25 | 26 | # Disable Watchdog 27 | # CONFIG_ESP_INT_WDT is not set 28 | # CONFIG_ESP_TASK_WDT_EN is not set 29 | 30 | # Enable Compiler Optimization 31 | CONFIG_COMPILER_OPTIMIZATION_PERF=y 32 | CONFIG_COMPILER_OPTIMIZATION_ASSERTIONS_DISABLE=y 33 | 34 | # Optimize internal SRAM utilization. 35 | # Allocate WiFi TX buffer dynamically to reduce max RAM usage. 36 | CONFIG_ESP_WIFI_DYNAMIC_TX_BUFFER=y 37 | 38 | # Increase HTTPD max request header length 39 | CONFIG_HTTPD_MAX_REQ_HDR_LEN=1024 -------------------------------------------------------------------------------- /src/bsp.cpp: -------------------------------------------------------------------------------- 1 | #include "bsp.h" 2 | 3 | #include 4 | 5 | constexpr const char* TAG = "oai_bsp"; 6 | 7 | #ifdef CONFIG_BSP_RESET_PROVISIONING_NONE 8 | bool bsp_check_reset_provisioning() { 9 | return false; 10 | } 11 | #endif 12 | #ifdef CONFIG_BSP_RESET_PROVISIONING_GPIO 13 | #include 14 | #include 15 | 16 | bool bsp_check_reset_provisioning() { 17 | gpio_reset_pin(gpio_num_t(CONFIG_BSP_RESET_PROVISIONING_GPIO_NUM)); 18 | const gpio_config_t config = { 19 | .pin_bit_mask = 1ULL << CONFIG_BSP_RESET_PROVISIONING_GPIO_NUM, 20 | .mode = GPIO_MODE_INPUT, 21 | .pull_up_en = GPIO_PULLUP_ENABLE, 22 | .pull_down_en = GPIO_PULLDOWN_DISABLE, 23 | .intr_type = GPIO_INTR_DISABLE, 24 | }; 25 | if( auto err = gpio_config(&config); err != ESP_OK ) { 26 | ESP_LOGE(TAG, "Failed to configure reset provisioning GPIO - %d", err); 27 | return false; 28 | } 29 | vTaskDelay(pdMS_TO_TICKS(100)); 30 | bool pressed = gpio_get_level(gpio_num_t(CONFIG_BSP_RESET_PROVISIONING_GPIO_NUM)) == (CONFIG_BSP_RESET_PROVISIONING_GPIO_LEVEL_LOW ? 0 : 1); 31 | ESP_LOGI(TAG, "Reset provisioning button %s", pressed ? 
"pressed" : "not pressed"); 32 | return pressed; 33 | } 34 | #endif -------------------------------------------------------------------------------- /script/pack_firmware.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Pack ESP32 firmware binary for burn with M5Burner 3 | 4 | import re 5 | import pathlib 6 | import sys 7 | 8 | target_pattern = re.compile(r'^(0x[0-9a-fA-F]{1,8})\s+([\w\.\/-]+)') 9 | 10 | targets = [] 11 | with open('build/flash_args') as f: 12 | for line in iter(f.readline, ''): 13 | m = target_pattern.match(line) 14 | if m: 15 | start_address = int(m.group(1), 16) 16 | path = m.group(2) 17 | targets.append((start_address, path)) 18 | 19 | targets.sort(key=lambda x: x[0]) 20 | print(targets) 21 | 22 | output_path = sys.argv[1] if len(sys.argv) > 1 else 'firmware.bin' 23 | 24 | with open(output_path, 'wb') as f: 25 | current_address = 0 26 | for start_address, path in targets: 27 | if current_address < start_address: 28 | data = b'\xff' * (start_address - current_address) 29 | bytes_written = 0 30 | while bytes_written < len(data): 31 | bytes_written += f.write(data[bytes_written:]) 32 | 33 | current_address = start_address 34 | bin_path = pathlib.Path('build').joinpath(path) 35 | with open(bin_path, 'rb') as g: 36 | while True: 37 | data = g.read() 38 | bytes_read = len(data) 39 | if bytes_read == 0: 40 | break 41 | bytes_written = 0 42 | while bytes_written < bytes_read: 43 | bytes_written += f.write(data[bytes_written:]) 44 | current_address += bytes_read -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | #include "main.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #ifndef LINUX_BUILD 8 | #include "nvs_flash.h" 9 | #include 10 | #include 11 | 12 | #include 13 | 14 | constexpr const char* TAG = "main"; 15 | 16 | #ifdef CONFIG_ENABLE_HEAP_MONITOR 
17 | static esp_timer_handle_t s_monitor_timer; 18 | #endif // CONFIG_ENABLE_HEAP_MONITOR 19 | 20 | extern "C" void app_main(void) { 21 | esp_err_t ret = nvs_flash_init(); 22 | if (ret == ESP_ERR_NVS_NO_FREE_PAGES || 23 | ret == ESP_ERR_NVS_NEW_VERSION_FOUND) { 24 | ESP_ERROR_CHECK(nvs_flash_erase()); 25 | ret = nvs_flash_init(); 26 | } 27 | ESP_ERROR_CHECK(ret); 28 | 29 | #ifdef CONFIG_ENABLE_HEAP_MONITOR 30 | esp_timer_create_args_t timer_args = { 31 | .callback = [](void* arg) { 32 | ESP_LOGW(TAG, "current heap %7d | minimum ever %7d | largest free %7d ", 33 | xPortGetFreeHeapSize(), 34 | xPortGetMinimumEverFreeHeapSize(), 35 | heap_caps_get_largest_free_block(MALLOC_CAP_DEFAULT)); 36 | }, 37 | .arg = nullptr, 38 | .dispatch_method = ESP_TIMER_TASK, 39 | .name = "monitor_timer" 40 | }; 41 | ESP_ERROR_CHECK(esp_timer_create(&timer_args, &s_monitor_timer)); 42 | ESP_ERROR_CHECK(esp_timer_start_periodic(s_monitor_timer, CONFIG_HEAP_MONITOR_INTERVAL_MS * 1000ULL)); 43 | #endif // CONFIG_ENABLE_HEAP_MONITOR 44 | 45 | auto cfg = M5.config(); 46 | cfg.internal_spk = false; 47 | cfg.internal_mic = false; 48 | M5.begin(cfg); 49 | 50 | ESP_ERROR_CHECK(esp_event_loop_create_default()); 51 | peer_init(); 52 | oai_wifi(); 53 | 54 | oai_init_audio_capture(); 55 | oai_init_audio_decoder(); 56 | 57 | oai_webrtc(); 58 | } 59 | #else 60 | int main(void) { 61 | ESP_ERROR_CHECK(esp_event_loop_create_default()); 62 | peer_init(); 63 | oai_webrtc(); 64 | } 65 | #endif 66 | -------------------------------------------------------------------------------- /components/peer/Kconfig.projbuild: -------------------------------------------------------------------------------- 1 | menu "libpeer" 2 | config LIBPEER_SCTP_MTU 3 | int "SCTP MTU" 4 | default 1200 5 | help 6 | The SCTP MTU size. 7 | config LIBPEER_CONFIG_MTU 8 | int "Config MTU" 9 | default 1300 10 | help 11 | The Config MTU size. 
12 | config LIBPEER_RSA_KEY_LENGTH 13 | int "RSA Key Length" 14 | default 1024 15 | help 16 | The RSA Key Length. 17 | config LIBPEER_VIDEO_RB_DATA_MTUS 18 | int "Video RB Data MTUs" 19 | default 64 20 | help 21 | The Video RB Data MTUs. 22 | config LIBPEER_AUDIO_RB_DATA_MTUS 23 | int "Audio RB Data MTUs" 24 | default 64 25 | help 26 | The Audio RB Data MTUs. 27 | config LIBPEER_DATA_RB_DATA_MTUS 28 | int "Data RB Data MTUs" 29 | default 128 30 | help 31 | The Data RB Data MTUs. 32 | config LIBPEER_AUDIO_LATENCY_MS 33 | int "Audio Latency (ms)" 34 | default 20 35 | help 36 | The Audio Latency in milliseconds. 37 | config LIBPEER_KEEPALIVE_CONNCHECK 38 | int "Keepalive Conncheck interval" 39 | default 0 40 | help 41 | The Keepalive Conncheck. 42 | config LIBPEER_IPV6 43 | bool "Enable IPv6" 44 | default n 45 | help 46 | Enable IPv6 for libpeer. 47 | config LIBPEER_IFACE_PREFIX 48 | string "Interface Prefix" 49 | default "" 50 | help 51 | The Interface Prefix. 52 | choice LIBPEER_LOG_LEVEL 53 | prompt "Log Level" 54 | default LIBPEER_LOG_LEVEL_INFO 55 | help 56 | The Log Level. 57 | config LIBPEER_LOG_LEVEL_DEBUG 58 | bool "Debug" 59 | config LIBPEER_LOG_LEVEL_INFO 60 | bool "Info" 61 | config LIBPEER_LOG_LEVEL_WARN 62 | bool "Warn" 63 | config LIBPEER_LOG_LEVEL_ERROR 64 | bool "Error" 65 | endchoice 66 | config LIBPEER_LOG_REDIRECT 67 | bool "Redirect Log" 68 | default n 69 | help 70 | Redirect Log. 71 | config LIBPEER_DISABLE_PEER_SIGNALING 72 | bool "Disable Peer Signaling" 73 | default n 74 | help 75 | Disable Peer Signaling. 
76 | endmenu 77 | -------------------------------------------------------------------------------- /components/peer/config.h: -------------------------------------------------------------------------------- 1 | #ifndef CONFIG_H_ 2 | #define CONFIG_H_ 3 | 4 | // uncomment this if you want to handshake with a aiortc 5 | // #define CONFIG_DTLS_USE_ECDSA 1 6 | 7 | #define SCTP_MTU (1200) 8 | #define CONFIG_MTU (1300) 9 | 10 | #ifndef CONFIG_USE_LWIP 11 | #define CONFIG_USE_LWIP 0 12 | #endif 13 | 14 | #ifndef CONFIG_MBEDTLS_DEBUG 15 | #define CONFIG_MBEDTLS_DEBUG 0 16 | #endif 17 | 18 | #ifndef CONFIG_MBEDTLS_2_X 19 | #define CONFIG_MBEDTLS_2_X 0 20 | #endif 21 | 22 | #if CONFIG_MBEDTLS_2_X 23 | #define RSA_KEY_LENGTH 512 24 | #else 25 | #define RSA_KEY_LENGTH 1024 26 | #endif 27 | 28 | #ifndef CONFIG_DTLS_USE_ECDSA 29 | #define CONFIG_DTLS_USE_ECDSA 0 30 | #endif 31 | 32 | #ifndef CONFIG_USE_USRSCTP 33 | #define CONFIG_USE_USRSCTP 1 34 | #endif 35 | 36 | #ifndef CONFIG_VIDEO_BUFFER_SIZE 37 | #define CONFIG_VIDEO_BUFFER_SIZE (CONFIG_MTU * CONFIG_LIBPEER_VIDEO_BUFFER_SIZE_MTUS) 38 | #endif 39 | 40 | #ifndef CONFIG_AUDIO_BUFFER_SIZE 41 | #define CONFIG_AUDIO_BUFFER_SIZE (CONFIG_MTU * CONFIG_LIBPEER_AUDIO_BUFFER_SIZE_MTUS) 42 | #endif 43 | 44 | #ifndef CONFIG_DATA_BUFFER_SIZE 45 | #define CONFIG_DATA_BUFFER_SIZE (SCTP_MTU * CONFIG_LIBPEER_DATA_BUFFER_SIZE_MTUS) 46 | #endif 47 | 48 | #ifndef CONFIG_SDP_BUFFER_SIZE 49 | #define CONFIG_SDP_BUFFER_SIZE 8096 50 | #endif 51 | 52 | #ifndef CONFIG_MQTT_BUFFER_SIZE 53 | #define CONFIG_MQTT_BUFFER_SIZE 4096 54 | #endif 55 | 56 | #ifndef CONFIG_HTTP_BUFFER_SIZE 57 | #define CONFIG_HTTP_BUFFER_SIZE 4096 58 | #endif 59 | 60 | #ifndef CONFIG_TLS_READ_TIMEOUT 61 | #define CONFIG_TLS_READ_TIMEOUT 3000 62 | #endif 63 | 64 | #define AUDIO_LATENCY CONFIG_LIBPEER_AUDIO_LATENCY_MS 65 | #define KEEPALIVE_CONNCHECK CONFIG_LIBPEER_KEEPALIVE_CONNCHECK 66 | #define CONFIG_IPV6 0 67 | // empty will use first active interface 68 | #define 
CONFIG_IFACE_PREFIX "" 69 | 70 | #ifdef CONFIG_LIBPEER_LOG_LEVEL_ERROR 71 | #define LOG_LEVEL LEVEL_ERROR 72 | #endif // CONFIG_LIBPEER_LOG_LEVEL_ERROR 73 | #ifdef CONFIG_LIBPEER_LOG_LEVEL_WARN 74 | #define LOG_LEVEL LEVEL_WARN 75 | #endif // CONFIG_LIBPEER_LOG_LEVEL_WARN 76 | #ifdef CONFIG_LIBPEER_LOG_LEVEL_INFO 77 | #define LOG_LEVEL LEVEL_INFO 78 | #endif // CONFIG_LIBPEER_LOG_LEVEL_INFO 79 | #ifdef CONFIG_LIBPEER_LOG_LEVEL_DEBUG 80 | #define LOG_LEVEL LEVEL_DEBUG 81 | #endif // CONFIG_LIBPEER_LOG_LEVEL_DEBUG 82 | 83 | // #define LOG_LEVEL LEVEL_DEBUG 84 | #define LOG_REDIRECT 0 85 | 86 | // Disable MQTT and HTTP signaling 87 | // #define DISABLE_PEER_SIGNALING 1 88 | 89 | #endif // CONFIG_H_ 90 | -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | name: release-m5stack-binaries 2 | on: 3 | push: 4 | # Sequence of patterns matched against refs/tags 5 | tags: 6 | - 'v*' # Push events to matching v*, i.e. v1.0, v20.15.10 7 | 8 | jobs: 9 | release: 10 | runs-on: ubuntu-latest 11 | container: 12 | image: espressif/idf:release-v5.3 13 | env: 14 | CI_BUILD: 1 15 | timeout-minutes: 30 16 | steps: 17 | - name: Release version 18 | run: echo "RELEASE_VERSION=${GITHUB_REF#refs/*/}" >> $GITHUB_ENV 19 | - uses: actions/checkout@v4 20 | with: 21 | ref: '${{ github.ref }}' 22 | submodules: recursive 23 | - name: Build AtomS3 image 24 | run: | 25 | . $IDF_PATH/export.sh 26 | rm -rf build sdkconfig 27 | SDKCONFIG_DEFAULTS="sdkconfig.defaults;sdkconfig.defaults.atoms3" idf.py set-target esp32-s3 28 | idf.py build 29 | python script/pack_firmware.py oai_res_example.atoms3.bin 30 | working-directory: . 31 | - name: Build CoreS3 image 32 | run: | 33 | . 
$IDF_PATH/export.sh 34 | rm -rf build sdkconfig 35 | SDKCONFIG_DEFAULTS="sdkconfig.defaults;sdkconfig.defaults.cores3" idf.py set-target esp32-s3 36 | idf.py build 37 | python script/pack_firmware.py oai_res_example.cores3.bin 38 | working-directory: . 39 | - name: Build Atom Lite image 40 | run: | 41 | . $IDF_PATH/export.sh 42 | rm -rf build sdkconfig 43 | SDKCONFIG_DEFAULTS="sdkconfig.defaults;sdkconfig.defaults.atom_lite" idf.py set-target esp32 44 | idf.py build 45 | python script/pack_firmware.py oai_res_example.atom_lite.bin 46 | working-directory: . 47 | - name: Collect images 48 | run: | 49 | zip oai_res_example.$RELEASE_VERSION.zip oai_res_example.*.bin 50 | working-directory: . 51 | - name: Create Release 52 | id: create_release 53 | uses: actions/create-release@v1 54 | env: 55 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 56 | with: 57 | tag_name: ${{ github.ref }} 58 | release_name: Release ${{ env.RELEASE_VERSION }} 59 | draft: false 60 | prerelease: false 61 | - name: Upload Release Asset 62 | id: upload-release-asset 63 | uses: actions/upload-release-asset@v1 64 | env: 65 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 66 | with: 67 | upload_url: ${{ steps.create_release.outputs.upload_url }} 68 | asset_path: oai_res_example.${{ env.RELEASE_VERSION }}.zip 69 | asset_name: oai_res_example.${{ env.RELEASE_VERSION }}.zip 70 | asset_content_type: application/zip -------------------------------------------------------------------------------- /src/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | OpenAI Realtime Embedded SDK Example Configuration 4 | 50 | 51 | 52 |

OpenAI Realtime Embedded SDK Example Configuration

53 |
54 |

Configuration

55 |
56 | 57 | 58 | 59 |
60 |
61 | 62 | 63 | 64 |
65 |
66 | 67 |
68 |
69 | 70 | -------------------------------------------------------------------------------- /src/http.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "main.h" 8 | 9 | #ifndef MIN 10 | #define MIN(a, b) (((a) < (b)) ? (a) : (b)) 11 | #endif 12 | 13 | esp_err_t oai_http_event_handler(esp_http_client_event_t *evt) { 14 | static int output_len; 15 | switch (evt->event_id) { 16 | case HTTP_EVENT_REDIRECT: 17 | ESP_LOGD(LOG_TAG, "HTTP_EVENT_REDIRECT"); 18 | esp_http_client_set_header(evt->client, "From", "user@example.com"); 19 | esp_http_client_set_header(evt->client, "Accept", "text/html"); 20 | esp_http_client_set_redirection(evt->client); 21 | break; 22 | case HTTP_EVENT_ERROR: 23 | ESP_LOGD(LOG_TAG, "HTTP_EVENT_ERROR"); 24 | break; 25 | case HTTP_EVENT_ON_CONNECTED: 26 | ESP_LOGD(LOG_TAG, "HTTP_EVENT_ON_CONNECTED"); 27 | break; 28 | case HTTP_EVENT_HEADER_SENT: 29 | ESP_LOGD(LOG_TAG, "HTTP_EVENT_HEADER_SENT"); 30 | break; 31 | case HTTP_EVENT_ON_HEADER: 32 | ESP_LOGD(LOG_TAG, "HTTP_EVENT_ON_HEADER, key=%s, value=%s", 33 | evt->header_key, evt->header_value); 34 | break; 35 | case HTTP_EVENT_ON_DATA: { 36 | ESP_LOGD(LOG_TAG, "HTTP_EVENT_ON_DATA, len=%d", evt->data_len); 37 | if (esp_http_client_is_chunked_response(evt->client)) { 38 | ESP_LOGE(LOG_TAG, "Chunked HTTP response not supported"); 39 | #ifndef LINUX_BUILD 40 | esp_restart(); 41 | #endif 42 | } 43 | 44 | if (output_len == 0 && evt->user_data) { 45 | memset(evt->user_data, 0, MAX_HTTP_OUTPUT_BUFFER); 46 | } 47 | 48 | // If user_data buffer is configured, copy the response into the buffer 49 | int copy_len = 0; 50 | if (evt->user_data) { 51 | // The last byte in evt->user_data is kept for the NULL character in 52 | // case of out-of-bound access. 
53 | copy_len = MIN(evt->data_len, (MAX_HTTP_OUTPUT_BUFFER - output_len)); 54 | if (copy_len) { 55 | memcpy(((char *)evt->user_data) + output_len, evt->data, copy_len); 56 | } 57 | } 58 | output_len += copy_len; 59 | 60 | break; 61 | } 62 | case HTTP_EVENT_ON_FINISH: 63 | ESP_LOGD(LOG_TAG, "HTTP_EVENT_ON_FINISH"); 64 | output_len = 0; 65 | break; 66 | case HTTP_EVENT_DISCONNECTED: 67 | ESP_LOGI(LOG_TAG, "HTTP_EVENT_DISCONNECTED"); 68 | output_len = 0; 69 | break; 70 | } 71 | return ESP_OK; 72 | } 73 | 74 | void oai_http_request(char *offer, char *answer) { 75 | esp_http_client_config_t config; 76 | memset(&config, 0, sizeof(esp_http_client_config_t)); 77 | 78 | extern esp_err_t oai_get_api_uri(std::string& api_uri); 79 | std::string api_uri; 80 | if( auto err = oai_get_api_uri(api_uri); err != ESP_OK ) { 81 | api_uri = CONFIG_OPENAI_REALTIMEAPI; 82 | } 83 | config.url = api_uri.c_str(); 84 | ESP_LOGI(LOG_TAG, "Using API URI: %s", config.url); 85 | 86 | config.event_handler = oai_http_event_handler; 87 | config.user_data = answer; 88 | 89 | #ifdef CONFIG_USE_WIFI_PROVISIONING_SOFTAP 90 | extern esp_err_t oai_get_api_key(std::vector& api_key); 91 | std::vector api_key; 92 | if( auto err = oai_get_api_key(api_key); err != ESP_OK ) { 93 | ESP_LOGE(LOG_TAG, "API key not set"); 94 | } else { 95 | ESP_LOGI(LOG_TAG, "Using API key: %s", api_key.data()); 96 | snprintf(answer, MAX_HTTP_OUTPUT_BUFFER, "Bearer %s", api_key.data()); 97 | } 98 | #else // CONFIG_USE_WIFI_PROVISIONING_SOFTAP 99 | snprintf(answer, MAX_HTTP_OUTPUT_BUFFER, "Bearer %s", CONFIG_OPENAI_API_KEY); 100 | #endif 101 | 102 | if( esp_http_client_handle_t client = esp_http_client_init(&config); client == nullptr ) { 103 | ESP_LOGE(LOG_TAG, "Failed to initialize HTTP client"); 104 | } else { 105 | esp_http_client_set_method(client, HTTP_METHOD_POST); 106 | esp_http_client_set_header(client, "Content-Type", "application/sdp"); 107 | esp_http_client_set_header(client, "Authorization", answer); 108 | 
esp_http_client_set_post_field(client, offer, strlen(offer)); 109 | 110 | esp_err_t err = esp_http_client_perform(client); 111 | if (err != ESP_OK || esp_http_client_get_status_code(client) != 201) { 112 | ESP_LOGE(LOG_TAG, "Error perform http request %s", esp_err_to_name(err)); 113 | #if !defined(LINUX_BUILD) && defined(CONFIG_DISABLE_CONFIGURATOR_AFTER_PROVISIONED) 114 | esp_restart(); 115 | #endif 116 | } 117 | 118 | esp_http_client_cleanup(client); 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /src/webrtc.cpp: -------------------------------------------------------------------------------- 1 | #ifndef LINUX_BUILD 2 | #include 3 | #include 4 | #endif 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include "main.h" 11 | 12 | #define TICK_INTERVAL 15 13 | #define GREETING \ 14 | "{\"type\": \"response.create\", \"response\": {\"modalities\": " \ 15 | "[\"audio\", \"text\"], \"instructions\": \"Say 'How can I help?.'\"}}" 16 | 17 | PeerConnection *peer_connection = NULL; 18 | 19 | #ifndef LINUX_BUILD 20 | StaticTask_t task_buffer; 21 | void oai_send_audio_task(void *user_data) { 22 | oai_init_audio_encoder(); 23 | 24 | while (1) { 25 | oai_send_audio(peer_connection); 26 | vTaskDelay(pdMS_TO_TICKS(TICK_INTERVAL)); 27 | } 28 | } 29 | #endif 30 | 31 | static void oai_ondatachannel_onmessage_task(char *msg, size_t len, 32 | void *userdata, uint16_t sid) { 33 | #ifdef LOG_DATACHANNEL_MESSAGES 34 | ESP_LOGI(LOG_TAG, "DataChannel Message: %s", msg); 35 | #endif 36 | } 37 | 38 | static void oai_ondatachannel_onopen_task(void *userdata) { 39 | if (peer_connection_create_datachannel(peer_connection, DATA_CHANNEL_RELIABLE, 40 | 0, 0, (char *)"oai-events", 41 | (char *)"") != -1) { 42 | ESP_LOGI(LOG_TAG, "DataChannel created"); 43 | peer_connection_datachannel_send(peer_connection, (char *)GREETING, 44 | strlen(GREETING)); 45 | } else { 46 | ESP_LOGE(LOG_TAG, "Failed to create DataChannel"); 47 | } 48 | } 49 | 50 | 
static void oai_onconnectionstatechange_task(PeerConnectionState state, 51 | void *user_data) { 52 | ESP_LOGI(LOG_TAG, "PeerConnectionState: %s", 53 | peer_connection_state_to_string(state)); 54 | 55 | if (state == PEER_CONNECTION_DISCONNECTED || 56 | state == PEER_CONNECTION_CLOSED) { 57 | #if !defined(LINUX_BUILD) && defined(CONFIG_DISABLE_CONFIGURATOR_AFTER_PROVISIONED) 58 | esp_restart(); 59 | #endif 60 | } else if (state == PEER_CONNECTION_CONNECTED) { 61 | #ifndef LINUX_BUILD 62 | constexpr size_t stack_size = 20000; 63 | // Allocate the stack memory from the PSRAM if available. Otherwise, allocate from the internal memory. 64 | StackType_t *stack_memory = (StackType_t *)heap_caps_malloc_prefer( 65 | stack_size * sizeof(StackType_t), 2, 66 | MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT, 67 | MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT); 68 | if (stack_memory == nullptr) { 69 | ESP_LOGE(LOG_TAG, "Failed to allocate stack memory for audio publisher."); 70 | esp_restart(); 71 | } 72 | xTaskCreateStaticPinnedToCore(oai_send_audio_task, "audio_publisher", stack_size, 73 | NULL, 7, stack_memory, &task_buffer, 0); 74 | #endif 75 | } 76 | } 77 | 78 | static void oai_on_icecandidate_task(char *description, void *user_data) { 79 | char local_buffer[MAX_HTTP_OUTPUT_BUFFER + 1] = {0}; 80 | oai_http_request(description, local_buffer); 81 | peer_connection_set_remote_description(peer_connection, local_buffer); 82 | } 83 | 84 | void oai_webrtc() { 85 | PeerConfiguration peer_connection_config = { 86 | .ice_servers = {}, 87 | .audio_codec = CODEC_OPUS, 88 | .video_codec = CODEC_NONE, 89 | .datachannel = DATA_CHANNEL_STRING, 90 | .onaudiotrack = [](uint8_t *data, size_t size, void *userdata) -> void { 91 | #ifndef LINUX_BUILD 92 | oai_audio_decode(data, size); 93 | #endif 94 | }, 95 | .onvideotrack = NULL, 96 | .on_request_keyframe = NULL, 97 | .user_data = NULL, 98 | }; 99 | 100 | peer_connection = peer_connection_create(&peer_connection_config); 101 | if (peer_connection == NULL) { 102 | 
ESP_LOGE(LOG_TAG, "Failed to create peer connection"); 103 | #ifndef LINUX_BUILD 104 | esp_restart(); 105 | #endif 106 | } 107 | 108 | peer_connection_oniceconnectionstatechange(peer_connection, 109 | oai_onconnectionstatechange_task); 110 | peer_connection_onicecandidate(peer_connection, oai_on_icecandidate_task); 111 | peer_connection_ondatachannel(peer_connection, 112 | oai_ondatachannel_onmessage_task, 113 | oai_ondatachannel_onopen_task, NULL); 114 | 115 | peer_connection_create_offer(peer_connection); 116 | 117 | while (1) { 118 | peer_connection_loop(peer_connection); 119 | vTaskDelay(pdMS_TO_TICKS(TICK_INTERVAL)); 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /src/Kconfig.projbuild: -------------------------------------------------------------------------------- 1 | menu "Embedded SDK Configuration" 2 | config MEDIA_I2S_RX_TX_SHARED 3 | bool "I2S RX/TX Shared" 4 | default n 5 | help 6 | If this option is set (not default), 7 | the I2S RX and TX will share the same I2S channel. 8 | config MEDIA_I2S_RX_PDM 9 | bool "I2S RX PDM" 10 | depends on !MEDIA_I2S_RX_TX_SHARED 11 | default n 12 | help 13 | If this option is set (not default), 14 | the I2S RX will use PDM. 15 | config MEDIA_I2S_RX_MCLK_PIN 16 | int "I2S RX MCLK Pin" 17 | default 0 18 | help 19 | The pin number for I2S RX MCLK. 20 | config MEDIA_I2S_RX_BCLK_PIN 21 | int "I2S RX BCLK Pin" 22 | default 0 23 | help 24 | The pin number for I2S RX BCLK. 25 | config MEDIA_I2S_RX_LRCLK_PIN 26 | int "I2S RX LRCLK Pin" 27 | default 0 28 | help 29 | The pin number for I2S RX LRCLK. 30 | config MEDIA_I2S_RX_DATA_PIN 31 | int "I2S RX DATA Pin" 32 | default 0 33 | help 34 | The pin number for I2S RX DATA. 35 | config MEDIA_I2S_TX_MCLK_PIN 36 | int "I2S TX MCLK Pin" 37 | default 0 38 | help 39 | The pin number for I2S TX MCLK. 40 | config MEDIA_I2S_TX_BCLK_PIN 41 | int "I2S TX BCLK Pin" 42 | default 0 43 | help 44 | The pin number for I2S TX BCLK. 
45 | config MEDIA_I2S_TX_LRCLK_PIN 46 | int "I2S TX LRCLK Pin" 47 | default 0 48 | help 49 | The pin number for I2S TX LRCLK. 50 | config MEDIA_I2S_TX_DATA_PIN 51 | int "I2S TX DATA Pin" 52 | default 0 53 | help 54 | The pin number for I2S TX DATA. 55 | config MEDIA_I2S_TX_SLOT_LEFT_ONLY 56 | bool "I2S TX Slot Left Only" 57 | default n 58 | help 59 | If this option is set (not default), 60 | the I2S TX will only use the left slot. 61 | Otherwise, it will use both left and right slots. 62 | config MEDIA_INIT_MICROPHONE_AND_SPEAKER 63 | bool "Init Microphone and Speaker" 64 | default n 65 | help 66 | If this option is set (not default), 67 | the microphone and speaker will be initialized. 68 | config MEDIA_ENABLE_DEBUG_AUDIO_UDP_CLIENT 69 | bool "Enable Debug Audio UDP Client" 70 | default n 71 | help 72 | if this option is set (not default), 73 | the input/output audio will be sent to the specified host. 74 | config MEDIA_DEBUG_AUDIO_HOST 75 | string "Debug Audio Host" 76 | default "192.168.100.1" 77 | depends on MEDIA_ENABLE_DEBUG_AUDIO_UDP_CLIENT 78 | help 79 | The host to send the audio to for debugging. 80 | config MEDIA_DEBUG_AUDIO_IN_PORT 81 | int "UDP port to send microphone input audio data to" 82 | default 10000 83 | depends on MEDIA_ENABLE_DEBUG_AUDIO_UDP_CLIENT 84 | help 85 | The port to send the audio from microphone to for debugging. 86 | config MEDIA_DEBUG_AUDIO_OUT_PORT 87 | int "UDP port to send speaker output audio data to" 88 | default 10001 89 | depends on MEDIA_ENABLE_DEBUG_AUDIO_UDP_CLIENT 90 | help 91 | The port to send the audio from speaker to for debugging. 
92 | config USE_WIFI_PROVISIONING_SOFTAP 93 | bool "Use SoftAP for WiFi provisioning" 94 | default n 95 | help 96 | Use SoftAP for WiFi provisioning 97 | config WIFI_SSID 98 | string "WiFi SSID" 99 | depends on !USE_WIFI_PROVISIONING_SOFTAP 100 | help 101 | The SSID of the WiFi network to connect to 102 | config WIFI_PASSWORD 103 | string "WiFi Password" 104 | depends on !USE_WIFI_PROVISIONING_SOFTAP 105 | help 106 | The password of the WiFi network to connect to 107 | config OPENAI_API_KEY 108 | string "OpenAI API Key" 109 | depends on !USE_WIFI_PROVISIONING_SOFTAP 110 | help 111 | The OpenAI API key 112 | config OPENAI_REALTIMEAPI 113 | string "OpenAI Realtime API" 114 | default "https://api.openai.com/v1/realtime?model=gpt-4o-mini-realtime-preview-2024-12-17" 115 | help 116 | The OpenAI Realtime API URI 117 | config DISABLE_CONFIGURATOR_AFTER_PROVISIONED 118 | bool "Disable configurator after provisioned" 119 | default n 120 | help 121 | Disables the configurator HTTP server after OpenAI API key is provisioned. 122 | choice BSP_RESET_PROVISIONING 123 | prompt "Reset Provisioning Mode" 124 | depends on USE_WIFI_PROVISIONING_SOFTAP 125 | default BSP_RESET_PROVISIONING_NONE 126 | help 127 | Reset method of the WiFi and API key provisioning. 128 | config BSP_RESET_PROVISIONING_NONE 129 | bool None 130 | config BSP_RESET_PROVISIONING_GPIO 131 | bool GPIO 132 | endchoice 133 | config BSP_RESET_PROVISIONING_GPIO_NUM 134 | int "Reset provisioning GPIO number" 135 | depends on BSP_RESET_PROVISIONING_GPIO 136 | default 0 137 | help 138 | GPIO number used to detect whether the provisioned WiFi and API key must be reset. 
139 | choice BSP_RESET_PROVISIONING_GPIO_LEVEL 140 | prompt "Reset provisioning GPIO level" 141 | depends on BSP_RESET_PROVISIONING_GPIO 142 | default BSP_RESET_PROVISIONING_GPIO_LEVEL_LOW 143 | config BSP_RESET_PROVISIONING_GPIO_LEVEL_HIGH 144 | bool "Active High" 145 | config BSP_RESET_PROVISIONING_GPIO_LEVEL_LOW 146 | bool "Active Low" 147 | endchoice 148 | config ENABLE_HEAP_MONITOR 149 | bool "Enable Heap Monitor" 150 | default n 151 | help 152 | If this option is set (not default), 153 | the heap monitor will be enabled. 154 | config HEAP_MONITOR_INTERVAL_MS 155 | int "Heap Monitor Interval (ms)" 156 | default 1000 157 | depends on ENABLE_HEAP_MONITOR 158 | help 159 | The interval in milliseconds to print the heap monitor. 160 | config ENABLE_LOG_DATACHANNEL_MESSAGES 161 | bool "Enable Log DataChannel Messages" 162 | default n 163 | help 164 | If this option is set (not default), 165 | the data channel messages will be logged. 166 | endmenu 167 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Open RealtimeAPI Embedded SDK 2 | 3 | # Table of Contents 4 | 5 | - [Docs](#docs) 6 | - [Installation](#installation) 7 | - [Usage](#usage) 8 | 9 | ## Platform/Device Support 10 | 11 | This SDK has been developed and tested on an `esp32s3` and `linux`. You don't need any physical hardware 12 | to run this SDK. You can use it from Linux directly. 13 | 14 | To use it on hardware, purchase either of these microcontrollers. Others may work, but this is what 15 | has been developed against. 16 | 17 | * [Freenove ESP32-S3-WROOM](https://www.amazon.com/gp/product/B0BMQ8F7FN) 18 | * [Sonatino - ESP32-S3 Audio Development Board](https://www.amazon.com/gp/product/B0BVY8RJNP) 19 | 20 | You can get an ESP32S3 for much less money on eBay/AliExpress. 21 | 22 | ## Installation 23 | 24 | Select the target board. 
25 | 26 | * CoreS3 (SE) - `export SDKCONFIG_DEFAULTS="sdkconfig.defaults;sdkconfig.defaults.cores3"` 27 | * AtomS3 (with Atomic Speaker and PDM Microphone Unit) - `export SDKCONFIG_DEFAULTS="sdkconfig.defaults;sdkconfig.defaults.atoms3"` 28 | * Atom Lite (with Atomic Speaker and PDM Microphone Unit) - `export SDKCONFIG_DEFAULTS="sdkconfig.defaults;sdkconfig.defaults.atom_lite"` 29 | 30 | Call `set-target` with the platform you are targeting. Today only `linux`, `esp32` and `esp32s3` are supported. 31 | 32 | * ESP32 targets : Atom Lite 33 | * `idf.py set-target esp32` 34 | 35 | * ESP32-S3 targets: CoreS3 (SE), AtomS3 36 | * `idf.py set-target esp32s3` 37 | 38 | Note that to change the target, you have to remove the build directory. 39 | 40 | Configure device specific settings. None needed at this time 41 | * `idf.py menuconfig` 42 | 43 | If you want to use WiFi provisioning via SoftAP and OpenAI API configuration via the Web UI, enable `CONFIG_USE_WIFI_PROVISIONING_SOFTAP` 44 | 45 | If you did not enable `CONFIG_USE_WIFI_PROVISIONING_SOFTAP`, set your Wifi SSID + Password and OpenAI API key in the configuration. 46 | You must set `CONFIG_WIFI_SSID`, `CONFIG_WIFI_PASSWORD`, and `CONFIG_OPENAI_API_KEY` correctly. 47 | 48 | Build 49 | * `idf.py build` 50 | 51 | If you built for `esp32` or `esp32s3` run the following to flash to the device 52 | * `idf.py flash` 53 | 54 | If you built for `linux` you can run the binary directly 55 | * `./build/src.elf` 56 | 57 | See [build.yaml](.github/workflows/build.yaml) for a Docker command to do this all in one step. 58 | 59 | ## Debugging 60 | 61 | You can enable the debug audio stream output from menuconfig. 62 | The settings are in `Embedded SDK Configuration` menu. 63 | 64 | To enable the debug audio UDP stream output, enable `Enable Debug Audio UDP Client` and configure the host IP address to send the audio data for debugging. 
65 | 66 | ``` 67 | [*] Enable Debug Audio UDP Client 68 | (192.168.100.1) Debug Audio Host (NEW) 69 | (10000) UDP port to send microphone input audio data to (NEW) 70 | (10001) UDP port to send speaker output audio data to (NEW) 71 | ``` 72 | 73 | At the host, you can receive the raw PCM audio data by UDP server software like netcat. 74 | 75 | To receive the microphone input data 76 | 77 | ``` 78 | nc -ul 10000 > audio_input.pcm 79 | ``` 80 | 81 | To receive the speaker output data 82 | 83 | ``` 84 | nc -ul 10001 > audio_output.pcm 85 | ``` 86 | 87 | You can convert the received audio data by using `ffmpeg` like this. 88 | 89 | ``` 90 | ffmpeg -y -f s16le -ar 8k -ac 1 -i audio_input.pcm audio_input.wav 91 | ffmpeg -y -f s16le -ar 8k -ac 1 -i audio_output.pcm audio_output.wav 92 | ``` 93 | 94 | ## Pre-built binaries 95 | 96 | Pre-built binaries for some boards are also provided via GitHub release page or M5Burner. 97 | You can download the pre-built binaries from the Releases page of this repository. 98 | 99 | https://github.com/ciniml/openai-realtime-embedded-sdk/releases/ 100 | 101 | ## Wi-Fi provisioning via Espressif SoftAP provisioning tool 102 | 103 | With `CONFIG_USE_WIFI_PROVISIONING_SOFTAP` enabled, Wi-Fi credentials are provisioned by Espressif SoftAP provisioning scheme instead of embedding the credentials into the firmware. 104 | 105 | You can use the Espressif official provisioning tool listed here to configure the WiFi connection. 106 | 107 | https://docs.espressif.com/projects/esp-idf/en/stable/esp32/api-reference/provisioning/wifi_provisioning.html#provisioning-tools 108 | 109 | The device will show the QR code for provisioning app on the serial console after boot. (The user name and password (pop) are hardcoded in the firmware and using the same value for all pre-built firmwares, so you can use the same QR code here.) 
110 | 111 | ``` 112 | I (1149) QRCODE: {"ver":"v1","name":"OAI_RES_WIFI","username":"wifiprov","pop":"abcd1234","transport":"softap"} 113 | 114 | █▀▀▀▀▀█ ▀▄▄▀▀▀▄ █▄▄▀▀ ▀█ █ █▀▀▀▀▀█ 115 | █ ███ █ ▀ ██▀ █▄█▀ █▀▄██▄██ █ ███ █ 116 | █ ▀▀▀ █ ▀▄▀▄██ ▄▀▄▀▀██▄▄▀█▀█ █ ▀▀▀ █ 117 | ▀▀▀▀▀▀▀ █ █▄▀▄▀▄▀ ▀▄▀ ▀▄▀▄█▄█ ▀▀▀▀▀▀▀ 118 | ▀█ ██▄▀ ▄█▀▀▄ █▄▄▀▀█ ▄▄▄▀ ▄▀▀▄▀ ▄ ▄ ▀ 119 | █▀█ ▀▀ ▄▄██▄▀ █▄█▄▄ ▀█▄▀▀▀██ ▀██▀ 120 | ▄ ▄▄ █▀ ▀ ▀▀▀▄ █▀ █▀▀ ▄▄▀▄ █▀▀█▄ ▄█▀ 121 | █ ▀█▀▄█▀▀█▀▀█▀▀▀██▄ ▄▄ ▀▀█ ▀▄▄▀▄██ 122 | █ ▄▄▄▄▀ ██▄▄▀ █▀ █▄█ ▄▀ ▀▄▀▀▄█▄ ▀█▄▀ 123 | ██▀ ▀▀▄▄██▀▄█▀█▀▀▄█▄ ▀██▄▀█▄▄ ▀█ ▀ 124 | ▀ ▀██ █ ▀ ▀█ ▀██ ▄█▄█ █ ▀███▄▄█▀ 125 | ▀▀▀▀▄ ▀ ▄█▄ ▀ ▀█▀▄█▄▀▀█▀█ ▄██▀█▀▀▀█▄▀ 126 | ██ █▀▄▀▄▄█▄ ▄▄█▄█ ▀█ ▀▄▄▄ ▀▄ █▀▄ ▀▀ 127 | █▀█▀ █▀▄▀ ▄ ▄▀█ ▄█▄ █▄█ ▄▀ ▄▄█▀ █▄▀ 128 | ▀ ▀ ▀▀▀▄▄▀█▀█▄█▀▄ █ ▀▀██ ▀█▀▀▀█▀█▀ 129 | █▀▀▀▀▀█ ▀▄▀█ █ █▀█▀▀▀ ▄▄██ ▀ █▀ ▀ 130 | █ ███ █ ██▄▀▄█▄█ ▀▄█▄▄▄▄▀ █▀▀██▀▀█ █▄ 131 | █ ▀▀▀ █ ▄███▄█ █ █▀ ▀ ▀█ ██▄ ▀▀█▀▄▄█ 132 | ▀▀▀▀▀▀▀ ▀ ▀▀▀▀ ▀ ▀ ▀ ▀ ▀▀▀ ▀ 133 | ``` 134 | 135 | 136 | If you cannot see the serial console, you can use [this link](https://espressif.github.io/esp-jumpstart/qrcode.html?data={%22ver%22:%22v1%22,%22name%22:%22OAI_RES_WIFI%22,%22username%22:%22wifiprov%22,%22pop%22:%22abcd1234%22,%22transport%22:%22softap%22}) to show the QR code on the browser. 137 | 138 | ## OpenAI API key configuration 139 | 140 | With `CONFIG_USE_WIFI_PROVISIONING_SOFTAP`, OpenAI API key is configured via web browser. 141 | After Wi-Fi connection is established, you can access to the configuration page via http://oai-res-example.local/ 142 | 143 | ![API key configuration](api_key_configuration.png) 144 | 145 | ## Reset the Wi-Fi and OpenAI API configuration 146 | 147 | For some devices, you can trigger to reset the WiFi and OpenAI API configuration by pressing a button while booting. 148 | 149 | * AtomS3: Press the screen button while booting. 150 | * Atom Lite: Press the top button while booting. 151 | * CoreS3 (SE): No configuration reset button. To reset the configuration, reprogram the firmware. 
-------------------------------------------------------------------------------- /src/media.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "main.h" 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #define OPUS_OUT_BUFFER_SIZE 1276 // 1276 bytes is recommended by opus_encode 14 | #define SAMPLE_RATE 8000 15 | #define BUFFER_SAMPLES 320 16 | 17 | static i2s_chan_handle_t s_i2s_tx_handle = nullptr; 18 | static i2s_chan_handle_t s_i2s_rx_handle = nullptr; 19 | static constexpr i2s_chan_handle_t get_i2s_tx_handle() { return s_i2s_tx_handle; } 20 | static constexpr i2s_chan_handle_t get_i2s_rx_handle() { return s_i2s_rx_handle; } 21 | 22 | #define RX_MCLK_PIN CONFIG_MEDIA_I2S_RX_MCLK_PIN 23 | #define RX_BCLK_PIN CONFIG_MEDIA_I2S_RX_BCLK_PIN 24 | #define RX_LRCLK_PIN CONFIG_MEDIA_I2S_RX_LRCLK_PIN 25 | #define RX_DATA_PIN CONFIG_MEDIA_I2S_RX_DATA_PIN 26 | 27 | #define TX_MCLK_PIN CONFIG_MEDIA_I2S_TX_MCLK_PIN 28 | #define TX_BCLK_PIN CONFIG_MEDIA_I2S_TX_BCLK_PIN 29 | #define TX_LRCLK_PIN CONFIG_MEDIA_I2S_TX_LRCLK_PIN 30 | #define TX_DATA_PIN CONFIG_MEDIA_I2S_TX_DATA_PIN 31 | 32 | #define OPUS_ENCODER_BITRATE 30000 33 | #define OPUS_ENCODER_COMPLEXITY 0 34 | 35 | constexpr const char *TAG = "media"; 36 | 37 | // UDP socket for audio data debugging 38 | #ifdef CONFIG_MEDIA_ENABLE_DEBUG_AUDIO_UDP_CLIENT 39 | static struct sockaddr_in s_debug_audio_in_dest_addr; 40 | static struct sockaddr_in s_debug_audio_out_dest_addr; 41 | static ssize_t s_debug_audio_sock; 42 | #endif // CONFIG_MEDIA_ENABLE_DEBUG_AUDIO_UDP_CLIENT 43 | 44 | // Initialization of AW88298 and ES7210 from M5Unified implementation. 
45 | constexpr std::uint8_t aw88298_i2c_addr = 0x36; 46 | constexpr std::uint8_t es7210_i2c_addr = 0x40; 47 | constexpr std::uint8_t aw9523_i2c_addr = 0x58; 48 | static void aw88298_write_reg(std::uint8_t reg, std::uint16_t value) 49 | { 50 | value = __builtin_bswap16(value); 51 | M5.In_I2C.writeRegister(aw88298_i2c_addr, reg, (const std::uint8_t*)&value, 2, 400000); 52 | } 53 | 54 | static void es7210_write_reg(std::uint8_t reg, std::uint8_t value) 55 | { 56 | M5.In_I2C.writeRegister(es7210_i2c_addr, reg, &value, 1, 400000); 57 | } 58 | 59 | static void initialize_speaker_cores3() 60 | { 61 | M5.In_I2C.bitOn(aw9523_i2c_addr, 0x02, 0b00000100, 400000); 62 | 63 | aw88298_write_reg( 0x61, 0x0673 ); // boost mode disabled 64 | aw88298_write_reg( 0x04, 0x4040 ); // I2SEN=1 AMPPD=0 PWDN=0 65 | aw88298_write_reg( 0x05, 0x0008 ); // RMSE=0 HAGCE=0 HDCCE=0 HMUTE=0 66 | aw88298_write_reg( 0x06, 0x14C0 ); // INPLEV=0 (not attenuated), I2SRXEN=1 (enable), CHSEL=01 (left), I2SMD=00 (Philips Standard I2S), I2SFS=00 (16bit), I2SBCK=00 (32*fs), I2SSR=0000 (8kHz) 67 | aw88298_write_reg( 0x0C, 0x0064 ); // volume setting (full volume) 68 | } 69 | 70 | static void initialize_microphone_cores3() 71 | { 72 | es7210_write_reg(0x00, 0xFF); // RESET_CTL 73 | struct __attribute__((packed)) reg_data_t 74 | { 75 | uint8_t reg; 76 | uint8_t value; 77 | }; 78 | 79 | static constexpr reg_data_t data[] = 80 | { 81 | { 0x00, 0x41 }, // RESET_CTL 82 | { 0x01, 0x1f }, // CLK_ON_OFF 83 | { 0x06, 0x00 }, // DIGITAL_PDN 84 | { 0x07, 0x20 }, // ADC_OSR 85 | { 0x08, 0x10 }, // MODE_CFG 86 | { 0x09, 0x30 }, // TCT0_CHPINI 87 | { 0x0A, 0x30 }, // TCT1_CHPINI 88 | { 0x20, 0x0a }, // ADC34_HPF2 89 | { 0x21, 0x2a }, // ADC34_HPF1 90 | { 0x22, 0x0a }, // ADC12_HPF2 91 | { 0x23, 0x2a }, // ADC12_HPF1 92 | { 0x02, 0xC1 }, 93 | { 0x04, 0x01 }, 94 | { 0x05, 0x00 }, 95 | { 0x11, 0x60 }, 96 | { 0x40, 0x42 }, // ANALOG_SYS 97 | { 0x41, 0x70 }, // MICBIAS12 98 | { 0x42, 0x70 }, // MICBIAS34 99 | { 0x43, 0x1B }, // 
MIC1_GAIN 100 | { 0x44, 0x1B }, // MIC2_GAIN 101 | { 0x45, 0x00 }, // MIC3_GAIN 102 | { 0x46, 0x00 }, // MIC4_GAIN 103 | { 0x47, 0x00 }, // MIC1_LP 104 | { 0x48, 0x00 }, // MIC2_LP 105 | { 0x49, 0x00 }, // MIC3_LP 106 | { 0x4A, 0x00 }, // MIC4_LP 107 | { 0x4B, 0x00 }, // MIC12_PDN 108 | { 0x4C, 0xFF }, // MIC34_PDN 109 | { 0x01, 0x14 }, // CLK_ON_OFF 110 | }; 111 | for (auto& d: data) 112 | { 113 | es7210_write_reg(d.reg, d.value); 114 | } 115 | } 116 | 117 | void oai_init_audio_capture() { 118 | #ifdef CONFIG_MEDIA_INIT_MICROPHONE_AND_SPEAKER 119 | ESP_LOGI(TAG, "Initializing microphone"); 120 | initialize_microphone_cores3(); 121 | ESP_LOGI(TAG, "Initializing speaker"); 122 | initialize_speaker_cores3(); 123 | #endif 124 | 125 | #ifdef CONFIG_MEDIA_ENABLE_DEBUG_AUDIO_UDP_CLIENT 126 | // Initialize UDP socket for debug. 127 | s_debug_audio_sock = socket(AF_INET, SOCK_DGRAM, 0); 128 | if (s_debug_audio_sock < 0) { 129 | ESP_LOGE(TAG, "Failed to create socket"); 130 | return; 131 | } 132 | 133 | s_debug_audio_in_dest_addr.sin_addr.s_addr = inet_addr(CONFIG_MEDIA_DEBUG_AUDIO_HOST); 134 | s_debug_audio_in_dest_addr.sin_family = AF_INET; 135 | s_debug_audio_in_dest_addr.sin_port = htons(CONFIG_MEDIA_DEBUG_AUDIO_IN_PORT); 136 | s_debug_audio_out_dest_addr.sin_addr.s_addr = inet_addr(CONFIG_MEDIA_DEBUG_AUDIO_HOST); 137 | s_debug_audio_out_dest_addr.sin_family = AF_INET; 138 | s_debug_audio_out_dest_addr.sin_port = htons(CONFIG_MEDIA_DEBUG_AUDIO_OUT_PORT); 139 | #endif // CONFIG_MEDIA_ENABLE_DEBUG_AUDIO_UDP_CLIENT 140 | 141 | ESP_LOGI(TAG, "Initializing I2S for audio input/output"); 142 | { 143 | i2s_chan_config_t chan_config = I2S_CHANNEL_DEFAULT_CONFIG(I2S_NUM_1, I2S_ROLE_MASTER); 144 | chan_config.auto_clear = true; 145 | #ifdef CONFIG_MEDIA_I2S_RX_TX_SHARED 146 | ESP_ERROR_CHECK(i2s_new_channel(&chan_config, &s_i2s_tx_handle, &s_i2s_rx_handle)); 147 | #else // CONFIG_MEDIA_I2S_RX_TX_SHARED 148 | ESP_ERROR_CHECK(i2s_new_channel(&chan_config, &s_i2s_tx_handle, 
nullptr)); 149 | #endif // CONFIG_MEDIA_I2S_RX_TX_SHARED 150 | i2s_std_config_t std_cfg = { 151 | .clk_cfg = I2S_STD_CLK_DEFAULT_CONFIG(SAMPLE_RATE), 152 | .slot_cfg = I2S_STD_PHILIPS_SLOT_DEFAULT_CONFIG(I2S_DATA_BIT_WIDTH_16BIT, I2S_SLOT_MODE_MONO ), 153 | .gpio_cfg = { 154 | .mclk = gpio_num_t(CONFIG_MEDIA_I2S_TX_MCLK_PIN), 155 | .bclk = gpio_num_t(CONFIG_MEDIA_I2S_TX_BCLK_PIN), 156 | .ws = gpio_num_t(CONFIG_MEDIA_I2S_TX_LRCLK_PIN), 157 | .dout = gpio_num_t(CONFIG_MEDIA_I2S_TX_DATA_PIN), 158 | .din = gpio_num_t(CONFIG_MEDIA_I2S_RX_DATA_PIN), 159 | .invert_flags = { 160 | .mclk_inv = false, 161 | .bclk_inv = false, 162 | .ws_inv = false, 163 | }, 164 | }, 165 | }; 166 | std_cfg.slot_cfg.data_bit_width = i2s_data_bit_width_t::I2S_DATA_BIT_WIDTH_16BIT; 167 | std_cfg.slot_cfg.slot_bit_width = i2s_slot_bit_width_t::I2S_SLOT_BIT_WIDTH_16BIT; 168 | std_cfg.slot_cfg.slot_mode = i2s_slot_mode_t::I2S_SLOT_MODE_MONO; 169 | #ifdef CONFIG_MEDIA_I2S_TX_SLOT_LEFT_ONLY 170 | std_cfg.slot_cfg.slot_mask = i2s_std_slot_mask_t::I2S_STD_SLOT_LEFT; 171 | #else // CONFIG_MEDIA_I2S_TX_SLOT_LEFT_ONLY 172 | std_cfg.slot_cfg.slot_mask = i2s_std_slot_mask_t::I2S_STD_SLOT_BOTH; 173 | #endif // CONFIG_MEDIA_I2S_TX_SLOT_LEFT_ONLY 174 | std_cfg.slot_cfg.ws_width = 16; 175 | std_cfg.slot_cfg.ws_pol = false; 176 | std_cfg.slot_cfg.bit_shift = true; 177 | #if SOC_I2S_HW_VERSION_1 178 | std_cfg.slot_cfg.msb_right = false; 179 | #else 180 | std_cfg.slot_cfg.left_align = true; 181 | std_cfg.slot_cfg.big_endian = false; 182 | std_cfg.slot_cfg.bit_order_lsb = false; 183 | #endif // SOC_I2S_HW_VERSION_1 184 | i2s_channel_init_std_mode(s_i2s_tx_handle, &std_cfg); 185 | i2s_channel_enable(s_i2s_tx_handle); 186 | #ifdef CONFIG_MEDIA_I2S_RX_TX_SHARED 187 | i2s_channel_init_std_mode(s_i2s_rx_handle, &std_cfg); 188 | i2s_channel_enable(s_i2s_rx_handle); 189 | #endif // CONFIG_MEDIA_I2S_RX_TX_SHARED 190 | } 191 | 192 | #ifndef CONFIG_MEDIA_I2S_RX_TX_SHARED 193 | { 194 | i2s_chan_config_t chan_config = 
I2S_CHANNEL_DEFAULT_CONFIG(I2S_NUM_0, I2S_ROLE_MASTER); 195 | chan_config.auto_clear = true; 196 | ESP_ERROR_CHECK(i2s_new_channel(&chan_config, nullptr, &s_i2s_rx_handle)); 197 | #ifdef CONFIG_MEDIA_I2S_RX_PDM 198 | i2s_pdm_rx_config_t pdm_rx_cfg = { 199 | .clk_cfg = I2S_PDM_RX_CLK_DEFAULT_CONFIG(SAMPLE_RATE), 200 | .slot_cfg = I2S_PDM_RX_SLOT_DEFAULT_CONFIG(I2S_DATA_BIT_WIDTH_16BIT, I2S_SLOT_MODE_MONO), 201 | .gpio_cfg = { 202 | .clk = gpio_num_t(CONFIG_MEDIA_I2S_RX_LRCLK_PIN), 203 | .din = gpio_num_t(CONFIG_MEDIA_I2S_RX_DATA_PIN), 204 | .invert_flags = { 205 | .clk_inv = false, 206 | }, 207 | }, 208 | }; 209 | ESP_ERROR_CHECK(i2s_channel_init_pdm_rx_mode(s_i2s_rx_handle, &pdm_rx_cfg)); 210 | #else // CONFIG_MEDIA_I2S_RX_PDM 211 | i2s_std_config_t std_cfg = { 212 | .clk_cfg = I2S_STD_CLK_DEFAULT_CONFIG(SAMPLE_RATE), 213 | .slot_cfg = I2S_STD_PHILIPS_SLOT_DEFAULT_CONFIG(I2S_DATA_BIT_WIDTH_16BIT, I2S_SLOT_MODE_MONO ), 214 | .gpio_cfg = { 215 | .mclk = gpio_num_t(CONFIG_MEDIA_I2S_RX_MCLK_PIN), 216 | .bclk = gpio_num_t(CONFIG_MEDIA_I2S_RX_BCLK_PIN), 217 | .ws = gpio_num_t(CONFIG_MEDIA_I2S_RX_LRCLK_PIN), 218 | .dout = gpio_num_t(I2S_PIN_NO_CHANGE), 219 | .din = gpio_num_t(CONFIG_MEDIA_I2S_RX_DATA_PIN), 220 | .invert_flags = { 221 | .mclk_inv = false, 222 | .bclk_inv = false, 223 | .ws_inv = false, 224 | }, 225 | }, 226 | }; 227 | i2s_channel_init_std_mode(s_i2s_rx_handle, &std_cfg); 228 | #endif // CONFIG_MEDIA_I2S_RX_PDM 229 | i2s_channel_enable(s_i2s_rx_handle); 230 | } 231 | #endif // CONFIG_MEDIA_I2S_RX_TX_SHARED 232 | } 233 | 234 | opus_int16 *output_buffer = NULL; 235 | OpusDecoder *opus_decoder = NULL; 236 | 237 | void oai_init_audio_decoder() { 238 | int decoder_error = 0; 239 | opus_decoder = opus_decoder_create(SAMPLE_RATE, 1, &decoder_error); 240 | if (decoder_error != OPUS_OK) { 241 | ESP_LOGE(TAG, "Failed to create OPUS decoder"); 242 | return; 243 | } 244 | 245 | output_buffer = (opus_int16 *)malloc(BUFFER_SAMPLES * sizeof(opus_int16)); 246 | } 247 | 248 
| void oai_audio_decode(uint8_t *data, size_t size) { 249 | int decoded_size = 250 | opus_decode(opus_decoder, data, size, output_buffer, BUFFER_SAMPLES, 0); 251 | 252 | if (decoded_size > 0) { 253 | #ifdef CONFIG_IDF_TARGET_ESP32 254 | for(size_t i = 0; i < decoded_size * sizeof(opus_int16)/4; i++) { 255 | const auto value = reinterpret_cast(output_buffer)[i]; 256 | const auto high_word = value >> 16; 257 | const auto low_word = value & 0xFFFF; 258 | reinterpret_cast(output_buffer)[i] = (low_word << 16) | high_word; 259 | } 260 | #endif // CONFIG_IDF_TARGET_ESP32 261 | std::size_t bytes_written = 0; 262 | if( esp_err_t err = i2s_channel_write(get_i2s_tx_handle(), output_buffer, decoded_size * sizeof(opus_int16), 263 | &bytes_written, portMAX_DELAY); err != ESP_OK ) { 264 | ESP_LOGE(TAG, "Failed to write audio data to I2S: %s", esp_err_to_name(err)); 265 | } 266 | #ifdef CONFIG_MEDIA_ENABLE_DEBUG_AUDIO_UDP_CLIENT 267 | sendto(s_debug_audio_sock, output_buffer, decoded_size * sizeof(opus_int16), 0, (struct sockaddr *)&s_debug_audio_out_dest_addr, sizeof(s_debug_audio_out_dest_addr)); 268 | #endif // CONFIG_MEDIA_ENABLE_DEBUG_AUDIO_UDP_CLIENT 269 | } 270 | } 271 | 272 | OpusEncoder *opus_encoder = NULL; 273 | opus_int16 *encoder_input_buffer = NULL; 274 | uint8_t *encoder_output_buffer = NULL; 275 | 276 | void oai_init_audio_encoder() { 277 | int encoder_error; 278 | opus_encoder = opus_encoder_create(SAMPLE_RATE, 1, OPUS_APPLICATION_VOIP, 279 | &encoder_error); 280 | if (encoder_error != OPUS_OK) { 281 | ESP_LOGE(TAG, "Failed to create OPUS encoder"); 282 | return; 283 | } 284 | 285 | if (opus_encoder_init(opus_encoder, SAMPLE_RATE, 1, OPUS_APPLICATION_VOIP) != 286 | OPUS_OK) { 287 | ESP_LOGE(TAG, "Failed to initialize OPUS encoder"); 288 | return; 289 | } 290 | 291 | opus_encoder_ctl(opus_encoder, OPUS_SET_BITRATE(OPUS_ENCODER_BITRATE)); 292 | opus_encoder_ctl(opus_encoder, OPUS_SET_COMPLEXITY(OPUS_ENCODER_COMPLEXITY)); 293 | opus_encoder_ctl(opus_encoder, 
OPUS_SET_SIGNAL(OPUS_SIGNAL_VOICE)); 294 | encoder_input_buffer = (opus_int16 *)malloc(BUFFER_SAMPLES*sizeof(opus_int16)); 295 | encoder_output_buffer = (uint8_t *)malloc(OPUS_OUT_BUFFER_SIZE); 296 | } 297 | 298 | void oai_send_audio(PeerConnection *peer_connection) { 299 | size_t bytes_read = 0; 300 | if( esp_err_t err = i2s_channel_read(get_i2s_rx_handle(), encoder_input_buffer, BUFFER_SAMPLES*sizeof(opus_int16), &bytes_read, 301 | portMAX_DELAY) ; err != ESP_OK ) { 302 | ESP_LOGE(TAG, "Failed to read audio data from I2S: %s", esp_err_to_name(err)); 303 | } 304 | #ifdef CONFIG_IDF_TARGET_ESP32 305 | for(size_t i = 0; i < bytes_read/4; i++) { 306 | const auto value = reinterpret_cast(encoder_input_buffer)[i]; 307 | const auto high_word = value >> 16; 308 | const auto low_word = value & 0xFFFF; 309 | reinterpret_cast(encoder_input_buffer)[i] = (low_word << 16) | high_word; 310 | } 311 | #endif // CONFIG_IDF_TARGET_ESP32 312 | 313 | #ifdef CONFIG_MEDIA_ENABLE_DEBUG_AUDIO_UDP_CLIENT 314 | sendto(s_debug_audio_sock, encoder_input_buffer, bytes_read, 0, (struct sockaddr *)&s_debug_audio_in_dest_addr, sizeof(s_debug_audio_in_dest_addr)); 315 | #endif // CONFIG_MEDIA_ENABLE_DEBUG_AUDIO_UDP_CLIENT 316 | 317 | auto encoded_size = 318 | opus_encode(opus_encoder, encoder_input_buffer, BUFFER_SAMPLES, 319 | encoder_output_buffer, OPUS_OUT_BUFFER_SIZE); 320 | 321 | peer_connection_send_audio(peer_connection, encoder_output_buffer, 322 | encoded_size); 323 | } 324 | -------------------------------------------------------------------------------- /src/wifi.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #ifdef CONFIG_USE_WIFI_PROVISIONING_SOFTAP 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #endif // CONFIG_USE_WIFI_PROVISIONING_SOFTAP 14 | 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | #include "main.h" 22 | 
#include "bsp.h"

#ifdef CONFIG_USE_WIFI_PROVISIONING_SOFTAP
// From IDF examples/provisioning/wifi_prov_mgr/main/app_main.c
// Example of WiFi provisioning using SoftAP provisioning scheme
#define EXAMPLE_PROV_SEC2_USERNAME "wifiprov"
#define EXAMPLE_PROV_SEC2_PWD "abcd1234"

/* This salt,verifier has been generated for username = "wifiprov" and password = "abcd1234"
 * IMPORTANT NOTE: For production cases, this must be unique to every device
 * and should come from device manufacturing partition.*/
static const char sec2_salt[] = {
    0x03, 0x6e, 0xe0, 0xc7, 0xbc, 0xb9, 0xed, 0xa8, 0x4c, 0x9e, 0xac, 0x97, 0xd9, 0x3d, 0xec, 0xf4
};

static const char sec2_verifier[] = {
    0x7c, 0x7c, 0x85, 0x47, 0x65, 0x08, 0x94, 0x6d, 0xd6, 0x36, 0xaf, 0x37, 0xd7, 0xe8, 0x91, 0x43,
    0x78, 0xcf, 0xfd, 0x61, 0x6c, 0x59, 0xd2, 0xf8, 0x39, 0x08, 0x12, 0x72, 0x38, 0xde, 0x9e, 0x24,
    0xa4, 0x70, 0x26, 0x1c, 0xdf, 0xa9, 0x03, 0xc2, 0xb2, 0x70, 0xe7, 0xb1, 0x32, 0x24, 0xda, 0x11,
    0x1d, 0x97, 0x18, 0xdc, 0x60, 0x72, 0x08, 0xcc, 0x9a, 0xc9, 0x0c, 0x48, 0x27, 0xe2, 0xae, 0x89,
    0xaa, 0x16, 0x25, 0xb8, 0x04, 0xd2, 0x1a, 0x9b, 0x3a, 0x8f, 0x37, 0xf6, 0xe4, 0x3a, 0x71, 0x2e,
    0xe1, 0x27, 0x86, 0x6e, 0xad, 0xce, 0x28, 0xff, 0x54, 0x46, 0x60, 0x1f, 0xb9, 0x96, 0x87, 0xdc,
    0x57, 0x40, 0xa7, 0xd4, 0x6c, 0xc9, 0x77, 0x54, 0xdc, 0x16, 0x82, 0xf0, 0xed, 0x35, 0x6a, 0xc4,
    0x70, 0xad, 0x3d, 0x90, 0xb5, 0x81, 0x94, 0x70, 0xd7, 0xbc, 0x65, 0xb2, 0xd5, 0x18, 0xe0, 0x2e,
    0xc3, 0xa5, 0xf9, 0x68, 0xdd, 0x64, 0x7b, 0xb8, 0xb7, 0x3c, 0x9c, 0xfc, 0x00, 0xd8, 0x71, 0x7e,
    0xb7, 0x9a, 0x7c, 0xb1, 0xb7, 0xc2, 0xc3, 0x18, 0x34, 0x29, 0x32, 0x43, 0x3e, 0x00, 0x99, 0xe9,
    0x82, 0x94, 0xe3, 0xd8, 0x2a, 0xb0, 0x96, 0x29, 0xb7, 0xdf, 0x0e, 0x5f, 0x08, 0x33, 0x40, 0x76,
    0x52, 0x91, 0x32, 0x00, 0x9f, 0x97, 0x2c, 0x89, 0x6c, 0x39, 0x1e, 0xc8, 0x28, 0x05, 0x44, 0x17,
    0x3f, 0x68, 0x02, 0x8a, 0x9f, 0x44, 0x61, 0xd1, 0xf5, 0xa1, 0x7e, 0x5a, 0x70, 0xd2, 0xc7, 0x23,
    0x81, 0xcb, 0x38, 0x68, 0xe4, 0x2c, 0x20, 0xbc, 0x40, 0x57, 0x76, 0x17, 0xbd, 0x08, 0xb8, 0x96,
    0xbc, 0x26, 0xeb, 0x32, 0x46, 0x69, 0x35, 0x05, 0x8c, 0x15, 0x70, 0xd9, 0x1b, 0xe9, 0xbe, 0xcc,
    0xa9, 0x38, 0xa6, 0x67, 0xf0, 0xad, 0x50, 0x13, 0x19, 0x72, 0x64, 0xbf, 0x52, 0xc2, 0x34, 0xe2,
    0x1b, 0x11, 0x79, 0x74, 0x72, 0xbd, 0x34, 0x5b, 0xb1, 0xe2, 0xfd, 0x66, 0x73, 0xfe, 0x71, 0x64,
    0x74, 0xd0, 0x4e, 0xbc, 0x51, 0x24, 0x19, 0x40, 0x87, 0x0e, 0x92, 0x40, 0xe6, 0x21, 0xe7, 0x2d,
    0x4e, 0x37, 0x76, 0x2f, 0x2e, 0xe2, 0x68, 0xc7, 0x89, 0xe8, 0x32, 0x13, 0x42, 0x06, 0x84, 0x84,
    0x53, 0x4a, 0xb3, 0x0c, 0x1b, 0x4c, 0x8d, 0x1c, 0x51, 0x97, 0x19, 0xab, 0xae, 0x77, 0xff, 0xdb,
    0xec, 0xf0, 0x10, 0x95, 0x34, 0x33, 0x6b, 0xcb, 0x3e, 0x84, 0x0f, 0xb9, 0xd8, 0x5f, 0xb8, 0xa0,
    0xb8, 0x55, 0x53, 0x3e, 0x70, 0xf7, 0x18, 0xf5, 0xce, 0x7b, 0x4e, 0xbf, 0x27, 0xce, 0xce, 0xa8,
    0xb3, 0xbe, 0x40, 0xc5, 0xc5, 0x32, 0x29, 0x3e, 0x71, 0x64, 0x9e, 0xde, 0x8c, 0xf6, 0x75, 0xa1,
    0xe6, 0xf6, 0x53, 0xc8, 0x31, 0xa8, 0x78, 0xde, 0x50, 0x40, 0xf7, 0x62, 0xde, 0x36, 0xb2, 0xba
};
#define QRCODE_BASE_URL "https://espressif.github.io/esp-jumpstart/qrcode.html"

constexpr const char* OAI_NVS_NS = "oai";
constexpr const char* OAI_API_KEY_NVS_KEY = "oai_api_key";
constexpr const char* OAI_API_URI_NVS_KEY = "oai_api_uri";

// Upper bound for the API key length kept in RAM and written to the NVS.
constexpr size_t OAI_API_KEY_MAX_LEN = 1024;
// Upper bound for a POST body accepted by the configuration server.
constexpr size_t OAI_CONFIG_POST_MAX_LEN = 2048;

extern const uint8_t index_html_start[] asm("_binary_index_html_start");
extern const uint8_t index_html_end[] asm("_binary_index_html_end");

// NUL-terminated copy of the API key; empty until a key is loaded or set.
static std::vector<char> s_api_key;

/**
 * Check if the API key is present
 *
 * @param has_api_key Set to true when a non-empty key is cached in RAM or
 *                    stored in the NVS, false otherwise.
 * @return ESP_OK when the check could be performed (including "no key
 *         stored"), otherwise the error reported by the NVS API.
 */
esp_err_t oai_has_api_key(bool& has_api_key)
{
  has_api_key = false;
  if( !s_api_key.empty() ) {
    has_api_key = true;
    return ESP_OK;
  }

  nvs_handle_t nvs_handle;
  if( esp_err_t err = nvs_open(OAI_NVS_NS, NVS_READONLY, &nvs_handle); err != ESP_OK ) {
    return err;
  }

  // Passing a null buffer only queries the stored length (terminator included).
  size_t required_size = 0;
  const esp_err_t err = nvs_get_str(nvs_handle, OAI_API_KEY_NVS_KEY, nullptr, &required_size);
  nvs_close(nvs_handle);

  if( err == ESP_ERR_NVS_NOT_FOUND ) {
    return ESP_OK;
  }
  if( err != ESP_OK ) {
    return err;
  }

  // A size of 1 is just the terminator, i.e. an empty key.
  has_api_key = required_size > 1;
  return ESP_OK;
}

/**
 * Get the API key from the NVS
 *
 * The key is cached in RAM after the first successful read.
 *
 * @param api_key Receives the NUL-terminated key; untouched on failure.
 * @return ESP_OK on success, otherwise the error reported by the NVS API
 *         (ESP_ERR_NVS_NOT_FOUND when no key is stored).
 */
esp_err_t oai_get_api_key(std::vector<char>& api_key)
{
  if( !s_api_key.empty() ) {
    api_key = s_api_key;
    return ESP_OK;
  }

  nvs_handle_t nvs_handle;
  if( esp_err_t err = nvs_open(OAI_NVS_NS, NVS_READONLY, &nvs_handle); err != ESP_OK ) {
    return err;
  }

  // Read into a local buffer first so a failed read cannot leave a
  // garbage-filled cache behind that would later be reported as a key.
  std::vector<char> stored_key;
  size_t required_size = 0;
  esp_err_t err = nvs_get_str(nvs_handle, OAI_API_KEY_NVS_KEY, nullptr, &required_size);
  if( err == ESP_OK ) {
    stored_key.resize(required_size);
    err = nvs_get_str(nvs_handle, OAI_API_KEY_NVS_KEY, stored_key.data(), &required_size);
  }
  nvs_close(nvs_handle);

  if( err != ESP_OK ) {
    return err;
  }

  s_api_key.swap(stored_key);
  api_key = s_api_key;
  return ESP_OK;
}

/**
 * Set the API key in the NVS
 *
 * The key is also cached in RAM. At most OAI_API_KEY_MAX_LEN characters are
 * used; the same bounded copy is written to the NVS so both always agree.
 *
 * @param api_key NUL-terminated key, must not be null.
 * @return ESP_OK on success, ESP_ERR_INVALID_ARG for an empty key, otherwise
 *         the error reported by the NVS API.
 */
esp_err_t oai_set_api_key(const char* api_key)
{
  assert(api_key != nullptr);

  // Store the new API key in the RAM.
  const size_t api_key_len = strnlen(api_key, OAI_API_KEY_MAX_LEN);
  if( api_key_len == 0 ) {
    return ESP_ERR_INVALID_ARG;
  }
  s_api_key.assign(api_key, api_key + api_key_len);
  s_api_key.push_back('\0');

  // Store the new API key in the NVS.
  nvs_handle_t nvs_handle;
  if( esp_err_t err = nvs_open(OAI_NVS_NS, NVS_READWRITE, &nvs_handle); err != ESP_OK ) {
    return err;
  }

  esp_err_t ret = nvs_set_str(nvs_handle, OAI_API_KEY_NVS_KEY, s_api_key.data());
  if( ret == ESP_OK ) {
    ret = nvs_commit(nvs_handle);
  }
  nvs_close(nvs_handle);
  return ret;
}

/**
 * Get the API URI from the NVS or the default value
 *
 * @param api_uri Receives the stored URI, or the default when the namespace
 *                or the key does not exist yet.
 * @return ESP_OK on success, otherwise the error reported by the NVS API.
 */
esp_err_t oai_get_api_uri(std::string& api_uri)
{
  // NOTE(review): the default here is OPENAI_REALTIMEAPI while the rest of
  // this file uses CONFIG_OPENAI_REALTIMEAPI - confirm both are equivalent.
  nvs_handle_t nvs_handle;
  if( esp_err_t err = nvs_open(OAI_NVS_NS, NVS_READONLY, &nvs_handle); err != ESP_OK ) {
    if( err == ESP_ERR_NVS_NOT_FOUND ) {
      // A read-only open fails this way while the namespace has never been
      // written, which simply means no custom URI has been stored.
      api_uri = OPENAI_REALTIMEAPI;
      return ESP_OK;
    }
    return err;
  }

  size_t required_size = 0;
  esp_err_t err = nvs_get_str(nvs_handle, OAI_API_URI_NVS_KEY, nullptr, &required_size);
  if( err == ESP_ERR_NVS_NOT_FOUND ) {
    // Close the handle on this path as well; it used to be leaked.
    nvs_close(nvs_handle);
    api_uri = OPENAI_REALTIMEAPI;
    return ESP_OK;
  }

  std::vector<char> api_uri_buf;
  if( err == ESP_OK ) {
    api_uri_buf.resize(required_size);
    err = nvs_get_str(nvs_handle, OAI_API_URI_NVS_KEY, api_uri_buf.data(), &required_size);
  }
  nvs_close(nvs_handle);

  if( err != ESP_OK ) {
    return err;
  }

  api_uri = api_uri_buf.empty() ? "" : api_uri_buf.data();
  return ESP_OK;
}

/**
 * Set the API URI in the NVS
 *
 * @param api_uri NUL-terminated URI, must not be null.
 * @return ESP_OK on success, otherwise the error reported by the NVS API.
 */
esp_err_t oai_set_api_uri(const char* api_uri)
{
  assert(api_uri != nullptr);

  nvs_handle_t nvs_handle;
  if( esp_err_t err = nvs_open(OAI_NVS_NS, NVS_READWRITE, &nvs_handle); err != ESP_OK ) {
    return err;
  }

  esp_err_t ret = nvs_set_str(nvs_handle, OAI_API_URI_NVS_KEY, api_uri);
  if( ret == ESP_OK ) {
    ret = nvs_commit(nvs_handle);
  }
  nvs_close(nvs_handle);
  return ret;
}

/**
 * Send a plain-text response with the given status line and body.
 */
static void config_http_send_text(httpd_req_t* req, const char* status, const char* body)
{
  httpd_resp_set_status(req, status);
  httpd_resp_set_type(req, "text/plain");
  // Let the server compute the length so the body is never truncated.
  httpd_resp_send(req, body, HTTPD_RESP_USE_STRLEN);
}

/**
 * GET handler of the configuration server.
 *
 * Serves the embedded index.html, the stored API key ("/api_key") and the
 * API URI ("/api_uri"). Any other URI is answered with 404.
 *
 * NOTE(review): "/api_key" returns the secret to any client that can reach
 * the server - confirm this is acceptable for the deployment.
 */
static esp_err_t config_http_get_handler(httpd_req_t* req)
{
  if( strncmp(req->uri, "/", 2) == 0 || strncmp(req->uri, "/index.html", 11) == 0 ) {
    httpd_resp_set_status(req, "200 OK");
    httpd_resp_set_type(req, "text/html");
    const size_t content_length = index_html_end - index_html_start;
    httpd_resp_send(req, reinterpret_cast<const char*>(index_html_start), content_length);
    return ESP_OK;
  }

  if( strncmp(req->uri, "/api_key", 8) == 0 ) {
    std::vector<char> api_key;
    if( esp_err_t err = oai_get_api_key(api_key); err != ESP_OK && err != ESP_ERR_NVS_NOT_FOUND ) {
      config_http_send_text(req, "500 Internal Server Error", "Internal Server Error");
      return err;
    }
    // The buffer carries a terminating NUL that must not be sent to the client.
    const char* key_text = api_key.empty() ? "" : api_key.data();
    const size_t key_len = api_key.empty() ? 0 : strnlen(api_key.data(), api_key.size());
    httpd_resp_set_status(req, "200 OK");
    httpd_resp_set_type(req, "text/plain");
    httpd_resp_send(req, key_text, key_len);
    return ESP_OK;
  }

  if( strncmp(req->uri, "/api_uri", 8) == 0 ) {
    std::string api_uri = CONFIG_OPENAI_REALTIMEAPI;
    if( esp_err_t err = oai_get_api_uri(api_uri); err != ESP_OK && err != ESP_ERR_NVS_NOT_FOUND ) {
      config_http_send_text(req, "500 Internal Server Error", "Internal Server Error");
      return err;
    }
    httpd_resp_set_status(req, "200 OK");
    httpd_resp_set_type(req, "text/plain");
    httpd_resp_send(req, api_uri.c_str(), api_uri.size());
    return ESP_OK;
  }

  config_http_send_text(req, "404 Not Found", "Not found");
  return ESP_FAIL;
}

/**
 * POST handler of the configuration server.
 *
 * "/reboot" restarts the device. "/api_key" and "/api_uri" store the request
 * body (trailing line breaks removed) in the NVS. After a key has been
 * stored, the task whose handle was registered as user_ctx is notified.
 */
static esp_err_t config_http_post_handler(httpd_req_t* req)
{
  if( strncmp(req->uri, "/reboot", 8) == 0 ) {
    ESP_LOGI(LOG_TAG, "Rebooting the device...");
    esp_restart();
  }

  const size_t content_len = req->content_len;
  if( content_len == 0 ) {
    return ESP_FAIL;
  }
  if( content_len > OAI_CONFIG_POST_MAX_LEN ) {
    // Refuse oversized bodies instead of allocating whatever a client claims.
    config_http_send_text(req, "400 Bad Request", "Request body too large");
    return ESP_FAIL;
  }

  // One extra, zero-initialised byte keeps the body NUL-terminated.
  std::vector<char> buf(content_len + 1, '\0');
  size_t received = 0;
  while( received < content_len ) {
    // A single call may deliver only part of the body.
    const int ret = httpd_req_recv(req, buf.data() + received, content_len - received);
    if( ret == HTTPD_SOCK_ERR_TIMEOUT ) {
      continue;
    }
    if( ret <= 0 ) {
      return ESP_FAIL;
    }
    received += static_cast<size_t>(ret);
  }

  // Trim trailing line breaks.
  size_t line_len = strnlen(buf.data(), content_len);
  while( line_len > 0 && (buf[line_len - 1] == '\n' || buf[line_len - 1] == '\r') ) {
    buf[--line_len] = '\0';
  }

  if( strncmp(req->uri, "/api_key", 9) == 0 ) {
    // Never write the secret itself to the log.
    ESP_LOGI(LOG_TAG, "Received API key (%u bytes)", static_cast<unsigned>(line_len));
    if( esp_err_t err = oai_set_api_key(buf.data()); err != ESP_OK ) {
      ESP_LOGE(LOG_TAG, "Failed to store the API key in the NVS - %s(%d)", esp_err_to_name(err), err);
      return err;
    }
    // Notify to the task.
    if( req->user_ctx != nullptr ) {
      xTaskNotifyGive(reinterpret_cast<TaskHandle_t>(req->user_ctx));
    }
  } else if( strncmp(req->uri, "/api_uri", 9) == 0 ) {
    ESP_LOGI(LOG_TAG, "Received API URI: %s", buf.data());
    if( esp_err_t err = oai_set_api_uri(buf.data()); err != ESP_OK ) {
      ESP_LOGE(LOG_TAG, "Failed to store the API URI in the NVS - %s(%d)", esp_err_to_name(err), err);
      return err;
    }
  } else {
    ESP_LOGW(LOG_TAG, "Unknown POST request: %s", buf.data());
    return ESP_FAIL;
  }

  config_http_send_text(req, "200 OK", "OK");
  return ESP_OK;
}

static httpd_handle_t s_config_server = nullptr;

/**
 * Start the configuration HTTP server and announce it through mDNS.
 *
 * Does nothing when the server is already running. POST handlers are
 * registered with the calling task's handle as user_ctx.
 *
 * @return ESP_OK on success, otherwise the first error encountered. The
 *         server is stopped again when a handler cannot be registered.
 */
static esp_err_t oai_config_httpd_start()
{
  if( s_config_server != nullptr ) {
    return ESP_OK;
  }
  httpd_config_t config = HTTPD_DEFAULT_CONFIG();
  if( auto err = httpd_start(&s_config_server, &config); err != ESP_OK ) {
    return err;
  }

  const auto register_handler = [](const char* uri, httpd_method_t method,
                                   esp_err_t (*handler)(httpd_req_t*), void* user_ctx) {
    const httpd_uri_t uri_handler = {
      .uri = uri,
      .method = method,
      .handler = handler,
      .user_ctx = user_ctx
    };
    return httpd_register_uri_handler(s_config_server, &uri_handler);
  };

  esp_err_t err = ESP_OK;
  const char* get_uris[] = {"/", "/index.html", "/api_key", "/api_uri"};
  for( const auto& uri : get_uris ) {
    if( err == ESP_OK ) {
      err = register_handler(uri, HTTP_GET, config_http_get_handler, nullptr);
    }
  }
  const char* post_uris[] = {"/api_key", "/api_uri", "/reboot"};
  for( const auto& uri : post_uris ) {
    if( err == ESP_OK ) {
      err = register_handler(uri, HTTP_POST, config_http_post_handler, xTaskGetCurrentTaskHandle());
    }
  }
  if( err != ESP_OK ) {
    ESP_LOGE(LOG_TAG, "Failed to register HTTP handlers - %d", err);
    httpd_stop(s_config_server);
    s_config_server = nullptr;
    return err;
  }

  // Start mDNS
  if( auto mdns_err = mdns_init(); mdns_err != ESP_OK ) {
    ESP_LOGE(LOG_TAG, "Failed to initialize mDNS - %d", mdns_err);
    return mdns_err;
  }
  mdns_hostname_set("oai-res-example");
  mdns_service_add(nullptr, "_http", "_tcp", 80, nullptr, 0);
  mdns_service_instance_name_set("_http", "_tcp", "OpenAI Realtime Embedded SDK Example");

  return ESP_OK;
}

/**
 * Stop the configuration HTTP server and mDNS; no-op when it is not running.
 */
static esp_err_t oai_config_httpd_stop()
{
  if( s_config_server == nullptr ) {
    return ESP_OK;
  }

  mdns_free();
  httpd_stop(s_config_server);
  s_config_server = nullptr;
  return ESP_OK;
}
#else  // CONFIG_USE_WIFI_PROVISIONING_SOFTAP
/**
 * Get the API URI from the default value
 */
esp_err_t oai_get_api_uri(std::string& api_uri)
{
  api_uri = CONFIG_OPENAI_REALTIMEAPI;
  return ESP_OK;
}
#endif  // CONFIG_USE_WIFI_PROVISIONING_SOFTAP

// Set by the event handler once an IP address is obtained; polled by
// oai_wifi().
static bool g_wifi_connected = false;

/**
 * Event handler for Wi-Fi, IP and (when enabled) provisioning events.
 *
 * Reconnects up to five times after a disconnect and flags the connection
 * as established once an IP address has been obtained.
 */
static void oai_event_handler(void *arg, esp_event_base_t event_base,
                              int32_t event_id, void *event_data) {
  constexpr int kMaxRetries = 5;
  static int s_retry_num = 0;

  if (event_base == WIFI_EVENT && event_id == WIFI_EVENT_STA_DISCONNECTED) {
    if (s_retry_num < kMaxRetries) {
      esp_wifi_connect();
      s_retry_num++;
      ESP_LOGI(LOG_TAG, "retry to connect to the AP");
    } else {
      ESP_LOGI(LOG_TAG, "connect to the AP fail");
    }
  } else if (event_base == IP_EVENT && event_id == IP_EVENT_STA_GOT_IP) {
    ip_event_got_ip_t *event = (ip_event_got_ip_t *)event_data;
    ESP_LOGI(LOG_TAG, "got ip:" IPSTR, IP2STR(&event->ip_info.ip));
    // A successful connection restores the full retry budget; otherwise the
    // device would stop reconnecting after five disconnects in its lifetime.
    s_retry_num = 0;
    g_wifi_connected = true;
  }

#ifdef CONFIG_USE_WIFI_PROVISIONING_SOFTAP
  if( event_base == WIFI_PROV_EVENT ) {
    switch( event_id ) {
    case WIFI_PROV_START:
      ESP_LOGI(LOG_TAG, "Provisioning started");
      break;
    case WIFI_PROV_CRED_RECV: {
      wifi_sta_config_t *wifi_sta_cfg = (wifi_sta_config_t *)event_data;
      // The password is deliberately not logged.
      ESP_LOGI(LOG_TAG, "Received Wi-Fi credentials. SSID: %s",
               (char *)wifi_sta_cfg->ssid);
      break;
    }
    case WIFI_PROV_CRED_FAIL: {
      wifi_prov_sta_fail_reason_t *reason = (wifi_prov_sta_fail_reason_t *)event_data;
      ESP_LOGE(LOG_TAG, "Provisioning failed! Reason : %s",
               (*reason == WIFI_PROV_STA_AUTH_ERROR) ? "Wi-Fi station authentication failed" : "Wi-Fi station DHCP client failed");
      break;
    }
    case WIFI_PROV_CRED_SUCCESS:
      ESP_LOGI(LOG_TAG, "Provisioning successful");
      break;
    case WIFI_PROV_END:
      ESP_LOGI(LOG_TAG, "Provisioning ended");
      wifi_prov_mgr_deinit();
      break;
    }
  }
#endif  // CONFIG_USE_WIFI_PROVISIONING_SOFTAP
}

#ifdef CONFIG_USE_WIFI_PROVISIONING_SOFTAP
/**
 * Print the provisioning QR code and an equivalent URL to the log.
 *
 * @param name     Provisioning service name, must not be null.
 * @param username Security-2 user name; required when pop is given.
 * @param pop      Proof of possession, or null to omit the credentials.
 */
static void wifi_prov_print_qr(const char *name, const char *username, const char *pop)
{
  assert(name);
  if( pop ) {
    assert(username);
  }

  constexpr const char* transport = "softap";
  constexpr const char* PROV_QR_VERSION = "v1";

  std::vector<char> payload(150);

  if (pop) {
    snprintf(payload.data(), payload.size(), "{\"ver\":\"%s\",\"name\":\"%s\"" \
             ",\"username\":\"%s\",\"pop\":\"%s\",\"transport\":\"%s\"}",
             PROV_QR_VERSION, name, username, pop, transport);
  } else {
    snprintf(payload.data(), payload.size(), "{\"ver\":\"%s\",\"name\":\"%s\"" \
             ",\"transport\":\"%s\"}",
             PROV_QR_VERSION, name, transport);
  }
  ESP_LOGI(LOG_TAG, "Scan this QR code from the provisioning application for Provisioning.");
  esp_qrcode_config_t cfg = ESP_QRCODE_CONFIG_DEFAULT();
  esp_qrcode_generate(&cfg, payload.data());
  ESP_LOGI(LOG_TAG, "If QR code is not visible, copy paste the below URL in a browser.\n%s?data=%s", QRCODE_BASE_URL, payload.data());
}
#endif  // CONFIG_USE_WIFI_PROVISIONING_SOFTAP

/**
 * Bring up Wi-Fi and block until an IP address has been obtained.
 *
 * Without SoftAP provisioning the station connects with the credentials
 * from the build configuration. With provisioning enabled, the device is
 * provisioned on demand and the function additionally waits until an API
 * key is available, serving the configuration page meanwhile.
 */
void oai_wifi(void) {
  ESP_ERROR_CHECK(esp_event_handler_register(WIFI_EVENT, ESP_EVENT_ANY_ID,
                                             &oai_event_handler, NULL));
  ESP_ERROR_CHECK(esp_event_handler_register(IP_EVENT, IP_EVENT_STA_GOT_IP,
                                             &oai_event_handler, NULL));
#ifdef CONFIG_USE_WIFI_PROVISIONING_SOFTAP
  ESP_ERROR_CHECK(esp_event_handler_register(WIFI_PROV_EVENT, ESP_EVENT_ANY_ID, &oai_event_handler, NULL));
  ESP_ERROR_CHECK(esp_event_handler_register(PROTOCOMM_SECURITY_SESSION_EVENT, ESP_EVENT_ANY_ID, &oai_event_handler, NULL));
#endif  // CONFIG_USE_WIFI_PROVISIONING_SOFTAP

  ESP_ERROR_CHECK(esp_netif_init());
  esp_netif_t *sta_netif = esp_netif_create_default_wifi_sta();
  assert(sta_netif);

#ifdef CONFIG_USE_WIFI_PROVISIONING_SOFTAP
  esp_netif_create_default_wifi_ap();
#endif  // CONFIG_USE_WIFI_PROVISIONING_SOFTAP

  wifi_init_config_t cfg = WIFI_INIT_CONFIG_DEFAULT();
  ESP_ERROR_CHECK(esp_wifi_init(&cfg));

#ifndef CONFIG_USE_WIFI_PROVISIONING_SOFTAP
  // Start WiFi in station mode
  ESP_ERROR_CHECK(esp_wifi_set_mode(WIFI_MODE_STA));
  ESP_ERROR_CHECK(esp_wifi_start());

  ESP_LOGI(LOG_TAG, "Connecting to WiFi SSID: %s", CONFIG_WIFI_SSID);
  wifi_config_t wifi_config;
  memset(&wifi_config, 0, sizeof(wifi_config));
  strncpy((char *)wifi_config.sta.ssid, (char *)CONFIG_WIFI_SSID,
          sizeof(wifi_config.sta.ssid));
  strncpy((char *)wifi_config.sta.password, (char *)CONFIG_WIFI_PASSWORD,
          sizeof(wifi_config.sta.password));

  ESP_ERROR_CHECK(esp_wifi_set_config(
      static_cast<wifi_interface_t>(ESP_IF_WIFI_STA), &wifi_config));
  ESP_ERROR_CHECK(esp_wifi_connect());
#else  // CONFIG_USE_WIFI_PROVISIONING_SOFTAP
  wifi_prov_mgr_config_t config = {
    .scheme = wifi_prov_scheme_softap,
    .scheme_event_handler = WIFI_PROV_EVENT_HANDLER_NONE,
    .app_event_handler = WIFI_PROV_EVENT_HANDLER_NONE,
  };
  ESP_ERROR_CHECK(wifi_prov_mgr_init(config));

  bool reset_provisioning = bsp_check_reset_provisioning();

  bool provisioned = false;
  ESP_ERROR_CHECK(wifi_prov_mgr_is_provisioned(&provisioned));

  if (!provisioned || reset_provisioning) {
    // Not provisioned, start provisioning via SoftAP
    ESP_LOGI(LOG_TAG, "Starting provisioning");
    const auto security = WIFI_PROV_SECURITY_2;
    const char* service_name = "OAI_RES_WIFI";
    const char* service_key = nullptr;
    wifi_prov_security2_params_t sec2_params = {};
    sec2_params.salt = sec2_salt;
    sec2_params.salt_len = sizeof(sec2_salt);
    sec2_params.verifier = sec2_verifier;
    sec2_params.verifier_len = sizeof(sec2_verifier);

    ESP_ERROR_CHECK(wifi_prov_mgr_start_provisioning(security, &sec2_params, service_name, service_key));

    wifi_prov_print_qr(service_name, EXAMPLE_PROV_SEC2_USERNAME, EXAMPLE_PROV_SEC2_PWD);

    wifi_prov_mgr_wait();
  } else {
    ESP_LOGI(LOG_TAG, "Already provisioned, starting WiFi");
    // The manager is not needed in this case; release its resources.
    wifi_prov_mgr_deinit();
    ESP_ERROR_CHECK(esp_wifi_set_mode(WIFI_MODE_STA));
    ESP_ERROR_CHECK(esp_wifi_start());
    ESP_ERROR_CHECK(esp_wifi_connect());
  }
#endif  // CONFIG_USE_WIFI_PROVISIONING_SOFTAP

  // block until we get an IP address
  while (!g_wifi_connected) {
    vTaskDelay(pdMS_TO_TICKS(200));
  }

#ifdef CONFIG_USE_WIFI_PROVISIONING_SOFTAP
  bool has_api_key = false;
  if( auto err = oai_has_api_key(has_api_key); err != ESP_OK || !has_api_key || reset_provisioning ) {
    ESP_LOGW(LOG_TAG, "API key not set");
    // Start the HTTP server to receive the API key
    ESP_ERROR_CHECK(oai_config_httpd_start());
    // Wait for the API key to be set
    ESP_LOGI(LOG_TAG, "Waiting for the API key to be set");
    ulTaskNotifyTake(pdTRUE, portMAX_DELAY);
#ifdef CONFIG_DISABLE_CONFIGURATOR_AFTER_PROVISIONED
    // Stop the HTTP server
    oai_config_httpd_stop();
#endif
  } else {
    ESP_LOGI(LOG_TAG, "API key found.");
#ifndef CONFIG_DISABLE_CONFIGURATOR_AFTER_PROVISIONED
    // Start the HTTP server.
    ESP_ERROR_CHECK(oai_config_httpd_start());
#endif
  }
#endif  // CONFIG_USE_WIFI_PROVISIONING_SOFTAP
}
575 | --------------------------------------------------------------------------------