├── README.md └── sources ├── Makefile.am ├── conf ├── autoload_configs │ └── openai_asr.conf.xml └── dialplan │ └── dialplan.xml ├── curl.c ├── mod_openai_asr.c ├── mod_openai_asr.h └── utils.c /README.md: -------------------------------------------------------------------------------- 1 |

2 | OpenAI Speech-To-Text service for FreeSWITCH.
3 | Features: vad, flexible adjustments.
4 | Available through: asr_interface, command API
5 |
6 | If you have trouble building this module, take a look at this issue and the comment; 7 | you may have run into the same problem. 8 |

9 | 10 | ### Dialplan example 11 | ```XML 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | ``` 22 | 23 | ### mod_quickjs 24 | ```javascript 25 | session.ttsEngine= 'openai'; // requires: mod_openai_tts 26 | session.asrEngine= 'openai'; 27 | 28 | var txt = session.sayAndDetectSpeech('Hello, how can I help you?', 10); 29 | consoleLog('info', "TEXT: " + txt); 30 | ``` 31 | 32 | ### Command line 33 | ``` 34 | freeswitch> openai_asr_transcript /tmp/test.[wav|mp3] [key=altKey mode=altModel] 35 | +OK: How old is the Brooklyn Bridge 36 | ``` 37 | -------------------------------------------------------------------------------- /sources/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | include $(top_srcdir)/build/modmake.rulesam 3 | 4 | MODNAME=mod_openai_asr 5 | mod_LTLIBRARIES = mod_openai_asr.la 6 | mod_openai_asr_la_SOURCES = mod_openai_asr.c utils.c curl.c 7 | mod_openai_asr_la_CFLAGS = $(AM_CFLAGS) -I. -Wno-pointer-arith 8 | mod_openai_asr_la_LIBADD = $(switch_builddir)/libfreeswitch.la 9 | mod_openai_asr_la_LDFLAGS = -avoid-version -module -no-undefined -shared 10 | 11 | $(am_mod_openai_asr_la_OBJECTS): mod_openai_asr.h 12 | 13 | -------------------------------------------------------------------------------- /sources/conf/autoload_configs/openai_asr.conf.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /sources/conf/dialplan/dialplan.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /sources/curl.c: -------------------------------------------------------------------------------- 1 | /* 2 | * FreeSWITCH Modular Media Switching 
Software Library / Soft-Switch Application 3 | * Copyright (C) 2005-2014, Anthony Minessale II 4 | * 5 | * Version: MPL 1.1 6 | * 7 | * The contents of this file are subject to the Mozilla Public License Version 8 | * 1.1 (the "License"); you may not use this file except in compliance with 9 | * the License. You may obtain a copy of the License at 10 | * http://www.mozilla.org/MPL/ 11 | * 12 | * Software distributed under the License is distributed on an "AS IS" basis, 13 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 14 | * for the specific language governing rights and limitations under the 15 | * License. 16 | * 17 | * Module Contributor(s): 18 | * Konstantin Alexandrin 19 | * 20 | * 21 | */ 22 | #include "mod_openai_asr.h" 23 | 24 | static size_t curl_io_write_callback(char *buffer, size_t size, size_t nitems, void *user_data) { 25 | switch_buffer_t *recv_buffer = (switch_buffer_t *)user_data; 26 | size_t len = (size * nitems); 27 | 28 | if(len > 0 && recv_buffer) { 29 | switch_buffer_write(recv_buffer, buffer, len); 30 | } 31 | 32 | return len; 33 | } 34 | 35 | switch_status_t curl_perform(switch_buffer_t *recv_buffer, char *api_key, char *model_name, char *filename, globals_t *globals) { 36 | switch_status_t status = SWITCH_STATUS_SUCCESS; 37 | CURL *curl_handle = NULL; 38 | curl_mime *form = NULL; 39 | curl_mimepart *field1=NULL, *field2=NULL; 40 | switch_curl_slist_t *headers = NULL; 41 | switch_CURLcode curl_ret = 0; 42 | long http_resp = 0; 43 | 44 | curl_handle = switch_curl_easy_init(); 45 | headers = switch_curl_slist_append(headers, "Content-Type: multipart/form-data"); 46 | 47 | switch_curl_easy_setopt(curl_handle, CURLOPT_HTTPHEADER, headers); 48 | switch_curl_easy_setopt(curl_handle, CURLOPT_POST, 1); 49 | switch_curl_easy_setopt(curl_handle, CURLOPT_NOSIGNAL, 1); 50 | switch_curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, curl_io_write_callback); 51 | switch_curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void 
*) recv_buffer); 52 | 53 | if(globals->connect_timeout > 0) { 54 | switch_curl_easy_setopt(curl_handle, CURLOPT_CONNECTTIMEOUT, globals->connect_timeout); 55 | } 56 | if(globals->request_timeout > 0) { 57 | switch_curl_easy_setopt(curl_handle, CURLOPT_TIMEOUT, globals->request_timeout); 58 | } 59 | if(globals->user_agent) { 60 | switch_curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, globals->user_agent); 61 | } 62 | if(strncasecmp(globals->api_url, "https", 5) == 0) { 63 | switch_curl_easy_setopt(curl_handle, CURLOPT_SSL_VERIFYPEER, 0); 64 | switch_curl_easy_setopt(curl_handle, CURLOPT_SSL_VERIFYHOST, 0); 65 | } 66 | if(globals->proxy) { 67 | if(globals->proxy_credentials != NULL) { 68 | switch_curl_easy_setopt(curl_handle, CURLOPT_PROXYAUTH, CURLAUTH_ANY); 69 | switch_curl_easy_setopt(curl_handle, CURLOPT_PROXYUSERPWD, globals->proxy_credentials); 70 | } 71 | if(strncasecmp(globals->proxy, "https", 5) == 0) { 72 | switch_curl_easy_setopt(curl_handle, CURLOPT_PROXY_SSL_VERIFYPEER, 0); 73 | } 74 | switch_curl_easy_setopt(curl_handle, CURLOPT_PROXY, globals->proxy); 75 | } 76 | 77 | if(api_key) { 78 | curl_easy_setopt(curl_handle, CURLOPT_XOAUTH2_BEARER, api_key); 79 | curl_easy_setopt(curl_handle, CURLOPT_HTTPAUTH, CURLAUTH_BEARER); 80 | } 81 | 82 | if((form = curl_mime_init(curl_handle))) { 83 | if((field1 = curl_mime_addpart(form))) { 84 | curl_mime_name(field1, "model"); 85 | curl_mime_data(field1, model_name, CURL_ZERO_TERMINATED); 86 | } 87 | if((field2 = curl_mime_addpart(form))) { 88 | curl_mime_name(field2, "file"); 89 | curl_mime_filedata(field2, filename); 90 | } 91 | switch_curl_easy_setopt(curl_handle, CURLOPT_MIMEPOST, form); 92 | } 93 | 94 | headers = switch_curl_slist_append(headers, "Expect:"); 95 | switch_curl_easy_setopt(curl_handle, CURLOPT_URL, globals->api_url); 96 | 97 | curl_ret = switch_curl_easy_perform(curl_handle); 98 | if(!curl_ret) { 99 | switch_curl_easy_getinfo(curl_handle, CURLINFO_RESPONSE_CODE, &http_resp); 100 | if(!http_resp) { 
switch_curl_easy_getinfo(curl_handle, CURLINFO_HTTP_CONNECTCODE, &http_resp); } 101 | } else { 102 | http_resp = curl_ret; 103 | } 104 | 105 | if(http_resp != 200) { 106 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "http-error=[%ld] (%s)\n", http_resp, globals->api_url); 107 | status = SWITCH_STATUS_FALSE; 108 | } 109 | 110 | if(recv_buffer) { 111 | if(switch_buffer_inuse(recv_buffer) > 0) { 112 | switch_buffer_write(recv_buffer, "\0", 1); 113 | } 114 | } 115 | 116 | if(curl_handle) { 117 | switch_curl_easy_cleanup(curl_handle); 118 | } 119 | if(form) { 120 | curl_mime_free(form); 121 | } 122 | if(headers) { 123 | switch_curl_slist_free_all(headers); 124 | } 125 | 126 | return status; 127 | } 128 | -------------------------------------------------------------------------------- /sources/mod_openai_asr.c: -------------------------------------------------------------------------------- 1 | /* 2 | * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application 3 | * Copyright (C) 2005-2014, Anthony Minessale II 4 | * 5 | * Version: MPL 1.1 6 | * 7 | * The contents of this file are subject to the Mozilla Public License Version 8 | * 1.1 (the "License"); you may not use this file except in compliance with 9 | * the License. You may obtain a copy of the License at 10 | * http://www.mozilla.org/MPL/ 11 | * 12 | * Software distributed under the License is distributed on an "AS IS" basis, 13 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 14 | * for the specific language governing rights and limitations under the 15 | * License. 16 | * 17 | * Module Contributor(s): 18 | * Konstantin Alexandrin 19 | * 20 | * 21 | * OpenAI Speech-To-Text service for the Freeswitch. 
22 | * https://platform.openai.com/docs/guides/speech-to-text 23 | * 24 | * Development respository: 25 | * https://github.com/akscf/mod_openai_asr 26 | * 27 | */ 28 | #include "mod_openai_asr.h" 29 | 30 | globals_t globals; 31 | 32 | SWITCH_MODULE_LOAD_FUNCTION(mod_openai_asr_load); 33 | SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_openai_asr_shutdown); 34 | SWITCH_MODULE_DEFINITION(mod_openai_asr, mod_openai_asr_load, mod_openai_asr_shutdown, NULL); 35 | 36 | static void *SWITCH_THREAD_FUNC transcribe_thread(switch_thread_t *thread, void *obj) { 37 | volatile asr_ctx_t *_ref = (asr_ctx_t *)obj; 38 | asr_ctx_t *asr_ctx = (asr_ctx_t *)_ref; 39 | switch_status_t status = SWITCH_STATUS_FALSE; 40 | switch_buffer_t *chunk_buffer = NULL; 41 | switch_buffer_t *curl_recv_buffer = NULL; 42 | switch_memory_pool_t *pool = NULL; 43 | cJSON *json = NULL; 44 | time_t sentence_timeout = 0; 45 | uint32_t schunks = 0; 46 | uint32_t chunk_buffer_size = 0; 47 | uint8_t fl_cbuff_overflow = SWITCH_FALSE; 48 | void *pop = NULL; 49 | 50 | switch_mutex_lock(asr_ctx->mutex); 51 | asr_ctx->refs++; 52 | switch_mutex_unlock(asr_ctx->mutex); 53 | 54 | if(switch_core_new_memory_pool(&pool) != SWITCH_STATUS_SUCCESS) { 55 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "switch_core_new_memory_pool()\n"); 56 | goto out; 57 | } 58 | if(switch_buffer_create_dynamic(&curl_recv_buffer, 1024, 2048, 8192) != SWITCH_STATUS_SUCCESS) { 59 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_buffer_create_dynamic()\n"); 60 | goto out; 61 | } 62 | 63 | while(SWITCH_TRUE) { 64 | if(globals.fl_shutdown || asr_ctx->fl_destroyed) { 65 | break; 66 | } 67 | if(chunk_buffer_size == 0) { 68 | switch_mutex_lock(asr_ctx->mutex); 69 | chunk_buffer_size = asr_ctx->chunk_buffer_size; 70 | switch_mutex_unlock(asr_ctx->mutex); 71 | 72 | if(chunk_buffer_size > 0) { 73 | if(switch_buffer_create(pool, &chunk_buffer, chunk_buffer_size) != SWITCH_STATUS_SUCCESS) { 74 | switch_log_printf(SWITCH_CHANNEL_LOG, 
SWITCH_LOG_CRIT, "switch_buffer_create()\n"); 75 | break; 76 | } 77 | switch_buffer_zero(chunk_buffer); 78 | } 79 | goto timer_next; 80 | } 81 | 82 | fl_cbuff_overflow = SWITCH_FALSE; 83 | while(switch_queue_trypop(asr_ctx->q_audio, &pop) == SWITCH_STATUS_SUCCESS) { 84 | xdata_buffer_t *audio_buffer = (xdata_buffer_t *)pop; 85 | if(globals.fl_shutdown || asr_ctx->fl_destroyed ) { 86 | xdata_buffer_free(&audio_buffer); 87 | break; 88 | } 89 | if(audio_buffer && audio_buffer->len) { 90 | if(switch_buffer_write(chunk_buffer, audio_buffer->data, audio_buffer->len) >= chunk_buffer_size) { 91 | fl_cbuff_overflow = SWITCH_TRUE; 92 | break; 93 | } 94 | schunks++; 95 | } 96 | xdata_buffer_free(&audio_buffer); 97 | } 98 | 99 | if(fl_cbuff_overflow) { 100 | sentence_timeout = 1; 101 | } else { 102 | if(schunks && asr_ctx->vad_state == SWITCH_VAD_STATE_STOP_TALKING) { 103 | if(!sentence_timeout) { 104 | sentence_timeout = asr_ctx->silence_sec + switch_epoch_time_now(NULL); 105 | } 106 | } 107 | if(sentence_timeout && (asr_ctx->vad_state == SWITCH_VAD_STATE_START_TALKING || asr_ctx->vad_state == SWITCH_VAD_STATE_TALKING)) { 108 | sentence_timeout = 0; 109 | } 110 | } 111 | 112 | if(sentence_timeout && sentence_timeout <= switch_epoch_time_now(NULL)) { 113 | const void *chunk_buffer_ptr = NULL; 114 | const void *http_response_ptr = NULL; 115 | uint32_t buf_len = 0, http_recv_len = 0, stt_failed = 0; 116 | char *chunk_fname = NULL; 117 | 118 | if((buf_len = switch_buffer_peek_zerocopy(chunk_buffer, &chunk_buffer_ptr)) > 0 && chunk_buffer_ptr) { 119 | chunk_fname = chunk_write((switch_byte_t *)chunk_buffer_ptr, buf_len, asr_ctx->channels, asr_ctx->samplerate, globals.opt_encoding); 120 | } 121 | if(chunk_fname) { 122 | for(uint32_t rqtry = 0; rqtry < asr_ctx->retries_on_error; rqtry++) { 123 | switch_buffer_zero(curl_recv_buffer); 124 | status = curl_perform(curl_recv_buffer, asr_ctx->opt_api_key, asr_ctx->opt_model, chunk_fname, &globals); 125 | if(status == SWITCH_STATUS_SUCCESS 
|| globals.fl_shutdown || asr_ctx->fl_destroyed) { break; } 126 | switch_yield(100000); 127 | } 128 | 129 | http_recv_len = switch_buffer_peek_zerocopy(curl_recv_buffer, &http_response_ptr); 130 | if(status == SWITCH_STATUS_SUCCESS) { 131 | if(http_response_ptr && http_recv_len) { 132 | char *txt = parse_response((char *)http_response_ptr, NULL); 133 | #ifdef MOD_OPENAI_ASR_DEBUG 134 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Service response [%s]\n", (char *)http_response_ptr); 135 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Text [%s]\n", txt ? txt : "null"); 136 | #endif 137 | if(!txt) txt = strdup(""); 138 | if(switch_queue_trypush(asr_ctx->q_text, txt) == SWITCH_STATUS_SUCCESS) { 139 | switch_mutex_lock(asr_ctx->mutex); 140 | asr_ctx->transcription_results++; 141 | switch_mutex_unlock(asr_ctx->mutex); 142 | } else { 143 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Queue is full!\n"); 144 | switch_safe_free(txt); 145 | } 146 | } else { 147 | stt_failed = 1; 148 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Empty service response!\n"); 149 | } 150 | } else { 151 | stt_failed = 1; 152 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to perform request!\n"); 153 | } 154 | 155 | if(stt_failed) { 156 | char *txt = strdup("[transcription failed]"); 157 | if(switch_queue_trypush(asr_ctx->q_text, txt) == SWITCH_STATUS_SUCCESS) { 158 | switch_mutex_lock(asr_ctx->mutex); 159 | asr_ctx->transcription_results++; 160 | switch_mutex_unlock(asr_ctx->mutex); 161 | } else { 162 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Queue is full!\n"); 163 | switch_safe_free(txt); 164 | } 165 | } 166 | 167 | schunks = 0; 168 | sentence_timeout = 0; 169 | unlink(chunk_fname); 170 | switch_safe_free(chunk_fname); 171 | switch_buffer_zero(chunk_buffer); 172 | } 173 | } 174 | 175 | timer_next: 176 | switch_yield(10000); 177 | } 178 | 179 | out: 180 | if(json != NULL) { 181 | cJSON_Delete(json); 182 | } 183 
| if(curl_recv_buffer) { 184 | switch_buffer_destroy(&curl_recv_buffer); 185 | } 186 | if(chunk_buffer) { 187 | switch_buffer_destroy(&chunk_buffer); 188 | } 189 | if(pool) { 190 | switch_core_destroy_memory_pool(&pool); 191 | } 192 | 193 | switch_mutex_lock(asr_ctx->mutex); 194 | if(asr_ctx->refs > 0) asr_ctx->refs--; 195 | switch_mutex_unlock(asr_ctx->mutex); 196 | 197 | switch_mutex_lock(globals.mutex); 198 | if(globals.active_threads) globals.active_threads--; 199 | switch_mutex_unlock(globals.mutex); 200 | 201 | return NULL; 202 | } 203 | 204 | // --------------------------------------------------------------------------------------------------------------------------------------------- 205 | static switch_status_t asr_open(switch_asr_handle_t *ah, const char *codec, int samplerate, const char *dest, switch_asr_flag_t *flags) { 206 | switch_status_t status = SWITCH_STATUS_SUCCESS; 207 | switch_threadattr_t *attr = NULL; 208 | switch_thread_t *thread = NULL; 209 | asr_ctx_t *asr_ctx = NULL; 210 | 211 | if(strcmp(codec, "L16") !=0) { 212 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unsupported encoding (%s)\n", codec); 213 | switch_goto_status(SWITCH_STATUS_FALSE, out); 214 | } 215 | 216 | if((asr_ctx = switch_core_alloc(ah->memory_pool, sizeof(asr_ctx_t))) == NULL) { 217 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_core_alloc()\n"); 218 | switch_goto_status(SWITCH_STATUS_GENERR, out); 219 | } 220 | 221 | asr_ctx->channels = 1; 222 | asr_ctx->chunk_buffer_size = 0; 223 | asr_ctx->samplerate = samplerate; 224 | asr_ctx->silence_sec = globals.speech_silence_sec; 225 | asr_ctx->retries_on_error = globals.retries_on_error; 226 | 227 | asr_ctx->opt_model = globals.opt_model; 228 | asr_ctx->opt_api_key = globals.api_key; 229 | 230 | if((status = switch_mutex_init(&asr_ctx->mutex, SWITCH_MUTEX_NESTED, ah->memory_pool)) != SWITCH_STATUS_SUCCESS) { 231 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_mutex_init()\n"); 
232 | switch_goto_status(SWITCH_STATUS_GENERR, out); 233 | } 234 | 235 | switch_queue_create(&asr_ctx->q_audio, QUEUE_SIZE, ah->memory_pool); 236 | switch_queue_create(&asr_ctx->q_text, QUEUE_SIZE, ah->memory_pool); 237 | 238 | asr_ctx->vad_buffer = NULL; 239 | asr_ctx->frame_len = 0; 240 | asr_ctx->vad_buffer_size = 0; 241 | asr_ctx->vad_stored_frames = 0; 242 | asr_ctx->fl_vad_first_cycle = SWITCH_TRUE; 243 | 244 | if((asr_ctx->vad = switch_vad_init(asr_ctx->samplerate, asr_ctx->channels)) == NULL) { 245 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_vad_init()\n"); 246 | switch_goto_status(SWITCH_STATUS_GENERR, out); 247 | } 248 | switch_vad_set_mode(asr_ctx->vad, -1); 249 | switch_vad_set_param(asr_ctx->vad, "debug", globals.fl_vad_debug); 250 | if(globals.vad_silence_ms > 0) { switch_vad_set_param(asr_ctx->vad, "silence_ms", globals.vad_silence_ms); } 251 | if(globals.vad_voice_ms > 0) { switch_vad_set_param(asr_ctx->vad, "voice_ms", globals.vad_voice_ms); } 252 | if(globals.vad_threshold > 0) { switch_vad_set_param(asr_ctx->vad, "thresh", globals.vad_threshold); } 253 | 254 | ah->private_info = asr_ctx; 255 | 256 | switch_mutex_lock(globals.mutex); 257 | globals.active_threads++; 258 | switch_mutex_unlock(globals.mutex); 259 | 260 | switch_threadattr_create(&attr, ah->memory_pool); 261 | switch_threadattr_detach_set(attr, 1); 262 | switch_threadattr_stacksize_set(attr, SWITCH_THREAD_STACKSIZE); 263 | switch_thread_create(&thread, attr, transcribe_thread, asr_ctx, ah->memory_pool); 264 | 265 | out: 266 | return status; 267 | } 268 | 269 | static switch_status_t asr_close(switch_asr_handle_t *ah, switch_asr_flag_t *flags) { 270 | asr_ctx_t *asr_ctx = (asr_ctx_t *)ah->private_info; 271 | uint8_t fl_wloop = SWITCH_TRUE; 272 | 273 | assert(asr_ctx != NULL); 274 | 275 | asr_ctx->fl_abort = SWITCH_TRUE; 276 | asr_ctx->fl_destroyed = SWITCH_TRUE; 277 | 278 | switch_mutex_lock(asr_ctx->mutex); 279 | fl_wloop = (asr_ctx->refs != 0); 280 | 
switch_mutex_unlock(asr_ctx->mutex); 281 | 282 | if(fl_wloop) { 283 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Waiting for unlock (refs=%d)...\n", asr_ctx->refs); 284 | while(fl_wloop) { 285 | switch_mutex_lock(asr_ctx->mutex); 286 | fl_wloop = (asr_ctx->refs != 0); 287 | switch_mutex_unlock(asr_ctx->mutex); 288 | switch_yield(100000); 289 | } 290 | } 291 | 292 | if(asr_ctx->q_audio) { 293 | xdata_buffer_queue_clean(asr_ctx->q_audio); 294 | switch_queue_term(asr_ctx->q_audio); 295 | } 296 | if(asr_ctx->q_text) { 297 | text_queue_clean(asr_ctx->q_text); 298 | switch_queue_term(asr_ctx->q_text); 299 | } 300 | if(asr_ctx->vad) { 301 | switch_vad_destroy(&asr_ctx->vad); 302 | } 303 | if(asr_ctx->vad_buffer) { 304 | switch_buffer_destroy(&asr_ctx->vad_buffer); 305 | } 306 | 307 | switch_set_flag(ah, SWITCH_ASR_FLAG_CLOSED); 308 | 309 | return SWITCH_STATUS_SUCCESS; 310 | } 311 | 312 | static switch_status_t asr_feed(switch_asr_handle_t *ah, void *data, unsigned int data_len, switch_asr_flag_t *flags) { 313 | asr_ctx_t *asr_ctx = (asr_ctx_t *) ah->private_info; 314 | switch_vad_state_t vad_state = 0; 315 | uint8_t fl_has_audio = SWITCH_FALSE; 316 | 317 | assert(asr_ctx != NULL); 318 | 319 | if(switch_test_flag(ah, SWITCH_ASR_FLAG_CLOSED)) { 320 | return SWITCH_STATUS_BREAK; 321 | } 322 | if(asr_ctx->fl_destroyed || asr_ctx->fl_abort) { 323 | return SWITCH_STATUS_BREAK; 324 | } 325 | if(asr_ctx->fl_pause) { 326 | return SWITCH_STATUS_SUCCESS; 327 | } 328 | if(!data || !data_len) { 329 | return SWITCH_STATUS_BREAK; 330 | } 331 | 332 | if(data_len > 0 && asr_ctx->frame_len == 0) { 333 | switch_mutex_lock(asr_ctx->mutex); 334 | asr_ctx->frame_len = data_len; 335 | asr_ctx->vad_buffer_size = asr_ctx->frame_len * VAD_STORE_FRAMES; 336 | asr_ctx->chunk_buffer_size = asr_ctx->samplerate * globals.speech_max_sec; 337 | switch_mutex_unlock(asr_ctx->mutex); 338 | 339 | if(switch_buffer_create(ah->memory_pool, &asr_ctx->vad_buffer, asr_ctx->vad_buffer_size) != 
SWITCH_STATUS_SUCCESS) { 340 | asr_ctx->vad_buffer_size = 0; 341 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_buffer_create()\n"); 342 | } 343 | } 344 | 345 | if(asr_ctx->vad_buffer_size) { 346 | if(asr_ctx->vad_state == SWITCH_VAD_STATE_STOP_TALKING || (asr_ctx->vad_state == vad_state && vad_state == SWITCH_VAD_STATE_NONE)) { 347 | if(data_len <= asr_ctx->frame_len) { 348 | if(asr_ctx->vad_stored_frames >= VAD_STORE_FRAMES) { 349 | switch_buffer_zero(asr_ctx->vad_buffer); 350 | asr_ctx->vad_stored_frames = 0; 351 | asr_ctx->fl_vad_first_cycle = SWITCH_FALSE; 352 | } 353 | switch_buffer_write(asr_ctx->vad_buffer, data, MIN(asr_ctx->frame_len, data_len)); 354 | asr_ctx->vad_stored_frames++; 355 | } 356 | } 357 | 358 | vad_state = switch_vad_process(asr_ctx->vad, (int16_t *)data, (data_len / sizeof(int16_t))); 359 | if(vad_state == SWITCH_VAD_STATE_START_TALKING) { 360 | asr_ctx->vad_state = vad_state; 361 | fl_has_audio = SWITCH_TRUE; 362 | } else if (vad_state == SWITCH_VAD_STATE_STOP_TALKING) { 363 | asr_ctx->vad_state = vad_state; 364 | fl_has_audio = SWITCH_FALSE; 365 | switch_vad_reset(asr_ctx->vad); 366 | } else if (vad_state == SWITCH_VAD_STATE_TALKING) { 367 | asr_ctx->vad_state = vad_state; 368 | fl_has_audio = SWITCH_TRUE; 369 | } 370 | } else { 371 | fl_has_audio = SWITCH_TRUE; 372 | } 373 | 374 | if(fl_has_audio) { 375 | if(vad_state == SWITCH_VAD_STATE_START_TALKING && asr_ctx->vad_stored_frames > 0) { 376 | xdata_buffer_t *tau_buf = NULL; 377 | const void *ptr = NULL; 378 | switch_size_t vblen = 0; 379 | uint32_t rframes = 0, rlen = 0; 380 | int ofs = 0; 381 | 382 | if((vblen = switch_buffer_peek_zerocopy(asr_ctx->vad_buffer, &ptr)) && ptr && vblen > 0) { 383 | rframes = (asr_ctx->vad_stored_frames >= VAD_RECOVERY_FRAMES ? VAD_RECOVERY_FRAMES : (asr_ctx->fl_vad_first_cycle ? 
asr_ctx->vad_stored_frames : VAD_RECOVERY_FRAMES)); 384 | rlen = (rframes * asr_ctx->frame_len); 385 | ofs = (vblen - rlen); 386 | 387 | if(ofs < 0) { 388 | uint32_t hdr_sz = -ofs; 389 | uint32_t hdr_ofs = (asr_ctx->vad_buffer_size - hdr_sz); 390 | 391 | switch_zmalloc(tau_buf, sizeof(xdata_buffer_t)); 392 | 393 | tau_buf->len = (hdr_sz + vblen + data_len); 394 | switch_malloc(tau_buf->data, tau_buf->len); 395 | 396 | memcpy(tau_buf->data, (void *)(ptr + hdr_ofs), hdr_sz); 397 | memcpy(tau_buf->data + hdr_sz , (void *)(ptr + 0), vblen); 398 | memcpy(tau_buf->data + rlen, data, data_len); 399 | 400 | if(switch_queue_trypush(asr_ctx->q_audio, tau_buf) != SWITCH_STATUS_SUCCESS) { 401 | xdata_buffer_free(&tau_buf); 402 | } 403 | 404 | switch_buffer_zero(asr_ctx->vad_buffer); 405 | asr_ctx->vad_stored_frames = 0; 406 | } else { 407 | switch_zmalloc(tau_buf, sizeof(xdata_buffer_t)); 408 | 409 | tau_buf->len = (rlen + data_len); 410 | switch_malloc(tau_buf->data, tau_buf->len); 411 | 412 | memcpy(tau_buf->data, (void *)(ptr + ofs), rlen); 413 | memcpy(tau_buf->data + rlen, data, data_len); 414 | 415 | if(switch_queue_trypush(asr_ctx->q_audio, tau_buf) != SWITCH_STATUS_SUCCESS) { 416 | xdata_buffer_free(&tau_buf); 417 | } 418 | 419 | switch_buffer_zero(asr_ctx->vad_buffer); 420 | asr_ctx->vad_stored_frames = 0; 421 | } 422 | } 423 | } else { 424 | xdata_buffer_push(asr_ctx->q_audio, data, data_len); 425 | } 426 | } 427 | 428 | return SWITCH_STATUS_SUCCESS; 429 | } 430 | 431 | static switch_status_t asr_check_results(switch_asr_handle_t *ah, switch_asr_flag_t *flags) { 432 | asr_ctx_t *asr_ctx = (asr_ctx_t *)ah->private_info; 433 | 434 | assert(asr_ctx != NULL); 435 | 436 | if(asr_ctx->fl_pause) { 437 | return SWITCH_STATUS_FALSE; 438 | } 439 | 440 | return (asr_ctx->transcription_results > 0 ? 
SWITCH_STATUS_SUCCESS : SWITCH_STATUS_FALSE); 441 | } 442 | 443 | static switch_status_t asr_get_results(switch_asr_handle_t *ah, char **xmlstr, switch_asr_flag_t *flags) { 444 | asr_ctx_t *asr_ctx = (asr_ctx_t *)ah->private_info; 445 | switch_status_t status = SWITCH_STATUS_FALSE; 446 | void *pop = NULL; 447 | 448 | assert(asr_ctx != NULL); 449 | 450 | if(switch_queue_trypop(asr_ctx->q_text, &pop) == SWITCH_STATUS_SUCCESS) { 451 | if(pop) { 452 | *xmlstr = (char *)pop; 453 | status = SWITCH_STATUS_SUCCESS; 454 | 455 | switch_mutex_lock(asr_ctx->mutex); 456 | if(asr_ctx->transcription_results > 0) asr_ctx->transcription_results--; 457 | switch_mutex_unlock(asr_ctx->mutex); 458 | } 459 | } 460 | 461 | return status; 462 | } 463 | 464 | static switch_status_t asr_start_input_timers(switch_asr_handle_t *ah) { 465 | asr_ctx_t *asr_ctx = (asr_ctx_t *)ah->private_info; 466 | 467 | assert(asr_ctx != NULL); 468 | 469 | asr_ctx->fl_start_timers = SWITCH_TRUE; 470 | 471 | return SWITCH_STATUS_SUCCESS; 472 | } 473 | 474 | static switch_status_t asr_pause(switch_asr_handle_t *ah) { 475 | asr_ctx_t *asr_ctx = (asr_ctx_t *)ah->private_info; 476 | 477 | assert(asr_ctx != NULL); 478 | 479 | asr_ctx->fl_pause = SWITCH_TRUE; 480 | 481 | return SWITCH_STATUS_SUCCESS; 482 | } 483 | 484 | static switch_status_t asr_resume(switch_asr_handle_t *ah) { 485 | asr_ctx_t *asr_ctx = (asr_ctx_t *)ah->private_info; 486 | 487 | assert(asr_ctx != NULL); 488 | 489 | asr_ctx->fl_pause = SWITCH_FALSE; 490 | 491 | return SWITCH_STATUS_SUCCESS; 492 | } 493 | 494 | static void asr_text_param(switch_asr_handle_t *ah, char *param, const char *val) { 495 | asr_ctx_t *asr_ctx = (asr_ctx_t *)ah->private_info; 496 | 497 | assert(asr_ctx != NULL); 498 | 499 | if(strcasecmp(param, "lang") == 0) { 500 | if(val) asr_ctx->opt_lang = switch_core_strdup(ah->memory_pool, val); 501 | } else if(strcasecmp(param, "model") == 0) { 502 | if(val) asr_ctx->opt_model = switch_core_strdup(ah->memory_pool, val); 503 | } else 
if(strcasecmp(param, "key") == 0) { 504 | if(val) asr_ctx->opt_api_key = switch_core_strdup(ah->memory_pool, val); 505 | } else if(strcasecmp(param, "silence") == 0) { 506 | if(val) asr_ctx->silence_sec = atoi(val); 507 | } 508 | } 509 | 510 | static void asr_numeric_param(switch_asr_handle_t *ah, char *param, int val) { 511 | } 512 | 513 | static void asr_float_param(switch_asr_handle_t *ah, char *param, double val) { 514 | } 515 | 516 | static switch_status_t asr_load_grammar(switch_asr_handle_t *ah, const char *grammar, const char *name) { 517 | return SWITCH_STATUS_SUCCESS; 518 | } 519 | 520 | static switch_status_t asr_unload_grammar(switch_asr_handle_t *ah, const char *name) { 521 | return SWITCH_STATUS_SUCCESS; 522 | } 523 | 524 | // --------------------------------------------------------------------------------------------------------------------------------------------- 525 | #define CMD_SYNTAX "path_to/filename.(mp3|wav) [key=altkey model=altModel]\n" 526 | SWITCH_STANDARD_API(openai_asr_cmd_handler) { 527 | switch_status_t status = 0; 528 | char *mycmd = NULL, *argv[10] = { 0 }; int argc = 0; 529 | switch_buffer_t *recv_buf = NULL; 530 | const void *response_ptr = NULL; 531 | char *opt_api_key = globals.api_key; 532 | char *opt_model = globals.opt_model; 533 | char *file_name = NULL, *file_ext = NULL; 534 | uint32_t recv_len = 0; 535 | 536 | if (!zstr(cmd)) { 537 | mycmd = strdup(cmd); 538 | switch_assert(mycmd); 539 | argc = switch_separate_string(mycmd, ' ', argv, (sizeof(argv) / sizeof(argv[0]))); 540 | } 541 | if(argc == 0) { 542 | goto usage; 543 | } 544 | 545 | file_name = argv[0]; 546 | if(switch_file_exists(file_name, NULL) != SWITCH_STATUS_SUCCESS) { 547 | stream->write_function(stream, "-ERR: file not found (%s)\n", file_name); 548 | goto out; 549 | } 550 | 551 | file_ext = strrchr(file_name, '.'); 552 | if(!file_ext) { 553 | stream->write_function(stream, "-ERR: unsupported file encoding (null)\n"); 554 | goto out; 555 | } 556 | 557 | 
file_ext++; 558 | if(strcasecmp("mp3", file_ext) && strcasecmp("wav", file_ext)) { 559 | stream->write_function(stream, "-ERR: unsupported file encoding (%s)\n", file_ext); 560 | goto out; 561 | } 562 | 563 | if(switch_buffer_create_dynamic(&recv_buf, 1024, 2048, 8192) != SWITCH_STATUS_SUCCESS) { 564 | stream->write_function(stream, "-ERR: switch_buffer_create_dynamic()\n"); 565 | goto out; 566 | } 567 | 568 | if(argc > 1) { 569 | for(int i = 1; i < argc; i++) { 570 | char *kvp[2] = { 0 }; 571 | if(switch_separate_string(argv[i], '=', kvp, 2) >= 2) { 572 | if(strcasecmp(kvp[0], "key") == 0) { 573 | if(kvp[1]) opt_api_key = kvp[1]; 574 | } else if(strcasecmp(kvp[0], "model") == 0) { 575 | if(kvp[1]) opt_model = kvp[1]; 576 | } 577 | } 578 | } 579 | } 580 | 581 | status = curl_perform(recv_buf, opt_api_key, opt_model, file_name, &globals); 582 | 583 | recv_len = switch_buffer_peek_zerocopy(recv_buf, &response_ptr); 584 | if(status == SWITCH_STATUS_SUCCESS && response_ptr && recv_len) { 585 | char *txt = parse_response((char *)response_ptr, stream); 586 | if(txt) { 587 | stream->write_function(stream, "+OK: %s\n", txt); 588 | } 589 | switch_safe_free(txt); 590 | } else { 591 | stream->write_function(stream, "-ERR: unable to perform request\n"); 592 | } 593 | 594 | goto out; 595 | usage: 596 | stream->write_function(stream, "-ERR:\nUsage: %s\n", CMD_SYNTAX); 597 | 598 | out: 599 | if(recv_buf) { 600 | switch_buffer_destroy(&recv_buf); 601 | } 602 | 603 | switch_safe_free(mycmd); 604 | return SWITCH_STATUS_SUCCESS; 605 | } 606 | 607 | // --------------------------------------------------------------------------------------------------------------------------------------------- 608 | // main 609 | // --------------------------------------------------------------------------------------------------------------------------------------------- 610 | SWITCH_MODULE_LOAD_FUNCTION(mod_openai_asr_load) { 611 | switch_status_t status = SWITCH_STATUS_SUCCESS; 612 | switch_xml_t 
cfg, xml, settings, param; 613 | switch_api_interface_t *commands_interface; 614 | switch_asr_interface_t *asr_interface; 615 | 616 | memset(&globals, 0, sizeof(globals)); 617 | switch_mutex_init(&globals.mutex, SWITCH_MUTEX_NESTED, pool); 618 | 619 | if((xml = switch_xml_open_cfg(MOD_CONFIG_NAME, &cfg, NULL)) == NULL) { 620 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to open configuration: %s\n", MOD_CONFIG_NAME); 621 | switch_goto_status(SWITCH_STATUS_GENERR, out); 622 | } 623 | 624 | if((settings = switch_xml_child(cfg, "settings"))) { 625 | for (param = switch_xml_child(settings, "param"); param; param = param->next) { 626 | char *var = (char *) switch_xml_attr_soft(param, "name"); 627 | char *val = (char *) switch_xml_attr_soft(param, "value"); 628 | 629 | if(!strcasecmp(var, "vad-silence-ms")) { 630 | if(val) globals.vad_silence_ms = atoi (val); 631 | } else if(!strcasecmp(var, "vad-voice-ms")) { 632 | if(val) globals.vad_voice_ms = atoi (val); 633 | } else if(!strcasecmp(var, "vad-threshold")) { 634 | if(val) globals.vad_threshold = atoi (val); 635 | } else if(!strcasecmp(var, "vad-debug")) { 636 | if(val) globals.fl_vad_debug = switch_true(val); 637 | } else if(!strcasecmp(var, "api-key")) { 638 | if(val) globals.api_key = switch_core_strdup(pool, val); 639 | } else if(!strcasecmp(var, "api-url")) { 640 | if(val) globals.api_url = switch_core_strdup(pool, val); 641 | } else if(!strcasecmp(var, "user-agent")) { 642 | if(val) globals.user_agent = switch_core_strdup(pool, val); 643 | } else if(!strcasecmp(var, "proxy")) { 644 | if(val) globals.proxy = switch_core_strdup(pool, val); 645 | } else if(!strcasecmp(var, "proxy-credentials")) { 646 | if(val) globals.proxy_credentials = switch_core_strdup(pool, val); 647 | } else if(!strcasecmp(var, "encoding")) { 648 | if(val) globals.opt_encoding = switch_core_strdup(pool, val); 649 | } else if(!strcasecmp(var, "model")) { 650 | if(val) globals.opt_model= switch_core_strdup(pool, val); 651 | } 
else if(!strcasecmp(var, "speech-max-sec")) { 652 | if(val) globals.speech_max_sec = atoi(val); 653 | } else if(!strcasecmp(var, "speech-silence-sec")) { 654 | if(val) globals.speech_silence_sec = atoi(val); 655 | } else if(!strcasecmp(var, "request-timeout")) { 656 | if(val) globals.request_timeout = atoi(val); 657 | } else if(!strcasecmp(var, "connect-timeout")) { 658 | if(val) globals.connect_timeout = atoi(val); 659 | } else if(!strcasecmp(var, "log-http-errors")) { 660 | if(val) globals.fl_log_http_errors = switch_true(val); 661 | } else if(!strcasecmp(var, "retries-on-error")) { 662 | if(val) globals.retries_on_error = atoi(val); 663 | } 664 | } 665 | } 666 | 667 | if(!globals.api_url) { 668 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing required parameter: api-url\n"); 669 | switch_goto_status(SWITCH_STATUS_GENERR, out); 670 | } 671 | 672 | globals.opt_encoding = globals.opt_encoding ? globals.opt_encoding : "wav"; 673 | globals.speech_max_sec = !globals.speech_max_sec ? 35 : globals.speech_max_sec; 674 | globals.speech_silence_sec = !globals.speech_silence_sec ? 3 : globals.speech_silence_sec; 675 | globals.retries_on_error = !globals.retries_on_error ? 
1 : globals.retries_on_error; 676 | 677 | globals.tmp_path = switch_core_sprintf(pool, "%s%sopenai-asr-cache", SWITCH_GLOBAL_dirs.temp_dir, SWITCH_PATH_SEPARATOR); 678 | if(switch_directory_exists(globals.tmp_path, NULL) != SWITCH_STATUS_SUCCESS) { 679 | switch_dir_make(globals.tmp_path, SWITCH_FPROT_OS_DEFAULT, NULL); 680 | } 681 | 682 | *module_interface = switch_loadable_module_create_module_interface(pool, modname); 683 | SWITCH_ADD_API(commands_interface, "openai_asr_transcript", "OpenAI speech-to-text", openai_asr_cmd_handler, CMD_SYNTAX); 684 | 685 | asr_interface = switch_loadable_module_create_interface(*module_interface, SWITCH_ASR_INTERFACE); 686 | asr_interface->interface_name = "openai"; 687 | asr_interface->asr_open = asr_open; 688 | asr_interface->asr_close = asr_close; 689 | asr_interface->asr_feed = asr_feed; 690 | asr_interface->asr_pause = asr_pause; 691 | asr_interface->asr_resume = asr_resume; 692 | asr_interface->asr_check_results = asr_check_results; 693 | asr_interface->asr_get_results = asr_get_results; 694 | asr_interface->asr_start_input_timers = asr_start_input_timers; 695 | asr_interface->asr_text_param = asr_text_param; 696 | asr_interface->asr_numeric_param = asr_numeric_param; 697 | asr_interface->asr_float_param = asr_float_param; 698 | asr_interface->asr_load_grammar = asr_load_grammar; 699 | asr_interface->asr_unload_grammar = asr_unload_grammar; 700 | 701 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_NOTICE, "OpenAI-ASR (%s)\n", MOD_VERSION); 702 | out: 703 | if(xml) { 704 | switch_xml_free(xml); 705 | } 706 | return status; 707 | } 708 |

/**
 * Module shutdown hook.
 *
 * Raises globals.fl_shutdown, then blocks until globals.active_threads
 * reads as zero, calling switch_yield(100000) between checks.
 *
 * @return SWITCH_STATUS_SUCCESS in all cases.
 */
SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_openai_asr_shutdown) {
    uint32_t pending = 0;

    globals.fl_shutdown = SWITCH_TRUE;

    /* snapshot the counter under the mutex so the value that is logged is the value that was tested */
    switch_mutex_lock(globals.mutex);
    pending = globals.active_threads;
    switch_mutex_unlock(globals.mutex);

    if(pending > 0) {
        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Waiting for termination (%u) threads...\n", pending);

        while(pending > 0) {
            /* the counter was sampled a moment ago: sleep first, then look again,
             * so that no extra sleep happens once it has reached zero */
            switch_yield(100000);

            switch_mutex_lock(globals.mutex);
            pending = globals.active_threads;
            switch_mutex_unlock(globals.mutex);
        }
    }

    return SWITCH_STATUS_SUCCESS;
}

-------------------------------------------------------------------------------- /sources/mod_openai_asr.h: -------------------------------------------------------------------------------- 1 | /* 2 | * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application 3 | * Copyright (C) 2005-2014, Anthony Minessale II 4 | * 5 | * Version: MPL 1.1 6 | * 7 | * The contents of this file are subject to the Mozilla Public License Version 8 | * 1.1 (the "License"); you may not use this file except in compliance with 9 | * the License. You may obtain a copy of the License at 10 | * http://www.mozilla.org/MPL/ 11 | * 12 | * Software distributed under the License is distributed on an "AS IS" basis, 13 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 14 | * for the specific language governing rights and limitations under the 15 | * License. 
16 | * 17 | * Module Contributor(s): 18 | * Konstantin Alexandrin 19 | * 20 | * 21 | */ 22 | #ifndef MOD_OPENAI_ASR_H 23 | #define MOD_OPENAI_ASR_H 24 | 25 | #include 26 | #include 27 | #include 28 | 29 | #define MIN(a,b) (((a)<(b))?(a):(b)) 30 | #define MAX(a,b) (((a)>(b))?(a):(b)) 31 | 32 | #define MOD_CONFIG_NAME "openai_asr.conf" 33 | #define MOD_VERSION "1.0.4" 34 | #define QUEUE_SIZE 128 35 | #define VAD_STORE_FRAMES 64 36 | #define VAD_RECOVERY_FRAMES 20 37 | 38 | //#define MOD_OPENAI_ASR_DEBUG 39 | 40 | typedef struct { 41 | switch_mutex_t *mutex; 42 | uint32_t active_threads; 43 | uint32_t speech_max_sec; 44 | uint32_t speech_silence_sec; 45 | uint32_t vad_silence_ms; 46 | uint32_t vad_voice_ms; 47 | uint32_t vad_threshold; 48 | uint32_t request_timeout; // secondss 49 | uint32_t connect_timeout; // seconds 50 | uint32_t retries_on_error; 51 | uint8_t fl_vad_debug; 52 | uint8_t fl_shutdown; 53 | uint8_t fl_log_http_errors; 54 | char *tmp_path; 55 | char *api_key; 56 | char *api_url; 57 | char *user_agent; 58 | char *proxy; 59 | char *proxy_credentials; 60 | char *opt_encoding; 61 | char *opt_model; 62 | } globals_t; 63 | 64 | typedef struct { 65 | switch_memory_pool_t *pool; 66 | switch_vad_t *vad; 67 | switch_buffer_t *vad_buffer; 68 | switch_mutex_t *mutex; 69 | switch_queue_t *q_audio; 70 | switch_queue_t *q_text; 71 | switch_buffer_t *curl_recv_buffer_ref; 72 | switch_vad_state_t vad_state; 73 | char *opt_lang; 74 | char *opt_model; 75 | char *opt_api_key; 76 | int32_t transcription_results; 77 | uint32_t retries_on_error; 78 | uint32_t vad_buffer_size; 79 | uint32_t vad_stored_frames; 80 | uint32_t chunk_buffer_size; 81 | uint32_t refs; 82 | uint32_t samplerate; 83 | uint32_t channels; 84 | uint32_t frame_len; 85 | uint32_t silence_sec; 86 | uint8_t fl_start_timers; 87 | uint8_t fl_pause; 88 | uint8_t fl_vad_first_cycle; 89 | uint8_t fl_destroyed; 90 | uint8_t fl_abort; 91 | } asr_ctx_t; 92 | 93 | typedef struct { 94 | uint32_t len; 95 | switch_byte_t 
*data; 96 | } xdata_buffer_t; 97 | 98 | /* curl.c */ 99 | switch_status_t curl_perform(switch_buffer_t *recv_buffer, char *api_key, char *model_name, char *filename, globals_t *globals); 100 | 101 | /* utils.c */ 102 | char *chunk_write(switch_byte_t *buf, uint32_t buf_len, uint32_t channels, uint32_t samplerate, const char *file_ext); 103 | switch_status_t xdata_buffer_push(switch_queue_t *queue, switch_byte_t *data, uint32_t data_len); 104 | switch_status_t xdata_buffer_alloc(xdata_buffer_t **out, switch_byte_t *data, uint32_t data_len); 105 | void xdata_buffer_free(xdata_buffer_t **buf); 106 | void xdata_buffer_queue_clean(switch_queue_t *queue); 107 | void text_queue_clean(switch_queue_t *queue); 108 | char *parse_response(char *data, switch_stream_handle_t *stream); 109 | 110 | #endif 111 | -------------------------------------------------------------------------------- /sources/utils.c: -------------------------------------------------------------------------------- 1 | /* 2 | * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application 3 | * Copyright (C) 2005-2014, Anthony Minessale II 4 | * 5 | * Version: MPL 1.1 6 | * 7 | * The contents of this file are subject to the Mozilla Public License Version 8 | * 1.1 (the "License"); you may not use this file except in compliance with 9 | * the License. You may obtain a copy of the License at 10 | * http://www.mozilla.org/MPL/ 11 | * 12 | * Software distributed under the License is distributed on an "AS IS" basis, 13 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 14 | * for the specific language governing rights and limitations under the 15 | * License. 
16 | * 17 | * Module Contributor(s): 18 | * Konstantin Alexandrin 19 | * 20 | * 21 | */ 22 | #include "mod_openai_asr.h" 23 | 24 | extern globals_t globals; 25 | 26 | switch_status_t xdata_buffer_alloc(xdata_buffer_t **out, switch_byte_t *data, uint32_t data_len) { 27 | xdata_buffer_t *buf = NULL; 28 | 29 | switch_zmalloc(buf, sizeof(xdata_buffer_t)); 30 | 31 | if(data_len) { 32 | switch_malloc(buf->data, data_len); 33 | switch_assert(buf->data); 34 | 35 | buf->len = data_len; 36 | memcpy(buf->data, data, data_len); 37 | } 38 | 39 | *out = buf; 40 | return SWITCH_STATUS_SUCCESS; 41 | } 42 | 43 | void xdata_buffer_free(xdata_buffer_t **buf) { 44 | if(buf && *buf) { 45 | switch_safe_free((*buf)->data); 46 | free(*buf); 47 | } 48 | } 49 | 50 | void xdata_buffer_queue_clean(switch_queue_t *queue) { 51 | xdata_buffer_t *data = NULL; 52 | 53 | if(!queue || !switch_queue_size(queue)) { 54 | return; 55 | } 56 | 57 | while(switch_queue_trypop(queue, (void *) &data) == SWITCH_STATUS_SUCCESS) { 58 | if(data) { xdata_buffer_free(&data); } 59 | } 60 | } 61 | 62 | switch_status_t xdata_buffer_push(switch_queue_t *queue, switch_byte_t *data, uint32_t data_len) { 63 | xdata_buffer_t *buff = NULL; 64 | 65 | if(xdata_buffer_alloc(&buff, data, data_len) == SWITCH_STATUS_SUCCESS) { 66 | if(switch_queue_trypush(queue, buff) == SWITCH_STATUS_SUCCESS) { 67 | return SWITCH_STATUS_SUCCESS; 68 | } 69 | xdata_buffer_free(&buff); 70 | } 71 | return SWITCH_STATUS_FALSE; 72 | } 73 | 74 | char *chunk_write(switch_byte_t *buf, uint32_t buf_len, uint32_t channels, uint32_t samplerate, const char *file_ext) { 75 | switch_status_t status = SWITCH_STATUS_FALSE; 76 | switch_size_t len = (buf_len / sizeof(int16_t)); 77 | switch_file_handle_t fh = { 0 }; 78 | char *file_name = NULL; 79 | char name_uuid[SWITCH_UUID_FORMATTED_LENGTH + 1] = { 0 }; 80 | int flags = (SWITCH_FILE_FLAG_WRITE | SWITCH_FILE_DATA_SHORT); 81 | 82 | switch_uuid_str((char *)name_uuid, sizeof(name_uuid)); 83 | file_name = 
switch_mprintf("%s%s%s.%s", globals.tmp_path, SWITCH_PATH_SEPARATOR, name_uuid, (file_ext == NULL ? "wav" : file_ext) ); 84 | 85 | if((status = switch_core_file_open(&fh, file_name, channels, samplerate, flags, NULL)) != SWITCH_STATUS_SUCCESS) { 86 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to open file (%s)\n", file_name); 87 | goto out; 88 | } 89 | 90 | if((status = switch_core_file_write(&fh, buf, &len)) != SWITCH_STATUS_SUCCESS) { 91 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to write (%s)\n", file_name); 92 | goto out; 93 | } 94 | 95 | switch_core_file_close(&fh); 96 | out: 97 | if(status != SWITCH_STATUS_SUCCESS) { 98 | if(file_name) { 99 | unlink(file_name); 100 | switch_safe_free(file_name); 101 | } 102 | return NULL; 103 | } 104 | 105 | return file_name; 106 | } 107 | 108 | void text_queue_clean(switch_queue_t *queue) { 109 | void *data = NULL; 110 | 111 | if(!queue || !switch_queue_size(queue)) { 112 | return; 113 | } 114 | 115 | while(switch_queue_trypop(queue, (void *)&data) == SWITCH_STATUS_SUCCESS) { 116 | switch_safe_free(data); 117 | } 118 | } 119 | 120 | char *parse_response(char *data, switch_stream_handle_t *stream) { 121 | char *result = NULL; 122 | cJSON *json = NULL; 123 | 124 | if(!data) { 125 | return NULL; 126 | } 127 | 128 | if(!(json = cJSON_Parse(data))) { 129 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to parse json (%s)\n", data); 130 | if(stream) stream->write_function(stream, "-ERR: Unable to parse json (see log)\n"); 131 | } else { 132 | cJSON *jres = cJSON_GetObjectItem(json, "error"); 133 | if(jres) { 134 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Service returns error (%s)\n", data); 135 | if(stream) stream->write_function(stream, "-ERR: Service returns error (see log)\n"); 136 | } else { 137 | cJSON *jres = cJSON_GetObjectItem(json, "text"); 138 | if(jres) { 139 | result = strdup(jres->valuestring); 140 | } 141 | } 142 | } 143 | 144 | if(json) { 
145 | cJSON_Delete(json); 146 | } 147 | 148 | return result; 149 | } 150 | --------------------------------------------------------------------------------