├── README.md
└── sources
├── Makefile.am
├── conf
├── autoload_configs
│ └── openai_asr.conf.xml
└── dialplan
│ └── dialplan.xml
├── curl.c
├── mod_openai_asr.c
├── mod_openai_asr.h
└── utils.c
/README.md:
--------------------------------------------------------------------------------
1 |
2 | OpenAI Speech-To-Text service for FreeSWITCH.
3 | Features: VAD, flexible adjustments.
4 | Available through: asr_interface, command API
5 |
6 | If you have trouble building this module, take a look at this issue and the comment;
7 | you may have run into the same problem.
8 |
9 |
10 | ### Dialplan example
11 | ```XML
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 | ```
22 |
23 | ### mod_quickjs
24 | ```javascript
25 | session.ttsEngine= 'openai'; // requires: mod_openai_tts
26 | session.asrEngine= 'openai';
27 |
28 | var txt = session.sayAndDetectSpeech('Hello, how can I help you?', 10);
29 | consoleLog('info', "TEXT: " + txt);
30 | ```
31 |
32 | ### Command line
33 | ```
34 | freeswitch> openai_asr_transcript /tmp/test.[wav|mp3] [key=altKey model=altModel]
35 | +OK: How old is the Brooklyn Bridge
36 | ```
37 |
--------------------------------------------------------------------------------
/sources/Makefile.am:
--------------------------------------------------------------------------------
1 |
2 | include $(top_srcdir)/build/modmake.rulesam
3 |
4 | MODNAME=mod_openai_asr
5 | mod_LTLIBRARIES = mod_openai_asr.la
6 | mod_openai_asr_la_SOURCES = mod_openai_asr.c utils.c curl.c
7 | mod_openai_asr_la_CFLAGS = $(AM_CFLAGS) -I. -Wno-pointer-arith
8 | mod_openai_asr_la_LIBADD = $(switch_builddir)/libfreeswitch.la
9 | mod_openai_asr_la_LDFLAGS = -avoid-version -module -no-undefined -shared
10 |
11 | $(am_mod_openai_asr_la_OBJECTS): mod_openai_asr.h
12 |
13 |
--------------------------------------------------------------------------------
/sources/conf/autoload_configs/openai_asr.conf.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/sources/conf/dialplan/dialplan.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
--------------------------------------------------------------------------------
/sources/curl.c:
--------------------------------------------------------------------------------
1 | /*
2 | * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application
3 | * Copyright (C) 2005-2014, Anthony Minessale II
4 | *
5 | * Version: MPL 1.1
6 | *
7 | * The contents of this file are subject to the Mozilla Public License Version
8 | * 1.1 (the "License"); you may not use this file except in compliance with
9 | * the License. You may obtain a copy of the License at
10 | * http://www.mozilla.org/MPL/
11 | *
12 | * Software distributed under the License is distributed on an "AS IS" basis,
13 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14 | * for the specific language governing rights and limitations under the
15 | * License.
16 | *
17 | * Module Contributor(s):
18 | * Konstantin Alexandrin
19 | *
20 | *
21 | */
22 | #include "mod_openai_asr.h"
23 |
24 | static size_t curl_io_write_callback(char *buffer, size_t size, size_t nitems, void *user_data) {
25 | switch_buffer_t *recv_buffer = (switch_buffer_t *)user_data;
26 | size_t len = (size * nitems);
27 |
28 | if(len > 0 && recv_buffer) {
29 | switch_buffer_write(recv_buffer, buffer, len);
30 | }
31 |
32 | return len;
33 | }
34 |
35 | switch_status_t curl_perform(switch_buffer_t *recv_buffer, char *api_key, char *model_name, char *filename, globals_t *globals) {
36 | switch_status_t status = SWITCH_STATUS_SUCCESS;
37 | CURL *curl_handle = NULL;
38 | curl_mime *form = NULL;
39 | curl_mimepart *field1=NULL, *field2=NULL;
40 | switch_curl_slist_t *headers = NULL;
41 | switch_CURLcode curl_ret = 0;
42 | long http_resp = 0;
43 |
44 | curl_handle = switch_curl_easy_init();
45 | headers = switch_curl_slist_append(headers, "Content-Type: multipart/form-data");
46 |
47 | switch_curl_easy_setopt(curl_handle, CURLOPT_HTTPHEADER, headers);
48 | switch_curl_easy_setopt(curl_handle, CURLOPT_POST, 1);
49 | switch_curl_easy_setopt(curl_handle, CURLOPT_NOSIGNAL, 1);
50 | switch_curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, curl_io_write_callback);
51 | switch_curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *) recv_buffer);
52 |
53 | if(globals->connect_timeout > 0) {
54 | switch_curl_easy_setopt(curl_handle, CURLOPT_CONNECTTIMEOUT, globals->connect_timeout);
55 | }
56 | if(globals->request_timeout > 0) {
57 | switch_curl_easy_setopt(curl_handle, CURLOPT_TIMEOUT, globals->request_timeout);
58 | }
59 | if(globals->user_agent) {
60 | switch_curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, globals->user_agent);
61 | }
62 | if(strncasecmp(globals->api_url, "https", 5) == 0) {
63 | switch_curl_easy_setopt(curl_handle, CURLOPT_SSL_VERIFYPEER, 0);
64 | switch_curl_easy_setopt(curl_handle, CURLOPT_SSL_VERIFYHOST, 0);
65 | }
66 | if(globals->proxy) {
67 | if(globals->proxy_credentials != NULL) {
68 | switch_curl_easy_setopt(curl_handle, CURLOPT_PROXYAUTH, CURLAUTH_ANY);
69 | switch_curl_easy_setopt(curl_handle, CURLOPT_PROXYUSERPWD, globals->proxy_credentials);
70 | }
71 | if(strncasecmp(globals->proxy, "https", 5) == 0) {
72 | switch_curl_easy_setopt(curl_handle, CURLOPT_PROXY_SSL_VERIFYPEER, 0);
73 | }
74 | switch_curl_easy_setopt(curl_handle, CURLOPT_PROXY, globals->proxy);
75 | }
76 |
77 | if(api_key) {
78 | curl_easy_setopt(curl_handle, CURLOPT_XOAUTH2_BEARER, api_key);
79 | curl_easy_setopt(curl_handle, CURLOPT_HTTPAUTH, CURLAUTH_BEARER);
80 | }
81 |
82 | if((form = curl_mime_init(curl_handle))) {
83 | if((field1 = curl_mime_addpart(form))) {
84 | curl_mime_name(field1, "model");
85 | curl_mime_data(field1, model_name, CURL_ZERO_TERMINATED);
86 | }
87 | if((field2 = curl_mime_addpart(form))) {
88 | curl_mime_name(field2, "file");
89 | curl_mime_filedata(field2, filename);
90 | }
91 | switch_curl_easy_setopt(curl_handle, CURLOPT_MIMEPOST, form);
92 | }
93 |
94 | headers = switch_curl_slist_append(headers, "Expect:");
95 | switch_curl_easy_setopt(curl_handle, CURLOPT_URL, globals->api_url);
96 |
97 | curl_ret = switch_curl_easy_perform(curl_handle);
98 | if(!curl_ret) {
99 | switch_curl_easy_getinfo(curl_handle, CURLINFO_RESPONSE_CODE, &http_resp);
100 | if(!http_resp) { switch_curl_easy_getinfo(curl_handle, CURLINFO_HTTP_CONNECTCODE, &http_resp); }
101 | } else {
102 | http_resp = curl_ret;
103 | }
104 |
105 | if(http_resp != 200) {
106 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "http-error=[%ld] (%s)\n", http_resp, globals->api_url);
107 | status = SWITCH_STATUS_FALSE;
108 | }
109 |
110 | if(recv_buffer) {
111 | if(switch_buffer_inuse(recv_buffer) > 0) {
112 | switch_buffer_write(recv_buffer, "\0", 1);
113 | }
114 | }
115 |
116 | if(curl_handle) {
117 | switch_curl_easy_cleanup(curl_handle);
118 | }
119 | if(form) {
120 | curl_mime_free(form);
121 | }
122 | if(headers) {
123 | switch_curl_slist_free_all(headers);
124 | }
125 |
126 | return status;
127 | }
128 |
--------------------------------------------------------------------------------
/sources/mod_openai_asr.c:
--------------------------------------------------------------------------------
1 | /*
2 | * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application
3 | * Copyright (C) 2005-2014, Anthony Minessale II
4 | *
5 | * Version: MPL 1.1
6 | *
7 | * The contents of this file are subject to the Mozilla Public License Version
8 | * 1.1 (the "License"); you may not use this file except in compliance with
9 | * the License. You may obtain a copy of the License at
10 | * http://www.mozilla.org/MPL/
11 | *
12 | * Software distributed under the License is distributed on an "AS IS" basis,
13 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14 | * for the specific language governing rights and limitations under the
15 | * License.
16 | *
17 | * Module Contributor(s):
18 | * Konstantin Alexandrin
19 | *
20 | *
21 | * OpenAI Speech-To-Text service for the Freeswitch.
22 | * https://platform.openai.com/docs/guides/speech-to-text
23 | *
24 | * Development repository:
25 | * https://github.com/akscf/mod_openai_asr
26 | *
27 | */
28 | #include "mod_openai_asr.h"
29 |
/* Module-wide settings and runtime state, read by the ASR sessions and the API command. */
globals_t globals;

/* Module entry points: forward declarations, then the module definition that */
/* registers the load and shutdown functions (the fourth argument is NULL). */
SWITCH_MODULE_LOAD_FUNCTION(mod_openai_asr_load);
SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_openai_asr_shutdown);
SWITCH_MODULE_DEFINITION(mod_openai_asr, mod_openai_asr_load, mod_openai_asr_shutdown, NULL);
35 |
36 | static void *SWITCH_THREAD_FUNC transcribe_thread(switch_thread_t *thread, void *obj) {
37 | volatile asr_ctx_t *_ref = (asr_ctx_t *)obj;
38 | asr_ctx_t *asr_ctx = (asr_ctx_t *)_ref;
39 | switch_status_t status = SWITCH_STATUS_FALSE;
40 | switch_buffer_t *chunk_buffer = NULL;
41 | switch_buffer_t *curl_recv_buffer = NULL;
42 | switch_memory_pool_t *pool = NULL;
43 | cJSON *json = NULL;
44 | time_t sentence_timeout = 0;
45 | uint32_t schunks = 0;
46 | uint32_t chunk_buffer_size = 0;
47 | uint8_t fl_cbuff_overflow = SWITCH_FALSE;
48 | void *pop = NULL;
49 |
50 | switch_mutex_lock(asr_ctx->mutex);
51 | asr_ctx->refs++;
52 | switch_mutex_unlock(asr_ctx->mutex);
53 |
54 | if(switch_core_new_memory_pool(&pool) != SWITCH_STATUS_SUCCESS) {
55 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "switch_core_new_memory_pool()\n");
56 | goto out;
57 | }
58 | if(switch_buffer_create_dynamic(&curl_recv_buffer, 1024, 2048, 8192) != SWITCH_STATUS_SUCCESS) {
59 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_buffer_create_dynamic()\n");
60 | goto out;
61 | }
62 |
63 | while(SWITCH_TRUE) {
64 | if(globals.fl_shutdown || asr_ctx->fl_destroyed) {
65 | break;
66 | }
67 | if(chunk_buffer_size == 0) {
68 | switch_mutex_lock(asr_ctx->mutex);
69 | chunk_buffer_size = asr_ctx->chunk_buffer_size;
70 | switch_mutex_unlock(asr_ctx->mutex);
71 |
72 | if(chunk_buffer_size > 0) {
73 | if(switch_buffer_create(pool, &chunk_buffer, chunk_buffer_size) != SWITCH_STATUS_SUCCESS) {
74 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "switch_buffer_create()\n");
75 | break;
76 | }
77 | switch_buffer_zero(chunk_buffer);
78 | }
79 | goto timer_next;
80 | }
81 |
82 | fl_cbuff_overflow = SWITCH_FALSE;
83 | while(switch_queue_trypop(asr_ctx->q_audio, &pop) == SWITCH_STATUS_SUCCESS) {
84 | xdata_buffer_t *audio_buffer = (xdata_buffer_t *)pop;
85 | if(globals.fl_shutdown || asr_ctx->fl_destroyed ) {
86 | xdata_buffer_free(&audio_buffer);
87 | break;
88 | }
89 | if(audio_buffer && audio_buffer->len) {
90 | if(switch_buffer_write(chunk_buffer, audio_buffer->data, audio_buffer->len) >= chunk_buffer_size) {
91 | fl_cbuff_overflow = SWITCH_TRUE;
92 | break;
93 | }
94 | schunks++;
95 | }
96 | xdata_buffer_free(&audio_buffer);
97 | }
98 |
99 | if(fl_cbuff_overflow) {
100 | sentence_timeout = 1;
101 | } else {
102 | if(schunks && asr_ctx->vad_state == SWITCH_VAD_STATE_STOP_TALKING) {
103 | if(!sentence_timeout) {
104 | sentence_timeout = asr_ctx->silence_sec + switch_epoch_time_now(NULL);
105 | }
106 | }
107 | if(sentence_timeout && (asr_ctx->vad_state == SWITCH_VAD_STATE_START_TALKING || asr_ctx->vad_state == SWITCH_VAD_STATE_TALKING)) {
108 | sentence_timeout = 0;
109 | }
110 | }
111 |
112 | if(sentence_timeout && sentence_timeout <= switch_epoch_time_now(NULL)) {
113 | const void *chunk_buffer_ptr = NULL;
114 | const void *http_response_ptr = NULL;
115 | uint32_t buf_len = 0, http_recv_len = 0, stt_failed = 0;
116 | char *chunk_fname = NULL;
117 |
118 | if((buf_len = switch_buffer_peek_zerocopy(chunk_buffer, &chunk_buffer_ptr)) > 0 && chunk_buffer_ptr) {
119 | chunk_fname = chunk_write((switch_byte_t *)chunk_buffer_ptr, buf_len, asr_ctx->channels, asr_ctx->samplerate, globals.opt_encoding);
120 | }
121 | if(chunk_fname) {
122 | for(uint32_t rqtry = 0; rqtry < asr_ctx->retries_on_error; rqtry++) {
123 | switch_buffer_zero(curl_recv_buffer);
124 | status = curl_perform(curl_recv_buffer, asr_ctx->opt_api_key, asr_ctx->opt_model, chunk_fname, &globals);
125 | if(status == SWITCH_STATUS_SUCCESS || globals.fl_shutdown || asr_ctx->fl_destroyed) { break; }
126 | switch_yield(100000);
127 | }
128 |
129 | http_recv_len = switch_buffer_peek_zerocopy(curl_recv_buffer, &http_response_ptr);
130 | if(status == SWITCH_STATUS_SUCCESS) {
131 | if(http_response_ptr && http_recv_len) {
132 | char *txt = parse_response((char *)http_response_ptr, NULL);
133 | #ifdef MOD_OPENAI_ASR_DEBUG
134 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Service response [%s]\n", (char *)http_response_ptr);
135 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Text [%s]\n", txt ? txt : "null");
136 | #endif
137 | if(!txt) txt = strdup("");
138 | if(switch_queue_trypush(asr_ctx->q_text, txt) == SWITCH_STATUS_SUCCESS) {
139 | switch_mutex_lock(asr_ctx->mutex);
140 | asr_ctx->transcription_results++;
141 | switch_mutex_unlock(asr_ctx->mutex);
142 | } else {
143 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Queue is full!\n");
144 | switch_safe_free(txt);
145 | }
146 | } else {
147 | stt_failed = 1;
148 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Empty service response!\n");
149 | }
150 | } else {
151 | stt_failed = 1;
152 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to perform request!\n");
153 | }
154 |
155 | if(stt_failed) {
156 | char *txt = strdup("[transcription failed]");
157 | if(switch_queue_trypush(asr_ctx->q_text, txt) == SWITCH_STATUS_SUCCESS) {
158 | switch_mutex_lock(asr_ctx->mutex);
159 | asr_ctx->transcription_results++;
160 | switch_mutex_unlock(asr_ctx->mutex);
161 | } else {
162 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Queue is full!\n");
163 | switch_safe_free(txt);
164 | }
165 | }
166 |
167 | schunks = 0;
168 | sentence_timeout = 0;
169 | unlink(chunk_fname);
170 | switch_safe_free(chunk_fname);
171 | switch_buffer_zero(chunk_buffer);
172 | }
173 | }
174 |
175 | timer_next:
176 | switch_yield(10000);
177 | }
178 |
179 | out:
180 | if(json != NULL) {
181 | cJSON_Delete(json);
182 | }
183 | if(curl_recv_buffer) {
184 | switch_buffer_destroy(&curl_recv_buffer);
185 | }
186 | if(chunk_buffer) {
187 | switch_buffer_destroy(&chunk_buffer);
188 | }
189 | if(pool) {
190 | switch_core_destroy_memory_pool(&pool);
191 | }
192 |
193 | switch_mutex_lock(asr_ctx->mutex);
194 | if(asr_ctx->refs > 0) asr_ctx->refs--;
195 | switch_mutex_unlock(asr_ctx->mutex);
196 |
197 | switch_mutex_lock(globals.mutex);
198 | if(globals.active_threads) globals.active_threads--;
199 | switch_mutex_unlock(globals.mutex);
200 |
201 | return NULL;
202 | }
203 |
204 | // ---------------------------------------------------------------------------------------------------------------------------------------------
205 | static switch_status_t asr_open(switch_asr_handle_t *ah, const char *codec, int samplerate, const char *dest, switch_asr_flag_t *flags) {
206 | switch_status_t status = SWITCH_STATUS_SUCCESS;
207 | switch_threadattr_t *attr = NULL;
208 | switch_thread_t *thread = NULL;
209 | asr_ctx_t *asr_ctx = NULL;
210 |
211 | if(strcmp(codec, "L16") !=0) {
212 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unsupported encoding (%s)\n", codec);
213 | switch_goto_status(SWITCH_STATUS_FALSE, out);
214 | }
215 |
216 | if((asr_ctx = switch_core_alloc(ah->memory_pool, sizeof(asr_ctx_t))) == NULL) {
217 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_core_alloc()\n");
218 | switch_goto_status(SWITCH_STATUS_GENERR, out);
219 | }
220 |
221 | asr_ctx->channels = 1;
222 | asr_ctx->chunk_buffer_size = 0;
223 | asr_ctx->samplerate = samplerate;
224 | asr_ctx->silence_sec = globals.speech_silence_sec;
225 | asr_ctx->retries_on_error = globals.retries_on_error;
226 |
227 | asr_ctx->opt_model = globals.opt_model;
228 | asr_ctx->opt_api_key = globals.api_key;
229 |
230 | if((status = switch_mutex_init(&asr_ctx->mutex, SWITCH_MUTEX_NESTED, ah->memory_pool)) != SWITCH_STATUS_SUCCESS) {
231 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_mutex_init()\n");
232 | switch_goto_status(SWITCH_STATUS_GENERR, out);
233 | }
234 |
235 | switch_queue_create(&asr_ctx->q_audio, QUEUE_SIZE, ah->memory_pool);
236 | switch_queue_create(&asr_ctx->q_text, QUEUE_SIZE, ah->memory_pool);
237 |
238 | asr_ctx->vad_buffer = NULL;
239 | asr_ctx->frame_len = 0;
240 | asr_ctx->vad_buffer_size = 0;
241 | asr_ctx->vad_stored_frames = 0;
242 | asr_ctx->fl_vad_first_cycle = SWITCH_TRUE;
243 |
244 | if((asr_ctx->vad = switch_vad_init(asr_ctx->samplerate, asr_ctx->channels)) == NULL) {
245 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_vad_init()\n");
246 | switch_goto_status(SWITCH_STATUS_GENERR, out);
247 | }
248 | switch_vad_set_mode(asr_ctx->vad, -1);
249 | switch_vad_set_param(asr_ctx->vad, "debug", globals.fl_vad_debug);
250 | if(globals.vad_silence_ms > 0) { switch_vad_set_param(asr_ctx->vad, "silence_ms", globals.vad_silence_ms); }
251 | if(globals.vad_voice_ms > 0) { switch_vad_set_param(asr_ctx->vad, "voice_ms", globals.vad_voice_ms); }
252 | if(globals.vad_threshold > 0) { switch_vad_set_param(asr_ctx->vad, "thresh", globals.vad_threshold); }
253 |
254 | ah->private_info = asr_ctx;
255 |
256 | switch_mutex_lock(globals.mutex);
257 | globals.active_threads++;
258 | switch_mutex_unlock(globals.mutex);
259 |
260 | switch_threadattr_create(&attr, ah->memory_pool);
261 | switch_threadattr_detach_set(attr, 1);
262 | switch_threadattr_stacksize_set(attr, SWITCH_THREAD_STACKSIZE);
263 | switch_thread_create(&thread, attr, transcribe_thread, asr_ctx, ah->memory_pool);
264 |
265 | out:
266 | return status;
267 | }
268 |
269 | static switch_status_t asr_close(switch_asr_handle_t *ah, switch_asr_flag_t *flags) {
270 | asr_ctx_t *asr_ctx = (asr_ctx_t *)ah->private_info;
271 | uint8_t fl_wloop = SWITCH_TRUE;
272 |
273 | assert(asr_ctx != NULL);
274 |
275 | asr_ctx->fl_abort = SWITCH_TRUE;
276 | asr_ctx->fl_destroyed = SWITCH_TRUE;
277 |
278 | switch_mutex_lock(asr_ctx->mutex);
279 | fl_wloop = (asr_ctx->refs != 0);
280 | switch_mutex_unlock(asr_ctx->mutex);
281 |
282 | if(fl_wloop) {
283 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Waiting for unlock (refs=%d)...\n", asr_ctx->refs);
284 | while(fl_wloop) {
285 | switch_mutex_lock(asr_ctx->mutex);
286 | fl_wloop = (asr_ctx->refs != 0);
287 | switch_mutex_unlock(asr_ctx->mutex);
288 | switch_yield(100000);
289 | }
290 | }
291 |
292 | if(asr_ctx->q_audio) {
293 | xdata_buffer_queue_clean(asr_ctx->q_audio);
294 | switch_queue_term(asr_ctx->q_audio);
295 | }
296 | if(asr_ctx->q_text) {
297 | text_queue_clean(asr_ctx->q_text);
298 | switch_queue_term(asr_ctx->q_text);
299 | }
300 | if(asr_ctx->vad) {
301 | switch_vad_destroy(&asr_ctx->vad);
302 | }
303 | if(asr_ctx->vad_buffer) {
304 | switch_buffer_destroy(&asr_ctx->vad_buffer);
305 | }
306 |
307 | switch_set_flag(ah, SWITCH_ASR_FLAG_CLOSED);
308 |
309 | return SWITCH_STATUS_SUCCESS;
310 | }
311 |
312 | static switch_status_t asr_feed(switch_asr_handle_t *ah, void *data, unsigned int data_len, switch_asr_flag_t *flags) {
313 | asr_ctx_t *asr_ctx = (asr_ctx_t *) ah->private_info;
314 | switch_vad_state_t vad_state = 0;
315 | uint8_t fl_has_audio = SWITCH_FALSE;
316 |
317 | assert(asr_ctx != NULL);
318 |
319 | if(switch_test_flag(ah, SWITCH_ASR_FLAG_CLOSED)) {
320 | return SWITCH_STATUS_BREAK;
321 | }
322 | if(asr_ctx->fl_destroyed || asr_ctx->fl_abort) {
323 | return SWITCH_STATUS_BREAK;
324 | }
325 | if(asr_ctx->fl_pause) {
326 | return SWITCH_STATUS_SUCCESS;
327 | }
328 | if(!data || !data_len) {
329 | return SWITCH_STATUS_BREAK;
330 | }
331 |
332 | if(data_len > 0 && asr_ctx->frame_len == 0) {
333 | switch_mutex_lock(asr_ctx->mutex);
334 | asr_ctx->frame_len = data_len;
335 | asr_ctx->vad_buffer_size = asr_ctx->frame_len * VAD_STORE_FRAMES;
336 | asr_ctx->chunk_buffer_size = asr_ctx->samplerate * globals.speech_max_sec;
337 | switch_mutex_unlock(asr_ctx->mutex);
338 |
339 | if(switch_buffer_create(ah->memory_pool, &asr_ctx->vad_buffer, asr_ctx->vad_buffer_size) != SWITCH_STATUS_SUCCESS) {
340 | asr_ctx->vad_buffer_size = 0;
341 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_buffer_create()\n");
342 | }
343 | }
344 |
345 | if(asr_ctx->vad_buffer_size) {
346 | if(asr_ctx->vad_state == SWITCH_VAD_STATE_STOP_TALKING || (asr_ctx->vad_state == vad_state && vad_state == SWITCH_VAD_STATE_NONE)) {
347 | if(data_len <= asr_ctx->frame_len) {
348 | if(asr_ctx->vad_stored_frames >= VAD_STORE_FRAMES) {
349 | switch_buffer_zero(asr_ctx->vad_buffer);
350 | asr_ctx->vad_stored_frames = 0;
351 | asr_ctx->fl_vad_first_cycle = SWITCH_FALSE;
352 | }
353 | switch_buffer_write(asr_ctx->vad_buffer, data, MIN(asr_ctx->frame_len, data_len));
354 | asr_ctx->vad_stored_frames++;
355 | }
356 | }
357 |
358 | vad_state = switch_vad_process(asr_ctx->vad, (int16_t *)data, (data_len / sizeof(int16_t)));
359 | if(vad_state == SWITCH_VAD_STATE_START_TALKING) {
360 | asr_ctx->vad_state = vad_state;
361 | fl_has_audio = SWITCH_TRUE;
362 | } else if (vad_state == SWITCH_VAD_STATE_STOP_TALKING) {
363 | asr_ctx->vad_state = vad_state;
364 | fl_has_audio = SWITCH_FALSE;
365 | switch_vad_reset(asr_ctx->vad);
366 | } else if (vad_state == SWITCH_VAD_STATE_TALKING) {
367 | asr_ctx->vad_state = vad_state;
368 | fl_has_audio = SWITCH_TRUE;
369 | }
370 | } else {
371 | fl_has_audio = SWITCH_TRUE;
372 | }
373 |
374 | if(fl_has_audio) {
375 | if(vad_state == SWITCH_VAD_STATE_START_TALKING && asr_ctx->vad_stored_frames > 0) {
376 | xdata_buffer_t *tau_buf = NULL;
377 | const void *ptr = NULL;
378 | switch_size_t vblen = 0;
379 | uint32_t rframes = 0, rlen = 0;
380 | int ofs = 0;
381 |
382 | if((vblen = switch_buffer_peek_zerocopy(asr_ctx->vad_buffer, &ptr)) && ptr && vblen > 0) {
383 | rframes = (asr_ctx->vad_stored_frames >= VAD_RECOVERY_FRAMES ? VAD_RECOVERY_FRAMES : (asr_ctx->fl_vad_first_cycle ? asr_ctx->vad_stored_frames : VAD_RECOVERY_FRAMES));
384 | rlen = (rframes * asr_ctx->frame_len);
385 | ofs = (vblen - rlen);
386 |
387 | if(ofs < 0) {
388 | uint32_t hdr_sz = -ofs;
389 | uint32_t hdr_ofs = (asr_ctx->vad_buffer_size - hdr_sz);
390 |
391 | switch_zmalloc(tau_buf, sizeof(xdata_buffer_t));
392 |
393 | tau_buf->len = (hdr_sz + vblen + data_len);
394 | switch_malloc(tau_buf->data, tau_buf->len);
395 |
396 | memcpy(tau_buf->data, (void *)(ptr + hdr_ofs), hdr_sz);
397 | memcpy(tau_buf->data + hdr_sz , (void *)(ptr + 0), vblen);
398 | memcpy(tau_buf->data + rlen, data, data_len);
399 |
400 | if(switch_queue_trypush(asr_ctx->q_audio, tau_buf) != SWITCH_STATUS_SUCCESS) {
401 | xdata_buffer_free(&tau_buf);
402 | }
403 |
404 | switch_buffer_zero(asr_ctx->vad_buffer);
405 | asr_ctx->vad_stored_frames = 0;
406 | } else {
407 | switch_zmalloc(tau_buf, sizeof(xdata_buffer_t));
408 |
409 | tau_buf->len = (rlen + data_len);
410 | switch_malloc(tau_buf->data, tau_buf->len);
411 |
412 | memcpy(tau_buf->data, (void *)(ptr + ofs), rlen);
413 | memcpy(tau_buf->data + rlen, data, data_len);
414 |
415 | if(switch_queue_trypush(asr_ctx->q_audio, tau_buf) != SWITCH_STATUS_SUCCESS) {
416 | xdata_buffer_free(&tau_buf);
417 | }
418 |
419 | switch_buffer_zero(asr_ctx->vad_buffer);
420 | asr_ctx->vad_stored_frames = 0;
421 | }
422 | }
423 | } else {
424 | xdata_buffer_push(asr_ctx->q_audio, data, data_len);
425 | }
426 | }
427 |
428 | return SWITCH_STATUS_SUCCESS;
429 | }
430 |
431 | static switch_status_t asr_check_results(switch_asr_handle_t *ah, switch_asr_flag_t *flags) {
432 | asr_ctx_t *asr_ctx = (asr_ctx_t *)ah->private_info;
433 |
434 | assert(asr_ctx != NULL);
435 |
436 | if(asr_ctx->fl_pause) {
437 | return SWITCH_STATUS_FALSE;
438 | }
439 |
440 | return (asr_ctx->transcription_results > 0 ? SWITCH_STATUS_SUCCESS : SWITCH_STATUS_FALSE);
441 | }
442 |
443 | static switch_status_t asr_get_results(switch_asr_handle_t *ah, char **xmlstr, switch_asr_flag_t *flags) {
444 | asr_ctx_t *asr_ctx = (asr_ctx_t *)ah->private_info;
445 | switch_status_t status = SWITCH_STATUS_FALSE;
446 | void *pop = NULL;
447 |
448 | assert(asr_ctx != NULL);
449 |
450 | if(switch_queue_trypop(asr_ctx->q_text, &pop) == SWITCH_STATUS_SUCCESS) {
451 | if(pop) {
452 | *xmlstr = (char *)pop;
453 | status = SWITCH_STATUS_SUCCESS;
454 |
455 | switch_mutex_lock(asr_ctx->mutex);
456 | if(asr_ctx->transcription_results > 0) asr_ctx->transcription_results--;
457 | switch_mutex_unlock(asr_ctx->mutex);
458 | }
459 | }
460 |
461 | return status;
462 | }
463 |
464 | static switch_status_t asr_start_input_timers(switch_asr_handle_t *ah) {
465 | asr_ctx_t *asr_ctx = (asr_ctx_t *)ah->private_info;
466 |
467 | assert(asr_ctx != NULL);
468 |
469 | asr_ctx->fl_start_timers = SWITCH_TRUE;
470 |
471 | return SWITCH_STATUS_SUCCESS;
472 | }
473 |
474 | static switch_status_t asr_pause(switch_asr_handle_t *ah) {
475 | asr_ctx_t *asr_ctx = (asr_ctx_t *)ah->private_info;
476 |
477 | assert(asr_ctx != NULL);
478 |
479 | asr_ctx->fl_pause = SWITCH_TRUE;
480 |
481 | return SWITCH_STATUS_SUCCESS;
482 | }
483 |
484 | static switch_status_t asr_resume(switch_asr_handle_t *ah) {
485 | asr_ctx_t *asr_ctx = (asr_ctx_t *)ah->private_info;
486 |
487 | assert(asr_ctx != NULL);
488 |
489 | asr_ctx->fl_pause = SWITCH_FALSE;
490 |
491 | return SWITCH_STATUS_SUCCESS;
492 | }
493 |
494 | static void asr_text_param(switch_asr_handle_t *ah, char *param, const char *val) {
495 | asr_ctx_t *asr_ctx = (asr_ctx_t *)ah->private_info;
496 |
497 | assert(asr_ctx != NULL);
498 |
499 | if(strcasecmp(param, "lang") == 0) {
500 | if(val) asr_ctx->opt_lang = switch_core_strdup(ah->memory_pool, val);
501 | } else if(strcasecmp(param, "model") == 0) {
502 | if(val) asr_ctx->opt_model = switch_core_strdup(ah->memory_pool, val);
503 | } else if(strcasecmp(param, "key") == 0) {
504 | if(val) asr_ctx->opt_api_key = switch_core_strdup(ah->memory_pool, val);
505 | } else if(strcasecmp(param, "silence") == 0) {
506 | if(val) asr_ctx->silence_sec = atoi(val);
507 | }
508 | }
509 |
510 | static void asr_numeric_param(switch_asr_handle_t *ah, char *param, int val) {
511 | }
512 |
513 | static void asr_float_param(switch_asr_handle_t *ah, char *param, double val) {
514 | }
515 |
516 | static switch_status_t asr_load_grammar(switch_asr_handle_t *ah, const char *grammar, const char *name) {
517 | return SWITCH_STATUS_SUCCESS;
518 | }
519 |
520 | static switch_status_t asr_unload_grammar(switch_asr_handle_t *ah, const char *name) {
521 | return SWITCH_STATUS_SUCCESS;
522 | }
523 |
524 | // ---------------------------------------------------------------------------------------------------------------------------------------------
525 | #define CMD_SYNTAX "path_to/filename.(mp3|wav) [key=altkey model=altModel]\n"
526 | SWITCH_STANDARD_API(openai_asr_cmd_handler) {
527 | switch_status_t status = 0;
528 | char *mycmd = NULL, *argv[10] = { 0 }; int argc = 0;
529 | switch_buffer_t *recv_buf = NULL;
530 | const void *response_ptr = NULL;
531 | char *opt_api_key = globals.api_key;
532 | char *opt_model = globals.opt_model;
533 | char *file_name = NULL, *file_ext = NULL;
534 | uint32_t recv_len = 0;
535 |
536 | if (!zstr(cmd)) {
537 | mycmd = strdup(cmd);
538 | switch_assert(mycmd);
539 | argc = switch_separate_string(mycmd, ' ', argv, (sizeof(argv) / sizeof(argv[0])));
540 | }
541 | if(argc == 0) {
542 | goto usage;
543 | }
544 |
545 | file_name = argv[0];
546 | if(switch_file_exists(file_name, NULL) != SWITCH_STATUS_SUCCESS) {
547 | stream->write_function(stream, "-ERR: file not found (%s)\n", file_name);
548 | goto out;
549 | }
550 |
551 | file_ext = strrchr(file_name, '.');
552 | if(!file_ext) {
553 | stream->write_function(stream, "-ERR: unsupported file encoding (null)\n");
554 | goto out;
555 | }
556 |
557 | file_ext++;
558 | if(strcasecmp("mp3", file_ext) && strcasecmp("wav", file_ext)) {
559 | stream->write_function(stream, "-ERR: unsupported file encoding (%s)\n", file_ext);
560 | goto out;
561 | }
562 |
563 | if(switch_buffer_create_dynamic(&recv_buf, 1024, 2048, 8192) != SWITCH_STATUS_SUCCESS) {
564 | stream->write_function(stream, "-ERR: switch_buffer_create_dynamic()\n");
565 | goto out;
566 | }
567 |
568 | if(argc > 1) {
569 | for(int i = 1; i < argc; i++) {
570 | char *kvp[2] = { 0 };
571 | if(switch_separate_string(argv[i], '=', kvp, 2) >= 2) {
572 | if(strcasecmp(kvp[0], "key") == 0) {
573 | if(kvp[1]) opt_api_key = kvp[1];
574 | } else if(strcasecmp(kvp[0], "model") == 0) {
575 | if(kvp[1]) opt_model = kvp[1];
576 | }
577 | }
578 | }
579 | }
580 |
581 | status = curl_perform(recv_buf, opt_api_key, opt_model, file_name, &globals);
582 |
583 | recv_len = switch_buffer_peek_zerocopy(recv_buf, &response_ptr);
584 | if(status == SWITCH_STATUS_SUCCESS && response_ptr && recv_len) {
585 | char *txt = parse_response((char *)response_ptr, stream);
586 | if(txt) {
587 | stream->write_function(stream, "+OK: %s\n", txt);
588 | }
589 | switch_safe_free(txt);
590 | } else {
591 | stream->write_function(stream, "-ERR: unable to perform request\n");
592 | }
593 |
594 | goto out;
595 | usage:
596 | stream->write_function(stream, "-ERR:\nUsage: %s\n", CMD_SYNTAX);
597 |
598 | out:
599 | if(recv_buf) {
600 | switch_buffer_destroy(&recv_buf);
601 | }
602 |
603 | switch_safe_free(mycmd);
604 | return SWITCH_STATUS_SUCCESS;
605 | }
606 |
607 | // ---------------------------------------------------------------------------------------------------------------------------------------------
608 | // main
609 | // ---------------------------------------------------------------------------------------------------------------------------------------------
610 | SWITCH_MODULE_LOAD_FUNCTION(mod_openai_asr_load) {
611 | switch_status_t status = SWITCH_STATUS_SUCCESS;
612 | switch_xml_t cfg, xml, settings, param;
613 | switch_api_interface_t *commands_interface;
614 | switch_asr_interface_t *asr_interface;
615 |
616 | memset(&globals, 0, sizeof(globals));
617 | switch_mutex_init(&globals.mutex, SWITCH_MUTEX_NESTED, pool);
618 |
619 | if((xml = switch_xml_open_cfg(MOD_CONFIG_NAME, &cfg, NULL)) == NULL) {
620 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to open configuration: %s\n", MOD_CONFIG_NAME);
621 | switch_goto_status(SWITCH_STATUS_GENERR, out);
622 | }
623 |
624 | if((settings = switch_xml_child(cfg, "settings"))) {
625 | for (param = switch_xml_child(settings, "param"); param; param = param->next) {
626 | char *var = (char *) switch_xml_attr_soft(param, "name");
627 | char *val = (char *) switch_xml_attr_soft(param, "value");
628 |
629 | if(!strcasecmp(var, "vad-silence-ms")) {
630 | if(val) globals.vad_silence_ms = atoi (val);
631 | } else if(!strcasecmp(var, "vad-voice-ms")) {
632 | if(val) globals.vad_voice_ms = atoi (val);
633 | } else if(!strcasecmp(var, "vad-threshold")) {
634 | if(val) globals.vad_threshold = atoi (val);
635 | } else if(!strcasecmp(var, "vad-debug")) {
636 | if(val) globals.fl_vad_debug = switch_true(val);
637 | } else if(!strcasecmp(var, "api-key")) {
638 | if(val) globals.api_key = switch_core_strdup(pool, val);
639 | } else if(!strcasecmp(var, "api-url")) {
640 | if(val) globals.api_url = switch_core_strdup(pool, val);
641 | } else if(!strcasecmp(var, "user-agent")) {
642 | if(val) globals.user_agent = switch_core_strdup(pool, val);
643 | } else if(!strcasecmp(var, "proxy")) {
644 | if(val) globals.proxy = switch_core_strdup(pool, val);
645 | } else if(!strcasecmp(var, "proxy-credentials")) {
646 | if(val) globals.proxy_credentials = switch_core_strdup(pool, val);
647 | } else if(!strcasecmp(var, "encoding")) {
648 | if(val) globals.opt_encoding = switch_core_strdup(pool, val);
649 | } else if(!strcasecmp(var, "model")) {
650 | if(val) globals.opt_model= switch_core_strdup(pool, val);
651 | } else if(!strcasecmp(var, "speech-max-sec")) {
652 | if(val) globals.speech_max_sec = atoi(val);
653 | } else if(!strcasecmp(var, "speech-silence-sec")) {
654 | if(val) globals.speech_silence_sec = atoi(val);
655 | } else if(!strcasecmp(var, "request-timeout")) {
656 | if(val) globals.request_timeout = atoi(val);
657 | } else if(!strcasecmp(var, "connect-timeout")) {
658 | if(val) globals.connect_timeout = atoi(val);
659 | } else if(!strcasecmp(var, "log-http-errors")) {
660 | if(val) globals.fl_log_http_errors = switch_true(val);
661 | } else if(!strcasecmp(var, "retries-on-error")) {
662 | if(val) globals.retries_on_error = atoi(val);
663 | }
664 | }
665 | }
666 |
667 | if(!globals.api_url) {
668 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing required parameter: api-url\n");
669 | switch_goto_status(SWITCH_STATUS_GENERR, out);
670 | }
671 |
672 | globals.opt_encoding = globals.opt_encoding ? globals.opt_encoding : "wav";
673 | globals.speech_max_sec = !globals.speech_max_sec ? 35 : globals.speech_max_sec;
674 | globals.speech_silence_sec = !globals.speech_silence_sec ? 3 : globals.speech_silence_sec;
675 | globals.retries_on_error = !globals.retries_on_error ? 1 : globals.retries_on_error;
676 |
677 | globals.tmp_path = switch_core_sprintf(pool, "%s%sopenai-asr-cache", SWITCH_GLOBAL_dirs.temp_dir, SWITCH_PATH_SEPARATOR);
678 | if(switch_directory_exists(globals.tmp_path, NULL) != SWITCH_STATUS_SUCCESS) {
679 | switch_dir_make(globals.tmp_path, SWITCH_FPROT_OS_DEFAULT, NULL);
680 | }
681 |
682 | *module_interface = switch_loadable_module_create_module_interface(pool, modname);
683 | SWITCH_ADD_API(commands_interface, "openai_asr_transcript", "OpenAI speech-to-text", openai_asr_cmd_handler, CMD_SYNTAX);
684 |
685 | asr_interface = switch_loadable_module_create_interface(*module_interface, SWITCH_ASR_INTERFACE);
686 | asr_interface->interface_name = "openai";
687 | asr_interface->asr_open = asr_open;
688 | asr_interface->asr_close = asr_close;
689 | asr_interface->asr_feed = asr_feed;
690 | asr_interface->asr_pause = asr_pause;
691 | asr_interface->asr_resume = asr_resume;
692 | asr_interface->asr_check_results = asr_check_results;
693 | asr_interface->asr_get_results = asr_get_results;
694 | asr_interface->asr_start_input_timers = asr_start_input_timers;
695 | asr_interface->asr_text_param = asr_text_param;
696 | asr_interface->asr_numeric_param = asr_numeric_param;
697 | asr_interface->asr_float_param = asr_float_param;
698 | asr_interface->asr_load_grammar = asr_load_grammar;
699 | asr_interface->asr_unload_grammar = asr_unload_grammar;
700 |
701 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_NOTICE, "OpenAI-ASR (%s)\n", MOD_VERSION);
702 | out:
703 | if(xml) {
704 | switch_xml_free(xml);
705 | }
706 | return status;
707 | }
708 |
709 | SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_openai_asr_shutdown) {
710 | uint8_t fl_wloop = SWITCH_TRUE;
711 |
712 | globals.fl_shutdown = SWITCH_TRUE;
713 |
714 | switch_mutex_lock(globals.mutex);
715 | fl_wloop = (globals.active_threads > 0);
716 | switch_mutex_unlock(globals.mutex);
717 |
718 | if(fl_wloop) {
719 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Waiting for termination (%d) threads...\n", globals.active_threads);
720 | while(fl_wloop) {
721 | switch_mutex_lock(globals.mutex);
722 | fl_wloop = (globals.active_threads > 0);
723 | switch_mutex_unlock(globals.mutex);
724 | switch_yield(100000);
725 | }
726 | }
727 |
728 | return SWITCH_STATUS_SUCCESS;
729 | }
730 |
--------------------------------------------------------------------------------
/sources/mod_openai_asr.h:
--------------------------------------------------------------------------------
1 | /*
2 | * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application
3 | * Copyright (C) 2005-2014, Anthony Minessale II
4 | *
5 | * Version: MPL 1.1
6 | *
7 | * The contents of this file are subject to the Mozilla Public License Version
8 | * 1.1 (the "License"); you may not use this file except in compliance with
9 | * the License. You may obtain a copy of the License at
10 | * http://www.mozilla.org/MPL/
11 | *
12 | * Software distributed under the License is distributed on an "AS IS" basis,
13 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14 | * for the specific language governing rights and limitations under the
15 | * License.
16 | *
17 | * Module Contributor(s):
18 | * Konstantin Alexandrin
19 | *
20 | *
21 | */
22 | #ifndef MOD_OPENAI_ASR_H
23 | #define MOD_OPENAI_ASR_H
24 |
25 | #include
26 | #include
27 | #include
28 |
29 | #define MIN(a,b) (((a)<(b))?(a):(b))
30 | #define MAX(a,b) (((a)>(b))?(a):(b))
31 |
32 | #define MOD_CONFIG_NAME "openai_asr.conf"
33 | #define MOD_VERSION "1.0.4"
34 | #define QUEUE_SIZE 128
35 | #define VAD_STORE_FRAMES 64
36 | #define VAD_RECOVERY_FRAMES 20
37 |
38 | //#define MOD_OPENAI_ASR_DEBUG
39 |
40 | typedef struct {
41 | switch_mutex_t *mutex;
42 | uint32_t active_threads;
43 | uint32_t speech_max_sec;
44 | uint32_t speech_silence_sec;
45 | uint32_t vad_silence_ms;
46 | uint32_t vad_voice_ms;
47 | uint32_t vad_threshold;
48 | uint32_t request_timeout; // secondss
49 | uint32_t connect_timeout; // seconds
50 | uint32_t retries_on_error;
51 | uint8_t fl_vad_debug;
52 | uint8_t fl_shutdown;
53 | uint8_t fl_log_http_errors;
54 | char *tmp_path;
55 | char *api_key;
56 | char *api_url;
57 | char *user_agent;
58 | char *proxy;
59 | char *proxy_credentials;
60 | char *opt_encoding;
61 | char *opt_model;
62 | } globals_t;
63 |
64 | typedef struct {
65 | switch_memory_pool_t *pool;
66 | switch_vad_t *vad;
67 | switch_buffer_t *vad_buffer;
68 | switch_mutex_t *mutex;
69 | switch_queue_t *q_audio;
70 | switch_queue_t *q_text;
71 | switch_buffer_t *curl_recv_buffer_ref;
72 | switch_vad_state_t vad_state;
73 | char *opt_lang;
74 | char *opt_model;
75 | char *opt_api_key;
76 | int32_t transcription_results;
77 | uint32_t retries_on_error;
78 | uint32_t vad_buffer_size;
79 | uint32_t vad_stored_frames;
80 | uint32_t chunk_buffer_size;
81 | uint32_t refs;
82 | uint32_t samplerate;
83 | uint32_t channels;
84 | uint32_t frame_len;
85 | uint32_t silence_sec;
86 | uint8_t fl_start_timers;
87 | uint8_t fl_pause;
88 | uint8_t fl_vad_first_cycle;
89 | uint8_t fl_destroyed;
90 | uint8_t fl_abort;
91 | } asr_ctx_t;
92 |
93 | typedef struct {
94 | uint32_t len;
95 | switch_byte_t *data;
96 | } xdata_buffer_t;
97 |
98 | /* curl.c */
99 | switch_status_t curl_perform(switch_buffer_t *recv_buffer, char *api_key, char *model_name, char *filename, globals_t *globals);
100 |
101 | /* utils.c */
102 | char *chunk_write(switch_byte_t *buf, uint32_t buf_len, uint32_t channels, uint32_t samplerate, const char *file_ext);
103 | switch_status_t xdata_buffer_push(switch_queue_t *queue, switch_byte_t *data, uint32_t data_len);
104 | switch_status_t xdata_buffer_alloc(xdata_buffer_t **out, switch_byte_t *data, uint32_t data_len);
105 | void xdata_buffer_free(xdata_buffer_t **buf);
106 | void xdata_buffer_queue_clean(switch_queue_t *queue);
107 | void text_queue_clean(switch_queue_t *queue);
108 | char *parse_response(char *data, switch_stream_handle_t *stream);
109 |
110 | #endif
111 |
--------------------------------------------------------------------------------
/sources/utils.c:
--------------------------------------------------------------------------------
1 | /*
2 | * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application
3 | * Copyright (C) 2005-2014, Anthony Minessale II
4 | *
5 | * Version: MPL 1.1
6 | *
7 | * The contents of this file are subject to the Mozilla Public License Version
8 | * 1.1 (the "License"); you may not use this file except in compliance with
9 | * the License. You may obtain a copy of the License at
10 | * http://www.mozilla.org/MPL/
11 | *
12 | * Software distributed under the License is distributed on an "AS IS" basis,
13 | * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
14 | * for the specific language governing rights and limitations under the
15 | * License.
16 | *
17 | * Module Contributor(s):
18 | * Konstantin Alexandrin
19 | *
20 | *
21 | */
22 | #include "mod_openai_asr.h"
23 |
24 | extern globals_t globals;
25 |
26 | switch_status_t xdata_buffer_alloc(xdata_buffer_t **out, switch_byte_t *data, uint32_t data_len) {
27 | xdata_buffer_t *buf = NULL;
28 |
29 | switch_zmalloc(buf, sizeof(xdata_buffer_t));
30 |
31 | if(data_len) {
32 | switch_malloc(buf->data, data_len);
33 | switch_assert(buf->data);
34 |
35 | buf->len = data_len;
36 | memcpy(buf->data, data, data_len);
37 | }
38 |
39 | *out = buf;
40 | return SWITCH_STATUS_SUCCESS;
41 | }
42 |
43 | void xdata_buffer_free(xdata_buffer_t **buf) {
44 | if(buf && *buf) {
45 | switch_safe_free((*buf)->data);
46 | free(*buf);
47 | }
48 | }
49 |
50 | void xdata_buffer_queue_clean(switch_queue_t *queue) {
51 | xdata_buffer_t *data = NULL;
52 |
53 | if(!queue || !switch_queue_size(queue)) {
54 | return;
55 | }
56 |
57 | while(switch_queue_trypop(queue, (void *) &data) == SWITCH_STATUS_SUCCESS) {
58 | if(data) { xdata_buffer_free(&data); }
59 | }
60 | }
61 |
62 | switch_status_t xdata_buffer_push(switch_queue_t *queue, switch_byte_t *data, uint32_t data_len) {
63 | xdata_buffer_t *buff = NULL;
64 |
65 | if(xdata_buffer_alloc(&buff, data, data_len) == SWITCH_STATUS_SUCCESS) {
66 | if(switch_queue_trypush(queue, buff) == SWITCH_STATUS_SUCCESS) {
67 | return SWITCH_STATUS_SUCCESS;
68 | }
69 | xdata_buffer_free(&buff);
70 | }
71 | return SWITCH_STATUS_FALSE;
72 | }
73 |
74 | char *chunk_write(switch_byte_t *buf, uint32_t buf_len, uint32_t channels, uint32_t samplerate, const char *file_ext) {
75 | switch_status_t status = SWITCH_STATUS_FALSE;
76 | switch_size_t len = (buf_len / sizeof(int16_t));
77 | switch_file_handle_t fh = { 0 };
78 | char *file_name = NULL;
79 | char name_uuid[SWITCH_UUID_FORMATTED_LENGTH + 1] = { 0 };
80 | int flags = (SWITCH_FILE_FLAG_WRITE | SWITCH_FILE_DATA_SHORT);
81 |
82 | switch_uuid_str((char *)name_uuid, sizeof(name_uuid));
83 | file_name = switch_mprintf("%s%s%s.%s", globals.tmp_path, SWITCH_PATH_SEPARATOR, name_uuid, (file_ext == NULL ? "wav" : file_ext) );
84 |
85 | if((status = switch_core_file_open(&fh, file_name, channels, samplerate, flags, NULL)) != SWITCH_STATUS_SUCCESS) {
86 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to open file (%s)\n", file_name);
87 | goto out;
88 | }
89 |
90 | if((status = switch_core_file_write(&fh, buf, &len)) != SWITCH_STATUS_SUCCESS) {
91 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to write (%s)\n", file_name);
92 | goto out;
93 | }
94 |
95 | switch_core_file_close(&fh);
96 | out:
97 | if(status != SWITCH_STATUS_SUCCESS) {
98 | if(file_name) {
99 | unlink(file_name);
100 | switch_safe_free(file_name);
101 | }
102 | return NULL;
103 | }
104 |
105 | return file_name;
106 | }
107 |
108 | void text_queue_clean(switch_queue_t *queue) {
109 | void *data = NULL;
110 |
111 | if(!queue || !switch_queue_size(queue)) {
112 | return;
113 | }
114 |
115 | while(switch_queue_trypop(queue, (void *)&data) == SWITCH_STATUS_SUCCESS) {
116 | switch_safe_free(data);
117 | }
118 | }
119 |
120 | char *parse_response(char *data, switch_stream_handle_t *stream) {
121 | char *result = NULL;
122 | cJSON *json = NULL;
123 |
124 | if(!data) {
125 | return NULL;
126 | }
127 |
128 | if(!(json = cJSON_Parse(data))) {
129 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to parse json (%s)\n", data);
130 | if(stream) stream->write_function(stream, "-ERR: Unable to parse json (see log)\n");
131 | } else {
132 | cJSON *jres = cJSON_GetObjectItem(json, "error");
133 | if(jres) {
134 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Service returns error (%s)\n", data);
135 | if(stream) stream->write_function(stream, "-ERR: Service returns error (see log)\n");
136 | } else {
137 | cJSON *jres = cJSON_GetObjectItem(json, "text");
138 | if(jres) {
139 | result = strdup(jres->valuestring);
140 | }
141 | }
142 | }
143 |
144 | if(json) {
145 | cJSON_Delete(json);
146 | }
147 |
148 | return result;
149 | }
150 |
--------------------------------------------------------------------------------