├── Makefile ├── README.md └── mod_vad.c /Makefile: -------------------------------------------------------------------------------- 1 | #BASE=/usr/local/src/freeswitch-1.10.1.-release 2 | BASE=../../../../ 3 | include $(BASE)/build/modmake.rules 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # mod_vad 2 | a voice activity detection module for freeswitch. 3 | 4 | just put the module directory under the folder freeswitch-xx.xx.xx/src/mod/application/ and make! 5 | -------------------------------------------------------------------------------- /mod_vad.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | typedef struct { 5 | switch_core_session_t *session; 6 | switch_codec_implementation_t *read_impl; 7 | switch_media_bug_t *read_bug; 8 | switch_audio_resampler_t *read_resampler; 9 | 10 | int talking; 11 | int talked; 12 | int talk_hits; 13 | int listen_hits; 14 | int hangover; 15 | int hangover_len; 16 | int divisor; 17 | int thresh; 18 | int channels; 19 | int sample_rate; 20 | int debug; 21 | int _hangover_len; 22 | int _thresh; 23 | int _listen_hits; 24 | switch_vad_state_t vad_state; 25 | switch_vad_t *svad; 26 | } wavin_vad_t; 27 | 28 | #define VAD_MODULE_DESC "module voice activity detection(vad)" 29 | 30 | #define VAD_PRIVATE "_vad_" 31 | #define VAD_EVENT_SUBCLASS "vad::detection" 32 | 33 | SWITCH_MODULE_LOAD_FUNCTION(mod_vad_load); 34 | SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_vad_shutdown); 35 | SWITCH_MODULE_DEFINITION(mod_vad, mod_vad_load, mod_vad_shutdown, NULL); 36 | 37 | // define vad functions 38 | SWITCH_STANDARD_APP(vad_app_function); 39 | static switch_bool_t fire_vad_event(switch_core_session_t *session, switch_vad_state_t vad_state); 40 | SWITCH_DECLARE(const char *) get_vad_state(switch_vad_state_t state); 41 | static switch_bool_t vad_audio_callback(switch_media_bug_t *bug, void *user_data, switch_abc_type_t type); 42 | SWITCH_STANDARD_API(vad_api_function); 43 | 44 | #define VAD_SYNTAX " " 45 | SWITCH_STANDARD_API(vad_api_function) 46 | { 47 | switch_core_session_t *target_session; 48 | 49 | char *lbuf = NULL, *argv[2]; 50 | int argc = 0; 51 | 52 | if (!zstr(cmd) && (lbuf = strdup(cmd)) && 53 | (argc = switch_separate_string(lbuf, ' ', argv, (sizeof(argv) / sizeof(argv[0])))) == 2) { 54 | if ((target_session = switch_core_session_locate(argv[1]))) { 55 | vad_app_function(target_session, argv[0]); 56 | switch_core_session_rwunlock(target_session); 57 | } 58 | } else { 59 | stream->write_function(stream, "-USAGE: %s\n", VAD_SYNTAX); 60 | return SWITCH_STATUS_SUCCESS; 61 | } 62 | 63 | return SWITCH_STATUS_SUCCESS; 64 | } 65 | // implements vad functions 66 | SWITCH_STANDARD_APP(vad_app_function) 67 | { 68 | switch_status_t status; 69 | wavin_vad_t *s_vad = NULL; 70 | switch_channel_t *channel = switch_core_session_get_channel(session); 71 | switch_codec_implementation_t imp = {0}; 72 | int flags = 0; 73 | int mode = -1; 74 | const char *var = NULL; 75 | int tmp; 76 | 77 | if (!zstr(data)) { 78 | switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "VAD input parameter %s\n", data); 79 | } 80 | 81 | if ((var = switch_channel_get_variable(channel, "vad_mode"))) { 82 | mode = atoi(var); 83 | if (mode > 3) mode = 3; 84 | } 85 | 86 | if (mode == -1) { mode = 0; } 87 | 88 | if ((s_vad = (wavin_vad_t *)switch_channel_get_private(channel, VAD_PRIVATE))) { 89 | if (!zstr(data) && !strcasecmp(data, "stop")) { 90 | switch_channel_set_private(channel, VAD_PRIVATE, NULL); 91 | if (s_vad->read_bug) { 92 | switch_core_media_bug_remove(session, &s_vad->read_bug); 93 | s_vad->read_bug = NULL; 94 | switch_core_session_reset(session, SWITCH_TRUE, SWITCH_TRUE); 95 | } 96 | switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "Stopped VAD detection\n"); 97 | } else { 98 | switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_WARNING, 99 | "Cannot run vad detection 2 times on the same session!\n"); 100 | } 101 | return; 102 | } 103 | 104 | s_vad = switch_core_session_alloc(session, sizeof(*s_vad)); 105 | switch_assert(s_vad); 106 | memset(s_vad, 0, sizeof(*s_vad)); 107 | s_vad->session = session; 108 | 109 | switch_core_session_raw_read(session); 110 | switch_core_session_get_read_impl(session, &imp); 111 | switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_INFO, "Read imp sample:[%u] channels:[%u].\n", 112 | imp.samples_per_second, imp.number_of_channels); 113 | s_vad->sample_rate = imp.samples_per_second ? imp.samples_per_second : 8000; 114 | s_vad->channels = imp.number_of_channels; 115 | 116 | s_vad->svad = switch_vad_init(s_vad->sample_rate, s_vad->channels); 117 | switch_assert(s_vad->svad); 118 | 119 | switch_vad_set_mode(s_vad->svad, mode); 120 | 121 | if ((var = switch_channel_get_variable(channel, "vad_debug"))) { 122 | tmp = atoi(var); 123 | 124 | if (tmp < 0) tmp = 0; 125 | if (tmp > 1) tmp = 1; 126 | 127 | switch_vad_set_param(s_vad->svad, "debug", tmp); 128 | } 129 | 130 | if ((var = switch_channel_get_variable(channel, "vad_silence_ms"))) { 131 | tmp = atoi(var); 132 | 133 | if (tmp > 0) switch_vad_set_param(s_vad->svad, "silence_ms", tmp); 134 | } 135 | 136 | if ((var = switch_channel_get_variable(channel, "vad_thresh"))) { 137 | tmp = atoi(var); 138 | 139 | if (tmp > 0) switch_vad_set_param(s_vad->svad, "thresh", tmp); 140 | } 141 | 142 | if ((var = switch_channel_get_variable(channel, "vad_voice_ms"))) { 143 | tmp = atoi(var); 144 | 145 | if (tmp > 0) switch_vad_set_param(s_vad->svad, "voice_ms", tmp); 146 | } 147 | 148 | flags = SMBF_READ_REPLACE | SMBF_ANSWER_REQ; 149 | status = 150 | switch_core_media_bug_add(session, "vad_read", NULL, vad_audio_callback, s_vad, 0, flags, &s_vad->read_bug); 151 | 152 | if (status != SWITCH_STATUS_SUCCESS) { 153 | switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, 154 | "Failed to attach vad to media stream!\n"); 155 | return; 156 | } 157 | 158 | switch_channel_set_private(channel, VAD_PRIVATE, s_vad); 159 | } 160 | 161 | SWITCH_MODULE_LOAD_FUNCTION(mod_vad_load) 162 | { 163 | switch_application_interface_t *app_interface; 164 | switch_api_interface_t *api_interface; 165 | 166 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "mod_vad loading...\n"); 167 | 168 | *module_interface = switch_loadable_module_create_module_interface(pool, modname); 169 | SWITCH_ADD_APP(app_interface, "vad", "voice activity detection(vad)", VAD_MODULE_DESC, vad_app_function, 170 | "", SAF_MEDIA_TAP); 171 | 172 | SWITCH_ADD_API(api_interface, "uuid_vad", "voice activity detection(vad)", vad_api_function, VAD_SYNTAX); 173 | 174 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "mod_vad loaded successful...\n"); 175 | return SWITCH_STATUS_SUCCESS; 176 | } 177 | 178 | SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_vad_shutdown) 179 | { 180 | switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "mod_vad shutdown...\n"); 181 | return SWITCH_STATUS_SUCCESS; 182 | } 183 | 184 | // ·¢ËÍʼþ 185 | static switch_bool_t fire_vad_event(switch_core_session_t *session, switch_vad_state_t vad_state) 186 | { 187 | switch_event_t *event = NULL; 188 | switch_channel_t *channel = switch_core_session_get_channel(session); 189 | 190 | switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_INFO, "Fire VAD event %s\n", 191 | get_vad_state(vad_state)); 192 | switch_event_create_subclass(&event, SWITCH_EVENT_CUSTOM, VAD_EVENT_SUBCLASS); 193 | if (event) { 194 | switch (vad_state) { 195 | case SWITCH_VAD_STATE_START_TALKING: 196 | switch_event_add_header_string(event, SWITCH_STACK_BOTTOM, "vad_state", "start_talking"); 197 | break; 198 | case SWITCH_VAD_STATE_STOP_TALKING: 199 | switch_event_add_header_string(event, SWITCH_STACK_BOTTOM, "vad_state", "stop_talking"); 200 | break; 201 | default: 202 | break; 203 | } 204 | switch_event_add_header_string(event, SWITCH_STACK_BOTTOM, "vender", "wavin"); 205 | switch_event_add_header_string(event, SWITCH_STACK_BOTTOM, "product_name", "vad"); 206 | switch_channel_event_set_data(channel, event); 207 | switch_event_fire(&event); 208 | } else { 209 | switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, 210 | "Failed to fire VAD Complete event %d\n", vad_state); 211 | } 212 | switch_event_destroy(&event); 213 | return SWITCH_TRUE; 214 | } 215 | 216 | SWITCH_DECLARE(const char *) get_vad_state(switch_vad_state_t state) 217 | { 218 | switch (state) { 219 | case SWITCH_VAD_STATE_NONE: 220 | return "none"; 221 | case SWITCH_VAD_STATE_START_TALKING: 222 | return "start_talking"; 223 | case SWITCH_VAD_STATE_TALKING: 224 | return "talking"; 225 | case SWITCH_VAD_STATE_STOP_TALKING: 226 | return "stop_talking"; 227 | default: 228 | return "error"; 229 | } 230 | } 231 | 232 | static switch_bool_t vad_audio_callback(switch_media_bug_t *bug, void *user_data, switch_abc_type_t type) 233 | { 234 | wavin_vad_t *vad = (wavin_vad_t *)user_data; 235 | switch_core_session_t *session = vad->session; 236 | switch_vad_state_t vad_state; 237 | switch_frame_t *linear_frame; 238 | uint32_t linear_len = 0; 239 | switch_channel_t *channel = switch_core_session_get_channel(session); 240 | 241 | if (!switch_channel_ready(channel)) { 242 | switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "Channel isn't ready\n"); 243 | return SWITCH_FALSE; 244 | } 245 | 246 | if (!switch_channel_media_ready(channel)) { 247 | switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "Channel codec isn't ready\n"); 248 | return SWITCH_FALSE; 249 | } 250 | 251 | switch (type) { 252 | case SWITCH_ABC_TYPE_INIT: 253 | switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_INFO, 254 | "Starting VAD detection for audio stream\n"); 255 | break; 256 | case SWITCH_ABC_TYPE_CLOSE: 257 | if (vad->read_resampler) { switch_resample_destroy(&vad->read_resampler); } 258 | 259 | if (vad->svad) { 260 | switch_vad_destroy(&vad->svad); 261 | vad->svad = NULL; 262 | } 263 | 264 | switch_core_media_bug_flush(bug); 265 | switch_core_session_reset(session, SWITCH_TRUE, SWITCH_TRUE); 266 | switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_INFO, 267 | "Stopping VAD detection for audio stream\n"); 268 | break; 269 | case SWITCH_ABC_TYPE_READ: 270 | case SWITCH_ABC_TYPE_READ_REPLACE: 271 | linear_frame = switch_core_media_bug_get_read_replace_frame(bug); 272 | linear_len = linear_frame->datalen; 273 | 274 | vad_state = switch_vad_process(vad->svad, linear_frame->data, linear_len / 2); 275 | if (vad_state == SWITCH_VAD_STATE_START_TALKING) { 276 | switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_INFO, "START TALKING\n"); 277 | fire_vad_event(session, vad_state); 278 | } else if (vad_state == SWITCH_VAD_STATE_STOP_TALKING) { 279 | switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_INFO, "STOP TALKING\n"); 280 | fire_vad_event(session, vad_state); 281 | switch_vad_reset(vad->svad); 282 | } else if (vad_state == SWITCH_VAD_STATE_TALKING) { 283 | switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "State - TALKING\n"); 284 | } 285 | break; 286 | default: 287 | break; 288 | } 289 | 290 | return SWITCH_TRUE; 291 | } 292 | --------------------------------------------------------------------------------