├── README.md ├── vad ├── Makefile ├── typedef.h ├── vadtest.c ├── wb_vad.c ├── wb_vad.h └── wb_vad_c.h └── vad_baidu ├── Makefile ├── base64.c ├── base64.h ├── cJSON.c ├── cJSON.h ├── http_request.c ├── http_request.h ├── https_request.c ├── https_request.h ├── typedef.h ├── vadtest.c ├── wb_vad.c ├── wb_vad.h └── wb_vad_c.h /README.md: -------------------------------------------------------------------------------- 1 | # vad 2 | VAD(voice activity detection) implement and using for baidu voice recognition 3 | -------------------------------------------------------------------------------- /vad/Makefile: -------------------------------------------------------------------------------- 1 | all:vadtest.c wb_vad.c 2 | gcc -o alsa_vad vadtest.c wb_vad.c -lasound -lm 3 | clean: 4 | rm -f alsa_vad 5 | -------------------------------------------------------------------------------- /vad/typedef.h: -------------------------------------------------------------------------------- 1 | /* 2 | *=================================================================== 3 | * 3GPP AMR Wideband Floating-point Speech Codec 4 | *=================================================================== 5 | */ 6 | #ifndef typedef_h 7 | #define typedef_h 8 | 9 | /* change these typedef declarations to correspond with your platform */ 10 | typedef char Word8; 11 | typedef unsigned char UWord8; 12 | typedef short Word16; 13 | typedef unsigned short UWord16; 14 | typedef long Word32; 15 | typedef double Float64; 16 | typedef float Float32; 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /vad/vadtest.c: -------------------------------------------------------------------------------- 1 | #include "stdio.h" 2 | #include "wb_vad.h" 3 | /* Use the newer ALSA API */ 4 | #define ALSA_PCM_NEW_HW_PARAMS_API 5 | #include 6 | #include 7 | 8 | void main(int argc,char* argv[]) 9 | { 10 | if(argc != 4){ 11 | printf("usage: alsa_vad pcm_device output 
record_period_seconds\nfor example: ./alsa_vad hw:1,0 out 10\n"); 12 | return; 13 | } 14 | 15 | char* pcm_device_name = argv[1]; 16 | char* output_file_name = argv[2]; 17 | int record_seconds = atoi(argv[3]); 18 | // for alsa 19 | long loops; 20 | int rc; 21 | int size; 22 | int size_one_channel; 23 | snd_pcm_t *handle; 24 | snd_pcm_hw_params_t *params; 25 | int dir; 26 | char *buffer; 27 | char *buffer_one_channel; 28 | int i; 29 | 30 | snd_pcm_stream_t stream = SND_PCM_STREAM_CAPTURE; 31 | snd_pcm_access_t mode = SND_PCM_ACCESS_RW_INTERLEAVED; 32 | snd_pcm_format_t format = SND_PCM_FORMAT_U16_LE; 33 | unsigned int channels = 2; 34 | unsigned int rate = 16000; 35 | snd_pcm_uframes_t frames = FRAME_LEN; 36 | int bit_per_sample,bit_per_frame,chunk_bytes; 37 | 38 | //for vda 39 | float indata[FRAME_LEN]; 40 | short outdata[FRAME_LEN]; 41 | VadVars *vadstate; 42 | char name[128]; 43 | int temp,vad; 44 | int recording = 0; 45 | int count = 0; 46 | FILE *fp = NULL; 47 | FILE *fp_all = NULL; 48 | // alsa init 49 | rc = snd_pcm_open(&handle, pcm_device_name,stream, 0); 50 | if (rc < 0) { 51 | fprintf(stderr,"unable to open pcm device: %s\n",snd_strerror(rc)); 52 | exit(1); 53 | } 54 | snd_pcm_hw_params_alloca(¶ms); 55 | snd_pcm_hw_params_any(handle, params); 56 | snd_pcm_hw_params_set_access(handle, params,mode); 57 | snd_pcm_hw_params_set_format(handle, params,format); 58 | snd_pcm_hw_params_set_channels(handle, params, channels); 59 | snd_pcm_hw_params_set_rate_near(handle, params,&rate, 0); 60 | snd_pcm_hw_params_set_period_size_near(handle,params, &frames, 0); 61 | rc = snd_pcm_hw_params(handle, params); 62 | if (rc < 0) { 63 | fprintf(stderr,"unable to set hw parameters: %s\n",snd_strerror(rc)); 64 | exit(1); 65 | } 66 | snd_pcm_hw_params_get_period_size(params,&frames, 0); 67 | size = frames * 4; /* 2 bytes/sample, 2 channels */ 68 | size_one_channel = frames * 2; 69 | buffer = (char*) malloc(size); 70 | buffer_one_channel = (char*) malloc(size_one_channel); 71 | 72 
| //vad init 73 | wb_vad_init(&(vadstate)); 74 | sprintf(name,"%s.pcm",output_file_name); 75 | fp_all = fopen(name,"wb"); 76 | 77 | /* We want to loop for 5 seconds */ 78 | snd_pcm_hw_params_get_period_time(params,&rate, &dir); 79 | loops = record_seconds * 1000000 / rate; 80 | 81 | while (loops > 0) { 82 | loops--; 83 | rc = snd_pcm_readi(handle, buffer, frames); 84 | if (rc == -EPIPE) { 85 | /* EPIPE means overrun */ 86 | fprintf(stderr, "overrun occurred\n"); 87 | snd_pcm_prepare(handle); 88 | } else if (rc < 0) { 89 | fprintf(stderr, "error from read: %s\n",snd_strerror(rc)); 90 | } else if (rc != (int)frames) { 91 | fprintf(stderr, "short read, read %d frames\n", rc); 92 | } 93 | for(i = 0; i< frames; i++) 94 | { 95 | indata[i]=0; 96 | temp = 0; 97 | memcpy(&temp,buffer+4*i,2); 98 | indata[i]=(float)temp; 99 | outdata[i]=temp; 100 | if(indata[i]>65535/2) 101 | indata[i]=indata[i]-65536; 102 | } 103 | vad=wb_vad(vadstate,indata); 104 | if(vad == 1) 105 | recording = 1; 106 | else 107 | recording = 0; 108 | if(recording ==1 && fp == NULL) 109 | { 110 | sprintf(name,"%s.%d.pcm",output_file_name,count); 111 | fp=fopen(name,"wb"); 112 | } 113 | if(recording == 0 && fp != NULL) 114 | { 115 | fclose(fp); 116 | fp = NULL; 117 | count++; 118 | } 119 | if(fp != NULL && recording == 1) 120 | fwrite(outdata,2,FRAME_LEN,fp); 121 | if(fp_all != NULL) 122 | fwrite(outdata,2,FRAME_LEN,fp_all); 123 | } 124 | fcloseall(); 125 | } 126 | -------------------------------------------------------------------------------- /vad/wb_vad.c: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * INCLUDE FILES 3 | ******************************************************************************/ 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "wb_vad.h" 9 | /****************************************************************************** 10 | * PRIVATE PROGRAM CODE 
11 | ******************************************************************************/ 12 | /****************************************************************************** 13 | * 14 | * Function : filter5 15 | * Purpose : Fifth-order half-band lowpass/highpass filter pair with 16 | * decimation. 17 | * 18 | ******************************************************************************* 19 | */ 20 | static void filter5( 21 | float *in0, /* i/o : input values; output low-pass part */ 22 | float *in1, /* i/o : input values; output high-pass part */ 23 | float data[] /* i/o : updated filter memory */ 24 | ) 25 | { 26 | float temp0, temp1, temp2; 27 | temp0 = *in0 - COEFF5_1 * data[0]; 28 | temp1 = data[0] + COEFF5_1 * temp0; 29 | data[0] = temp0; 30 | temp0 = *in1 - COEFF5_2 * data[1]; 31 | temp2 = data[1] + COEFF5_2 * temp0; 32 | data[1] = temp0; 33 | *in0 = (temp1 + temp2)/2.0f; 34 | *in1 = (temp1 - temp2)/2.0f; 35 | } 36 | /****************************************************************************** 37 | * 38 | * Function : filter3 39 | * Purpose : Third-order half-band lowpass/highpass filter pair with 40 | * decimation. 41 | * 42 | ******************************************************************************* 43 | */ 44 | static void filter3( 45 | float *in0, /* i/o : input values; output low-pass part */ 46 | float *in1, /* i/o : input values; output high-pass part */ 47 | float *data /* i/o : updated filter memory */ 48 | ) 49 | { 50 | float temp1, temp2; 51 | temp1 = *in1 - COEFF3 * *data; 52 | temp2 = *data + COEFF3 * temp1; 53 | *data = temp1; 54 | *in1 = (*in0 - temp2)/2.0f; 55 | *in0 = (*in0 + temp2)/2.0f; 56 | } 57 | /****************************************************************************** 58 | * 59 | * Function : level_calculation 60 | * Purpose : Calculate signal level in a sub-band. Level is calculated 61 | * by summing absolute values of the input data. 
62 | * 63 | * Because speech coder has a lookahead, signal level calculated 64 | * over the lookahead (data[count1 - count2]) is stored (*sub_level) 65 | * and added to the level of the next frame. Additionally, group 66 | * delay and decimation of the filter bank is taken into the count 67 | * for the values of the counters (count1, count2). 68 | * 69 | ******************************************************************************* 70 | */ 71 | static float level_calculation( /* return: signal level */ 72 | float data[], /* i : signal buffer */ 73 | float *sub_level, /* i : level calculated at the end of the previous frame*/ 74 | /* o : level of signal calculated from the last */ 75 | /* (count2 - count1) samples */ 76 | Word16 count1, /* i : number of samples to be counted */ 77 | Word16 count2, /* i : number of samples to be counted */ 78 | Word16 ind_m, /* i : step size for the index of the data buffer */ 79 | Word16 ind_a, /* i : starting index of the data buffer */ 80 | float scale /* i : scaling for the level calculation */ 81 | ) 82 | { 83 | double l_temp1, l_temp2; 84 | float level; 85 | Word16 i; 86 | l_temp1 = 0.0; 87 | for (i = count1; i < count2; i++) 88 | { 89 | l_temp1 += fabs(data[ind_m*i+ind_a]); 90 | } 91 | l_temp1 *= 2.0f; 92 | l_temp2 = l_temp1 + *sub_level/scale; 93 | *sub_level = (float)(l_temp1*scale); 94 | for (i = 0; i < count1; i++) 95 | { 96 | l_temp2 += 2.0f*fabs(data[ind_m*i+ind_a]); 97 | } 98 | level = (float)(l_temp2*scale); 99 | return level; 100 | } 101 | /****************************************************************************** 102 | * 103 | * Function : filter_bank 104 | * Purpose : Divide input signal into bands and calculate level of 105 | * the signal in each band 106 | * 107 | ******************************************************************************* 108 | */ 109 | static void filter_bank( 110 | VadVars *st, /* i/o : State struct */ 111 | float in[], /* i : input frame */ 112 | float level[] /* 0 : signal levels at 
each band */ 113 | ) 114 | { 115 | Word16 i; 116 | float tmp_buf[FRAME_LEN]; 117 | /* shift input 1 bit down for safe scaling */ 118 | for (i = 0; i < FRAME_LEN; i++) { 119 | tmp_buf[i] = in[i]/2.0f; 120 | } 121 | /* run the filter bank */ 122 | for (i = 0;i < FRAME_LEN/2; i++) { 123 | filter5(&tmp_buf[2*i],&tmp_buf[2*i+1],st->a_data5[0]); 124 | } 125 | for (i = 0;i < FRAME_LEN/4; i++) { 126 | filter5(&tmp_buf[4*i],&tmp_buf[4*i+2],st->a_data5[1]); 127 | filter5(&tmp_buf[4*i+1],&tmp_buf[4*i+3],st->a_data5[2]); 128 | } 129 | for (i = 0; i < FRAME_LEN/8; i++) 130 | { 131 | filter5(&tmp_buf[8*i], &tmp_buf[8*i+4], st->a_data5[3]); 132 | filter5(&tmp_buf[8*i+2], &tmp_buf[8*i+6], st->a_data5[4]); 133 | filter3(&tmp_buf[8*i+3],&tmp_buf[8*i+7],&st->a_data3[0]); 134 | } 135 | for (i = 0; i < FRAME_LEN/16; i++) 136 | { 137 | filter3(&tmp_buf[16*i+0], &tmp_buf[16*i+8], &st->a_data3[1]); 138 | filter3(&tmp_buf[16*i+4], &tmp_buf[16*i+12], &st->a_data3[2]); 139 | filter3(&tmp_buf[16*i+6], &tmp_buf[16*i+14], &st->a_data3[3]); 140 | } 141 | for (i = 0; i < FRAME_LEN/32; i++) 142 | { 143 | filter3(&tmp_buf[32*i+0], &tmp_buf[32*i+16], &st->a_data3[4]); 144 | filter3(&tmp_buf[32*i+8], &tmp_buf[32*i+24], &st->a_data3[5]); 145 | } 146 | /* calculate levels in each frequency band */ 147 | /* 4800 - 6400 Hz*/ 148 | level[11] = level_calculation(tmp_buf, &st->sub_level[11], 149 | FRAME_LEN/4-48, FRAME_LEN/4, 4, 1, 0.25); 150 | /* 4000 - 4800 Hz*/ 151 | level[10] = level_calculation(tmp_buf, &st->sub_level[10], 152 | FRAME_LEN/8-24, FRAME_LEN/8, 8, 7, 0.5); 153 | /* 3200 - 4000 Hz*/ 154 | level[9] = level_calculation(tmp_buf, &st->sub_level[9], 155 | FRAME_LEN/8-24, FRAME_LEN/8, 8, 3, 0.5); 156 | /* 2400 - 3200 Hz*/ 157 | level[8] = level_calculation(tmp_buf, &st->sub_level[8], 158 | FRAME_LEN/8-24, FRAME_LEN/8, 8, 2, 0.5); 159 | /* 2000 - 2400 Hz*/ 160 | level[7] = level_calculation(tmp_buf, &st->sub_level[7], 161 | FRAME_LEN/16-12, FRAME_LEN/16, 16, 14, 1.0); 162 | /* 1600 - 2000 Hz*/ 163 
| level[6] = level_calculation(tmp_buf, &st->sub_level[6], 164 | FRAME_LEN/16-12, FRAME_LEN/16, 16, 6, 1.0); 165 | /* 1200 - 1600 Hz*/ 166 | level[5] = level_calculation(tmp_buf, &st->sub_level[5], 167 | FRAME_LEN/16-12, FRAME_LEN/16, 16, 4, 1.0); 168 | /* 800 - 1200 Hz*/ 169 | level[4] = level_calculation(tmp_buf, &st->sub_level[4], 170 | FRAME_LEN/16-12, FRAME_LEN/16, 16, 12, 1.0); 171 | /* 600 - 800 Hz*/ 172 | level[3] = level_calculation(tmp_buf, &st->sub_level[3], 173 | FRAME_LEN/32-6, FRAME_LEN/32, 32, 8, 2.0); 174 | /* 400 - 600 Hz*/ 175 | level[2] = level_calculation(tmp_buf, &st->sub_level[2], 176 | FRAME_LEN/32-6, FRAME_LEN/32, 32, 24, 2.0); 177 | /* 200 - 400 Hz*/ 178 | level[1] = level_calculation(tmp_buf, &st->sub_level[1], 179 | FRAME_LEN/32-6, FRAME_LEN/32, 32, 16, 2.0); 180 | /* 0 - 200 Hz*/ 181 | level[0] = level_calculation(tmp_buf, &st->sub_level[0], 182 | FRAME_LEN/32-6, FRAME_LEN/32, 32, 0, 2.0); 183 | } 184 | /****************************************************************************** 185 | * 186 | * Function : update_cntrl 187 | * Purpose : Control update of the background noise estimate. 
188 | * 189 | ******************************************************************************* 190 | */ 191 | static void update_cntrl( 192 | VadVars *st, /* i/o : State structure */ 193 | float level[] /* i : sub-band levels of the input frame */ 194 | ) 195 | { 196 | Word16 i; 197 | float stat_rat; 198 | float num, denom; 199 | float alpha; 200 | /* if fullband pitch or tone have been detected for a while, initialize stat_count */ 201 | if ((st->pitch_tone & 0x7c00) == 0x7c00) 202 | { 203 | st->stat_count = STAT_COUNT; 204 | } 205 | else 206 | { 207 | /* if 8 last vad-decisions have been "0", reinitialize stat_count */ 208 | if ((st->vadreg & 0x7f80) == 0) 209 | { 210 | st->stat_count = STAT_COUNT; 211 | } 212 | else 213 | { 214 | stat_rat = 0; 215 | for (i = 0; i < COMPLEN; i++) 216 | { 217 | if (level[i] > st->ave_level[i]) 218 | { 219 | num = level[i]; 220 | denom = st->ave_level[i]; 221 | } 222 | else 223 | { 224 | num = st->ave_level[i]; 225 | denom = level[i]; 226 | } 227 | /* Limit nimimum value of num and denom to STAT_THR_LEVEL */ 228 | if (num < STAT_THR_LEVEL) 229 | { 230 | num = STAT_THR_LEVEL; 231 | } 232 | if (denom < STAT_THR_LEVEL) 233 | { 234 | denom = STAT_THR_LEVEL; 235 | } 236 | stat_rat += num/denom * 64; 237 | } 238 | /* compare stat_rat with a threshold and update stat_count */ 239 | if (stat_rat > STAT_THR) 240 | { 241 | st->stat_count = STAT_COUNT; 242 | } 243 | else 244 | { 245 | if ((st->vadreg & 0x4000) != 0) 246 | { 247 | if (st->stat_count != 0) 248 | { 249 | st->stat_count--; 250 | } 251 | } 252 | } 253 | } 254 | } 255 | /* Update average amplitude estimate for stationarity estimation */ 256 | alpha = ALPHA4; 257 | if (st->stat_count == STAT_COUNT) 258 | { 259 | alpha = 1.0; 260 | } 261 | else if ((st->vadreg & 0x4000) == 0) 262 | { 263 | alpha = ALPHA5; 264 | } 265 | for (i = 0; i < COMPLEN; i++) 266 | { 267 | st->ave_level[i] += alpha *(level[i]- st->ave_level[i]); 268 | } 269 | } 270 | 
/****************************************************************************** 271 | * 272 | * Function : hangover_addition 273 | * Purpose : Add hangover after speech bursts 274 | * 275 | ******************************************************************************* 276 | */ 277 | static Word16 hangover_addition( /* return: VAD_flag indicating final VAD decision */ 278 | VadVars *st, /* i/o : State structure */ 279 | Word16 low_power, /* i : flag power of the input frame */ 280 | Word16 hang_len, /* i : hangover length */ 281 | Word16 burst_len /* i : minimum burst length for hangover addition */ 282 | ) 283 | { 284 | /* if the input power (pow_sum) is lower than a threshold, clear 285 | counters and set VAD_flag to "0" "fast exit" */ 286 | if (low_power != 0) 287 | { 288 | st->burst_count = 0; 289 | st->hang_count = 0; 290 | return 0; 291 | } 292 | /* update the counters (hang_count, burst_count) */ 293 | if ((st->vadreg & 0x4000) != 0) 294 | { 295 | st->burst_count++; 296 | if (st->burst_count >= burst_len) 297 | { 298 | st->hang_count = hang_len; 299 | } 300 | return 1; 301 | } 302 | else 303 | { 304 | st->burst_count = 0; 305 | if (st->hang_count > 0) 306 | { 307 | st->hang_count--; 308 | return 1; 309 | } 310 | } 311 | return 0; 312 | } 313 | /****************************************************************************** 314 | * 315 | * Function : noise_estimate_update 316 | * Purpose : Update of background noise estimate 317 | * 318 | ******************************************************************************* 319 | */ 320 | static void noise_estimate_update( 321 | VadVars *st, /* i/o : State structure */ 322 | float level[] /* i : sub-band levels of the input frame */ 323 | ) 324 | { 325 | Word16 i; 326 | float alpha_up, alpha_down, bckr_add, temp; 327 | /* Control update of bckr_est[] */ 328 | update_cntrl(st, level); 329 | /* Choose update speed */ 330 | bckr_add = 2.0; 331 | if ((0x7800 & st->vadreg) == 0) 332 | { 333 | alpha_up = ALPHA_UP1; 334 | 
alpha_down = ALPHA_DOWN1; 335 | } 336 | else 337 | { 338 | if (st->stat_count == 0) 339 | { 340 | alpha_up = ALPHA_UP2; 341 | alpha_down = ALPHA_DOWN2; 342 | } 343 | else 344 | { 345 | alpha_up = 0.0; 346 | alpha_down = ALPHA3; 347 | bckr_add = 0.0; 348 | } 349 | } 350 | /* Update noise estimate (bckr_est) */ 351 | for (i = 0; i < COMPLEN; i++) 352 | { 353 | temp = st->old_level[i] - st->bckr_est[i]; 354 | if (temp < 0.0) 355 | { /* update downwards*/ 356 | st->bckr_est[i] += -2 + (alpha_down * temp); 357 | /* limit minimum value of the noise estimate to NOISE_MIN */ 358 | if (st->bckr_est[i] < NOISE_MIN) 359 | { 360 | st->bckr_est[i] = NOISE_MIN; 361 | } 362 | } 363 | else 364 | { /* update upwards */ 365 | st->bckr_est[i] += bckr_add +(alpha_up * temp); 366 | /* limit maximum value of the noise estimate to NOISE_MAX */ 367 | if (st->bckr_est[i] > NOISE_MAX) 368 | { 369 | st->bckr_est[i] = NOISE_MAX; 370 | } 371 | } 372 | } 373 | /* Update signal levels of the previous frame (old_level) */ 374 | for(i = 0; i < COMPLEN; i++) 375 | { 376 | st->old_level[i] = level[i]; 377 | } 378 | } 379 | /****************************************************************************** 380 | * 381 | * Function : vad_decision 382 | * Purpose : Calculates VAD_flag 383 | * 384 | ******************************************************************************* 385 | */ 386 | static Word16 vad_decision( /*return value : VAD_flag */ 387 | VadVars *st, /* i/o : State structure */ 388 | float level[COMPLEN], /* i : sub-band levels of the input frame */ 389 | double pow_sum /* i : power of the input frame */ 390 | ) 391 | { 392 | Word16 i; 393 | double L_snr_sum; 394 | double L_temp; 395 | float vad_thr, temp, noise_level; 396 | Word16 low_power_flag; 397 | Word16 hang_len,burst_len; 398 | float ilog2_speech_level,ilog2_noise_level; 399 | float temp2; 400 | /* 401 | Calculate squared sum of the input levels (level) 402 | divided by the background noise components (bckr_est). 
403 | */ 404 | L_snr_sum = 0.0; 405 | for (i = 0; i < COMPLEN; i++) 406 | { 407 | temp = level[i]/st->bckr_est[i]; 408 | L_snr_sum += temp * temp; 409 | } 410 | /* Calculate average level of estimated background noise */ 411 | L_temp = 0.0; 412 | for (i = 1; i < COMPLEN; i++) /* ignore lowest band */ 413 | { 414 | L_temp += st->bckr_est[i]; 415 | } 416 | noise_level = (float)(L_temp/16.0f); 417 | /* 418 | if SNR is lower than a threshold (MIN_SPEECH_SNR), 419 | and increase speech_level 420 | */ 421 | temp = noise_level*MIN_SPEECH_SNR*8; 422 | if (st->speech_level < temp) { 423 | st->speech_level = temp; 424 | } 425 | ilog2_noise_level = (float)(-1024.0f*log10(noise_level / 2147483648.0f)/log10(2.0f)); 426 | /* 427 | If SNR is very poor, speech_level is probably corrupted by noise level. This 428 | is correctred by subtracting -MIN_SPEECH_SNR*noise_level from speech level 429 | */ 430 | ilog2_speech_level = (float)(-1024.0f*log10((st->speech_level-temp) / 2147483648.0f)/log10(2.0f)); 431 | /*ilog2_speech_level = ilog2(st->speech_level);*/ 432 | temp = NO_SLOPE * (ilog2_noise_level- NO_P1)+ THR_HIGH; 433 | temp2 = SP_CH_MIN + SP_SLOPE*(ilog2_speech_level - SP_P1); 434 | if (temp2 < SP_CH_MIN) { 435 | temp2 = SP_CH_MIN; 436 | } 437 | if (temp2 > SP_CH_MAX) { 438 | temp2 = SP_CH_MAX; 439 | } 440 | vad_thr = temp + temp2; 441 | if (vad_thr < THR_MIN) 442 | { 443 | vad_thr = THR_MIN; 444 | } 445 | /* Shift VAD decision register */ 446 | st->vadreg = (short)((st->vadreg)>>1); 447 | /* Make intermediate VAD decision */ 448 | if (L_snr_sum > (vad_thr*(float)COMPLEN/128.0f)) 449 | { 450 | st->vadreg = (Word16)(st->vadreg | 0x4000); 451 | } 452 | /* primary vad decsion made */ 453 | /* check if the input power (pow_sum) is lower than a threshold" */ 454 | printf("power sum=%ld\n",pow_sum); 455 | if (pow_sum < VAD_POW_LOW) 456 | { 457 | low_power_flag = 1; 458 | } 459 | else 460 | { 461 | low_power_flag = 0; 462 | } 463 | /* Update speech subband background noise estimates */ 
464 | noise_estimate_update(st, level); 465 | hang_len = (Word16)((Word16)(HANG_SLOPE * (vad_thr - HANG_P1) - 0.5) + HANG_HIGH); 466 | if (hang_len < HANG_LOW) { 467 | hang_len = HANG_LOW; 468 | }; 469 | burst_len = (Word16)((Word16)(BURST_SLOPE * (vad_thr - BURST_P1) - 0.5) + BURST_HIGH); 470 | return(hangover_addition(st, low_power_flag, hang_len,burst_len)); 471 | } 472 | /****************************************************************************** 473 | * 474 | * Estimate_Speech() 475 | * Purpose : Estimate speech level 476 | * 477 | * Maximum signal level is searched and stored to the variable sp_max. 478 | * The speech frames must locate within SP_EST_COUNT number of frames to be counted. 479 | * Thus, noisy frames having occasional VAD = "1" decisions will not 480 | * affect to the estimated speech_level. 481 | * 482 | ******************************************************************************* 483 | */ 484 | static void Estimate_Speech( 485 | VadVars *st, /* i/o : State structure */ 486 | float in_level /* level of the input frame */ 487 | ) 488 | { 489 | float alpha, tmp; 490 | /* if the required activity count cannot be achieved, reset counters */ 491 | if (SP_ACTIVITY_COUNT > (SP_EST_COUNT - st->sp_est_cnt + st->sp_max_cnt)) 492 | { 493 | st->sp_est_cnt = 0; 494 | st->sp_max = 0.0; 495 | st->sp_max_cnt = 0; 496 | } 497 | st->sp_est_cnt++; 498 | if (((st->vadreg & 0x4000) || (in_level > st->speech_level)) 499 | && (in_level > MIN_SPEECH_LEVEL1)) 500 | { 501 | if (in_level > st->sp_max) { 502 | st->sp_max = in_level; 503 | } 504 | st->sp_max_cnt++; 505 | if (st->sp_max_cnt >= SP_ACTIVITY_COUNT) { 506 | tmp = st->sp_max/2.0f; /* scale to get "average" speech level*/ 507 | if (tmp > st->speech_level) { 508 | alpha = ALPHA_SP_UP; 509 | } 510 | else { 511 | alpha = ALPHA_SP_DOWN; 512 | } 513 | if (tmp > MIN_SPEECH_LEVEL2) { 514 | st->speech_level += alpha*(tmp - st->speech_level); 515 | } 516 | st->sp_max = 0.0; 517 | st->sp_max_cnt = 0; 518 | 
st->sp_est_cnt = 0; 519 | } 520 | } 521 | } 522 | /****************************************************************************** 523 | * PUBLIC PROGRAM CODE 524 | ******************************************************************************/ 525 | /****************************************************************************** 526 | * 527 | * Function: wb_vad_init 528 | * Purpose: Allocates state memory and initializes state memory 529 | * 530 | ******************************************************************************* 531 | */ 532 | int wb_vad_init ( /* return: non-zero with error, zero for ok. */ 533 | VadVars **state /* i/o : State structure */ 534 | ) 535 | { 536 | VadVars* s; 537 | if (state == (VadVars **) NULL){ 538 | fprintf(stderr, "vad_init: invalid parameter\n"); 539 | return -1; 540 | } 541 | *state = NULL; 542 | /* allocate memory */ 543 | if ((s = (VadVars *) malloc(sizeof(VadVars))) == NULL){ 544 | fprintf(stderr, "vad_init: can not malloc state structure\n"); 545 | return -1; 546 | } 547 | wb_vad_reset(s); 548 | *state = s; 549 | return 0; 550 | } 551 | /****************************************************************************** 552 | * 553 | * Function: wb_vad_reset 554 | * Purpose: Initializes state memory to zero 555 | * 556 | ******************************************************************************* 557 | ******************************************************************************* 558 | */ 559 | int wb_vad_reset ( /* return: non-zero with error, zero for ok. 
*/ 560 | VadVars *state /* i/o : State structure */ 561 | ) 562 | { 563 | Word16 i, j; 564 | if (state == (VadVars *) NULL){ 565 | fprintf(stderr, "vad_reset: invalid parameter\n"); 566 | return -1; 567 | } 568 | /* Initialize pitch detection variables */ 569 | state->pitch_tone = 0; 570 | state->vadreg = 0; 571 | state->hang_count = 0; 572 | state->burst_count = 0; 573 | state->hang_count = 0; 574 | /* initialize memory used by the filter bank */ 575 | for (i = 0; i < F_5TH_CNT; i++) 576 | { 577 | for (j = 0; j < 2; j++) 578 | { 579 | state->a_data5[i][j] = 0.0; 580 | } 581 | } 582 | for (i = 0; i < F_3TH_CNT; i++) 583 | { 584 | state->a_data3[i] = 0.0; 585 | } 586 | /* initialize the rest of the memory */ 587 | for (i = 0; i < COMPLEN; i++) 588 | { 589 | state->bckr_est[i] = NOISE_INIT; 590 | state->old_level[i] = NOISE_INIT; 591 | state->ave_level[i] = NOISE_INIT; 592 | state->sub_level[i] = 0; 593 | state->level[i] = 0.0; 594 | state->prevLevel[i] = 0.0; 595 | } 596 | state->sp_est_cnt = 0; 597 | state->sp_max = 0; 598 | state->sp_max_cnt = 0; 599 | state->speech_level = SPEECH_LEVEL_INIT; 600 | state->prev_pow_sum = 0; 601 | return 0; 602 | } 603 | /****************************************************************************** 604 | * 605 | * Function: wb_vad_exit 606 | * Purpose: The memory used for state memory is freed 607 | * 608 | ******************************************************************************* 609 | ******************************************************************************* 610 | */ 611 | void wb_vad_exit ( 612 | VadVars **state /* i/o : State structure */ 613 | ) 614 | { 615 | if (state == NULL || *state == NULL) 616 | return; 617 | /* deallocate memory */ 618 | free(*state); 619 | *state = NULL; 620 | return; 621 | } 622 | /****************************************************************************** 623 | * 624 | * Function : wb_vad_tone_detection 625 | * Purpose : Set tone flag if pitch gain is high. 
This is used to detect 626 | * signaling tones and other signals with high pitch gain. 627 | * 628 | ******************************************************************************* 629 | */ 630 | void wb_vad_pitch_tone_detection ( 631 | VadVars *st, /* i/o : State struct */ 632 | float p_gain /* pitch gain */ 633 | ) 634 | { 635 | /* update tone flag and pitch flag */ 636 | st->pitch_tone = (Word16)((st->pitch_tone)>>1); 637 | /* if (pitch_gain > TONE_THR) 638 | set tone flag 639 | */ 640 | if (p_gain > TONE_THR) 641 | { 642 | st->pitch_tone = (Word16)(st->pitch_tone | 0x4000); 643 | } 644 | } 645 | /****************************************************************************** 646 | * 647 | * Function : wb_vad 648 | * Purpose : Main program for Voice Activity Detection (VAD) for AMR 649 | * 650 | ******************************************************************************* 651 | */ 652 | Word16 wb_vad( /* Return value : VAD Decision, 1 = speech, 0 = noise */ 653 | VadVars *st, /* i/o : State structure */ 654 | float in_buf[] /* i : samples of the input frame */ 655 | ) 656 | { 657 | Word16 i; 658 | Word16 VAD_flag; 659 | float temp; 660 | double L_temp, pow_sum; 661 | for(i=0;iprevLevel[i] = st->level[i]; 663 | } 664 | /* Calculate power of the input frame. 
*/ 665 | L_temp = 0.0; 666 | for (i = 0; i < FRAME_LEN; i++) 667 | { 668 | L_temp += in_buf[i] * in_buf[i]; 669 | } 670 | L_temp *= 2.0; 671 | /* pow_sum = power of current frame and previous frame */ 672 | pow_sum = L_temp + st->prev_pow_sum; 673 | /* save power of current frame for next call */ 674 | st->prev_pow_sum = L_temp; 675 | /* If input power is very low, clear tone flag */ 676 | if (pow_sum < POW_PITCH_TONE_THR) 677 | { 678 | st->pitch_tone = (Word16)(st->pitch_tone & 0x1fff); 679 | } 680 | /* Run the filter bank and calculate signal levels at each band */ 681 | filter_bank(st, in_buf, st->level); 682 | /* compute VAD decision */ 683 | VAD_flag = vad_decision(st, st->level, pow_sum); 684 | /* Calculate input level */ 685 | L_temp = 0.0; 686 | for (i = 1; i < COMPLEN; i++) /* ignore lowest band */ 687 | { 688 | L_temp += st->level[i]; 689 | } 690 | temp = (float)(L_temp/16.0f); 691 | Estimate_Speech(st, temp); /* Estimate speech level */ 692 | return(VAD_flag); 693 | } 694 | -------------------------------------------------------------------------------- /vad/wb_vad.h: -------------------------------------------------------------------------------- 1 | #ifndef wb_vad_h 2 | #define wb_vad_h 3 | 4 | /****************************************************************************** 5 | * INCLUDE FILES 6 | ******************************************************************************/ 7 | #include "typedef.h" 8 | #include "wb_vad_c.h" 9 | 10 | /****************************************************************************** 11 | * DEFINITION OF DATA TYPES 12 | ******************************************************************************/ 13 | 14 | typedef struct 15 | { 16 | float bckr_est[COMPLEN]; /* background noise estimate */ 17 | float ave_level[COMPLEN]; /* averaged input components for stationary */ 18 | /* estimation */ 19 | float old_level[COMPLEN]; /* input levels of the previous frame */ 20 | float sub_level[COMPLEN]; /* input levels calculated at the 
end of a frame (lookahead) */ 21 | float a_data5[F_5TH_CNT][2]; /* memory for the filter bank */ 22 | float a_data3[F_3TH_CNT]; /* memory for the filter bank */ 23 | 24 | Word16 burst_count; /* counts length of a speech burst */ 25 | Word16 hang_count; /* hangover counter */ 26 | Word16 stat_count; /* stationary counter */ 27 | 28 | /* Note that each of the following two variables holds 15 flags. Each flag reserves 1 bit of the variable. The newest flag is 29 | * in the bit 15 (assuming that LSB is bit 1 and MSB is bit 16). */ 30 | Word16 vadreg; /* flags for intermediate VAD decisions */ 31 | Word16 pitch_tone; /* flags for pitch and tone detection */ 32 | 33 | Word16 sp_est_cnt; /* counter for speech level estimation */ 34 | float sp_max; /* maximum level */ 35 | Word16 sp_max_cnt; /* counts frames that contains speech */ 36 | float speech_level; /* estimated speech level */ 37 | double prev_pow_sum; /* power of previous frame */ 38 | 39 | float level[COMPLEN]; 40 | float prevLevel[COMPLEN]; 41 | } VadVars; 42 | 43 | /* 44 | ******************************************************************************** 45 | * DECLARATION OF PROTOTYPES 46 | ******************************************************************************** 47 | */ 48 | int wb_vad_init(VadVars ** st); 49 | int wb_vad_reset(VadVars * st); 50 | void wb_vad_exit(VadVars ** st); 51 | void wb_vad_pitch_tone_detection(VadVars * st, float p_gain); 52 | Word16 wb_vad(VadVars * st, float in_buf[]); 53 | 54 | #endif 55 | -------------------------------------------------------------------------------- /vad/wb_vad_c.h: -------------------------------------------------------------------------------- 1 | #ifndef wb_vad_c_h 2 | #define wb_vad_c_h 3 | 4 | /* */ 5 | #define FRAME_LEN 256 /* Length (samples) of the input frame */ 6 | #define COMPLEN 12 /* Number of sub-bands used by VAD */ 7 | 8 | #define UNIRSHFT 7 /* = log2(MAX_16/UNITY), UNITY = 256 */ 9 | #define SCALE 128 /* (UNITY*UNITY)/512 */ 10 | 11 | 
#define TONE_THR 0.65f /* Threshold for tone detection */ 12 | 13 | /* constants for speech level estimation */ 14 | #define SP_EST_COUNT 80 15 | #define SP_ACTIVITY_COUNT 25 16 | #define LOG2_SP_ACTIVITY_COUNT 5 17 | #define ALPHA_SP_UP (1.0f - 0.85f) 18 | #define ALPHA_SP_DOWN (1.0f - 0.85f) 19 | 20 | #define NOM_LEVEL 2050.0f /* about -26 dBov */ 21 | #define SPEECH_LEVEL_INIT NOM_LEVEL 22 | #define MIN_SPEECH_LEVEL1 (NOM_LEVEL * 0.063f) /* NOM_LEVEL -24 dB */ 23 | #define MIN_SPEECH_LEVEL2 (NOM_LEVEL * 0.2f) /* NOM_LEVEL -14 dB */ 24 | #define MIN_SPEECH_SNR 0.125f /* 0 dB, lowest SNR estimation */ 25 | 26 | /* Constants for background spectrum update */ 27 | #define ALPHA_UP1 (1.0f - 0.95f) /* Normal update, upwards: */ 28 | #define ALPHA_DOWN1 (1.0f - 0.936f) /* Normal update, downwards */ 29 | #define ALPHA_UP2 (1.0f - 0.985f) /* Forced update, upwards */ 30 | #define ALPHA_DOWN2 (1.0f - 0.943f) /* Forced update, downwards */ 31 | #define ALPHA3 (1.0f - 0.95f) /* Update downwards */ 32 | #define ALPHA4 (1.0f - 0.9f) /* For stationary estimation */ 33 | #define ALPHA5 (1.0f - 0.5f) /* For stationary estimation */ 34 | 35 | /* Constants for VAD threshold */ 36 | #define THR_MIN (1.6f*SCALE) /* Minimum threshold */ 37 | 38 | #define THR_HIGH (6.0f*SCALE) /* Highest threshold */ 39 | #define THR_LOW (1.7f*SCALE) /* Lowest threshold */ 40 | 41 | #define NO_P1 31744.0f /* ilog2(1), Noise level for highest threshold */ 42 | #define NO_P2 19786.0f /* ilog2(0.1, Noise level for lowest threshold */ 43 | 44 | #define NO_SLOPE ((float)(THR_LOW - THR_HIGH)/(float)(NO_P2 - NO_P1)) 45 | 46 | #define SP_CH_MIN (-0.75f*SCALE) 47 | #define SP_CH_MAX (0.75f*SCALE) 48 | 49 | #define SP_P1 22527.0f /* ilog2(NOM_LEVEL/4) */ 50 | #define SP_P2 17832.0f /* ilog2(NOM_LEVEL*4) */ 51 | 52 | #define SP_SLOPE ((float)(SP_CH_MAX - SP_CH_MIN)/(float)(SP_P2 - SP_P1)) 53 | 54 | /* Constants for hangover length */ 55 | #define HANG_HIGH 12 /* longest hangover */ 56 | #define HANG_LOW 2 /* 
shortest hangover */ 57 | #define HANG_P1 THR_LOW /* threshold for longest hangover */ 58 | #define HANG_P2 (4*SCALE) /* threshold for shortest hangover */ 59 | #define HANG_SLOPE ((float)(HANG_LOW-HANG_HIGH)/(float)(HANG_P2-HANG_P1)) 60 | 61 | /* Constants for burst length */ 62 | #define BURST_HIGH 8 /* longest burst length */ 63 | #define BURST_LOW 3 /* shortest burst length */ 64 | #define BURST_P1 THR_HIGH /* threshold for longest burst */ 65 | #define BURST_P2 THR_LOW /* threshold for shortest burst */ 66 | #define BURST_SLOPE ((float)(BURST_LOW-BURST_HIGH)/(float)(BURST_P2-BURST_P1)) 67 | 68 | /* Parameters for background spectrum recovery function */ 69 | #define STAT_COUNT 20 /* threshold of stationary detection counter */ 70 | 71 | #define STAT_THR_LEVEL 184 /* Threshold level for stationarity detection */ 72 | #define STAT_THR 1000 /* Threshold for stationarity detection */ 73 | 74 | /* Limits for background noise estimate */ 75 | 76 | #define NOISE_MIN 40 /* minimum */ 77 | #define NOISE_MAX 20000 /* maximum */ 78 | #define NOISE_INIT 150 /* initial */ 79 | 80 | /* Thresholds for signal power (now calculated on 2 frames) */ 81 | #define VAD_POW_LOW 30000.0f /* If input power is lower than this, VAD is set to 0 */ 82 | #define POW_PITCH_TONE_THR 686080.0f /* If input power is lower, pitch */ 83 | /* detection is ignored */ 84 | 85 | /* Constants for the filter bank */ 86 | #define COEFF3 0.407806f /* coefficient for the 3rd order filter */ 87 | #define COEFF5_1 0.670013f /* 1st coefficient the for 5th order filter */ 88 | #define COEFF5_2 0.195007f /* 2nd coefficient the for 5th order filter */ 89 | #define F_5TH_CNT 5 /* number of 5th order filters */ 90 | #define F_3TH_CNT 6 /* number of 3th order filters */ 91 | 92 | #endif 93 | 94 | -------------------------------------------------------------------------------- /vad_baidu/Makefile: -------------------------------------------------------------------------------- 1 | all:vadtest.c wb_vad.c 
http_request.c https_request.c base64.c cJSON.c 2 | gcc -o vad_baidu vadtest.c wb_vad.c http_request.c https_request.c base64.c cJSON.c -lcurl -lasound -lm 3 | clean: 4 | rm -f vad_baidu 5 | -------------------------------------------------------------------------------- /vad_baidu/base64.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "base64.h" 4 | #include 5 | 6 | const char base[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="; 7 | static char find_pos(char ch); 8 | /* */ 9 | char *base64_encode(const char* data, int data_len) 10 | { 11 | //int data_len = strlen(data); 12 | int prepare = 0; 13 | int ret_len; 14 | int temp = 0; 15 | char *ret = NULL; 16 | char *f = NULL; 17 | int tmp = 0; 18 | char changed[4]; 19 | int i = 0; 20 | ret_len = data_len / 3; 21 | temp = data_len % 3; 22 | if (temp > 0) 23 | { 24 | ret_len += 1; 25 | } 26 | ret_len = ret_len*4 + 1; 27 | ret = (char *)malloc(ret_len); 28 | 29 | if ( ret == NULL) 30 | { 31 | printf("No enough memory.\n"); 32 | exit(0); 33 | } 34 | memset(ret, 0, ret_len); 35 | f = ret; 36 | while (tmp < data_len) 37 | { 38 | temp = 0; 39 | prepare = 0; 40 | memset(changed, '\0', 4); 41 | while (temp < 3) 42 | { 43 | //printf("tmp = %d\n", tmp); 44 | if (tmp >= data_len) 45 | { 46 | break; 47 | } 48 | prepare = ((prepare << 8) | (data[tmp] & 0xFF)); 49 | tmp++; 50 | temp++; 51 | } 52 | prepare = (prepare<<((3-temp)*8)); 53 | //printf("before for : temp = %d, prepare = %d\n", temp, prepare); 54 | for (i = 0; i < 4 ;i++ ) 55 | { 56 | if (temp < i) 57 | { 58 | changed[i] = 0x40; 59 | } 60 | else 61 | { 62 | changed[i] = (prepare>>((3-i)*6)) & 0x3F; 63 | } 64 | *f = base[(unsigned int)changed[i]]; 65 | //printf("%.2X", changed[i]); 66 | f++; 67 | } 68 | } 69 | *f = '\0'; 70 | 71 | return ret; 72 | 73 | } 74 | /* */ 75 | static char find_pos(char ch) 76 | { 77 | char *ptr = (char*)strrchr(base, ch);//the last position (the only) 
in base[] 78 | return (ptr - base); 79 | } 80 | /* */ 81 | char *base64_decode(const char *data, int data_len) 82 | { 83 | int ret_len = (data_len / 4) * 3; 84 | int equal_count = 0; 85 | char *ret = NULL; 86 | char *f = NULL; 87 | int tmp = 0; 88 | int temp = 0; 89 | char need[3]; 90 | int prepare = 0; 91 | int i = 0; 92 | if (*(data + data_len - 1) == '=') 93 | { 94 | equal_count += 1; 95 | } 96 | if (*(data + data_len - 2) == '=') 97 | { 98 | equal_count += 1; 99 | } 100 | if (*(data + data_len - 3) == '=') 101 | {//seems impossible 102 | equal_count += 1; 103 | } 104 | switch (equal_count) 105 | { 106 | case 0: 107 | ret_len += 4;//3 + 1 [1 for NULL] 108 | break; 109 | case 1: 110 | ret_len += 4;//Ceil((6*3)/8)+1 111 | break; 112 | case 2: 113 | ret_len += 3;//Ceil((6*2)/8)+1 114 | break; 115 | case 3: 116 | ret_len += 2;//Ceil((6*1)/8)+1 117 | break; 118 | } 119 | ret = (char *)malloc(ret_len); 120 | if (ret == NULL) 121 | { 122 | printf("No enough memory.\n"); 123 | exit(0); 124 | } 125 | memset(ret, 0, ret_len); 126 | f = ret; 127 | while (tmp < (data_len - equal_count)) 128 | { 129 | temp = 0; 130 | prepare = 0; 131 | memset(need, 0, 4); 132 | while (temp < 4) 133 | { 134 | if (tmp >= (data_len - equal_count)) 135 | { 136 | break; 137 | } 138 | prepare = (prepare << 6) | (find_pos(data[tmp])); 139 | temp++; 140 | tmp++; 141 | } 142 | prepare = prepare << ((4-temp) * 6); 143 | for (i=0; i<3 ;i++ ) 144 | { 145 | if (i == temp) 146 | { 147 | break; 148 | } 149 | *f = (char)((prepare>>((2-i)*8)) & 0xFF); 150 | f++; 151 | } 152 | } 153 | *f = '\0'; 154 | return ret; 155 | } 156 | -------------------------------------------------------------------------------- /vad_baidu/base64.h: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | * 3 | * Copyright (c) 2014 Baidu.com, Inc. 
All Rights Reserved 4 | * 5 | **************************************************************************/ 6 | 7 | 8 | #ifndef __ABASE64_H_ 9 | #define __ABASE64_H_ 10 | 11 | char* base64_encode(const char* data, int data_len); 12 | char *base64_decode(const char* data, int data_len); 13 | 14 | #endif //__BASE64_H_ 15 | 16 | -------------------------------------------------------------------------------- /vad_baidu/cJSON.c: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2009 Dave Gamble 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | */ 22 | 23 | /* cJSON */ 24 | /* JSON parser in C. 
*/ 25 | 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include "cJSON.h" 34 | 35 | static const char *ep; 36 | 37 | const char *cJSON_GetErrorPtr(void) {return ep;} 38 | 39 | static int cJSON_strcasecmp(const char *s1,const char *s2) 40 | { 41 | if (!s1) return (s1==s2)?0:1;if (!s2) return 1; 42 | for(; tolower(*s1) == tolower(*s2); ++s1, ++s2) if(*s1 == 0) return 0; 43 | return tolower(*(const unsigned char *)s1) - tolower(*(const unsigned char *)s2); 44 | } 45 | 46 | static void *(*cJSON_malloc)(size_t sz) = malloc; 47 | static void (*cJSON_free)(void *ptr) = free; 48 | 49 | static char* cJSON_strdup(const char* str) 50 | { 51 | size_t len; 52 | char* copy; 53 | 54 | len = strlen(str) + 1; 55 | if (!(copy = (char*)cJSON_malloc(len))) return 0; 56 | memcpy(copy,str,len); 57 | return copy; 58 | } 59 | 60 | void cJSON_InitHooks(cJSON_Hooks* hooks) 61 | { 62 | if (!hooks) { /* Reset hooks */ 63 | cJSON_malloc = malloc; 64 | cJSON_free = free; 65 | return; 66 | } 67 | 68 | cJSON_malloc = (hooks->malloc_fn)?hooks->malloc_fn:malloc; 69 | cJSON_free = (hooks->free_fn)?hooks->free_fn:free; 70 | } 71 | 72 | /* Internal constructor. */ 73 | static cJSON *cJSON_New_Item(void) 74 | { 75 | cJSON* node = (cJSON*)cJSON_malloc(sizeof(cJSON)); 76 | if (node) memset(node,0,sizeof(cJSON)); 77 | return node; 78 | } 79 | 80 | /* Delete a cJSON structure. */ 81 | void cJSON_Delete(cJSON *c) 82 | { 83 | cJSON *next; 84 | while (c) 85 | { 86 | next=c->next; 87 | if (!(c->type&cJSON_IsReference) && c->child) cJSON_Delete(c->child); 88 | if (!(c->type&cJSON_IsReference) && c->valuestring) cJSON_free(c->valuestring); 89 | if (c->string) cJSON_free(c->string); 90 | cJSON_free(c); 91 | c=next; 92 | } 93 | } 94 | 95 | /* Parse the input text to generate a number, and populate the result into item. 
*/ 96 | static const char *parse_number(cJSON *item,const char *num) 97 | { 98 | double n=0,sign=1,scale=0;int subscale=0,signsubscale=1; 99 | 100 | if (*num=='-') sign=-1,num++; /* Has sign? */ 101 | if (*num=='0') num++; /* is zero */ 102 | if (*num>='1' && *num<='9') do n=(n*10.0)+(*num++ -'0'); while (*num>='0' && *num<='9'); /* Number? */ 103 | if (*num=='.' && num[1]>='0' && num[1]<='9') {num++; do n=(n*10.0)+(*num++ -'0'),scale--; while (*num>='0' && *num<='9');} /* Fractional part? */ 104 | if (*num=='e' || *num=='E') /* Exponent? */ 105 | { num++;if (*num=='+') num++; else if (*num=='-') signsubscale=-1,num++; /* With sign? */ 106 | while (*num>='0' && *num<='9') subscale=(subscale*10)+(*num++ - '0'); /* Number? */ 107 | } 108 | 109 | n=sign*n*pow(10.0,(scale+subscale*signsubscale)); /* number = +/- number.fraction * 10^+/- exponent */ 110 | 111 | item->valuedouble=n; 112 | item->valueint=(int)n; 113 | item->type=cJSON_Number; 114 | return num; 115 | } 116 | 117 | /* Render the number nicely from the given item into a string. */ 118 | static char *print_number(cJSON *item) 119 | { 120 | char *str; 121 | double d=item->valuedouble; 122 | if (fabs(((double)item->valueint)-d)<=DBL_EPSILON && d<=INT_MAX && d>=INT_MIN) 123 | { 124 | str=(char*)cJSON_malloc(21); /* 2^64+1 can be represented in 21 chars. */ 125 | if (str) sprintf(str,"%d",item->valueint); 126 | } 127 | else 128 | { 129 | str=(char*)cJSON_malloc(64); /* This is a nice tradeoff. 
*/ 130 | if (str) 131 | { 132 | if (fabs(floor(d)-d)<=DBL_EPSILON && fabs(d)<1.0e60)sprintf(str,"%.0f",d); 133 | else if (fabs(d)<1.0e-6 || fabs(d)>1.0e9) sprintf(str,"%e",d); 134 | else sprintf(str,"%f",d); 135 | } 136 | } 137 | return str; 138 | } 139 | 140 | static unsigned parse_hex4(const char *str) 141 | { 142 | unsigned h=0; 143 | if (*str>='0' && *str<='9') h+=(*str)-'0'; else if (*str>='A' && *str<='F') h+=10+(*str)-'A'; else if (*str>='a' && *str<='f') h+=10+(*str)-'a'; else return 0; 144 | h=h<<4;str++; 145 | if (*str>='0' && *str<='9') h+=(*str)-'0'; else if (*str>='A' && *str<='F') h+=10+(*str)-'A'; else if (*str>='a' && *str<='f') h+=10+(*str)-'a'; else return 0; 146 | h=h<<4;str++; 147 | if (*str>='0' && *str<='9') h+=(*str)-'0'; else if (*str>='A' && *str<='F') h+=10+(*str)-'A'; else if (*str>='a' && *str<='f') h+=10+(*str)-'a'; else return 0; 148 | h=h<<4;str++; 149 | if (*str>='0' && *str<='9') h+=(*str)-'0'; else if (*str>='A' && *str<='F') h+=10+(*str)-'A'; else if (*str>='a' && *str<='f') h+=10+(*str)-'a'; else return 0; 150 | return h; 151 | } 152 | 153 | /* Parse the input text into an unescaped cstring, and populate item. */ 154 | static const unsigned char firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; 155 | static const char *parse_string(cJSON *item,const char *str) 156 | { 157 | const char *ptr=str+1;char *ptr2;char *out;int len=0;unsigned uc,uc2; 158 | if (*str!='\"') {ep=str;return 0;} /* not a string! */ 159 | 160 | while (*ptr!='\"' && *ptr && ++len) if (*ptr++ == '\\') ptr++; /* Skip escaped quotes. */ 161 | 162 | out=(char*)cJSON_malloc(len+1); /* This is how long we need for the string, roughly. 
*/ 163 | if (!out) return 0; 164 | 165 | ptr=str+1;ptr2=out; 166 | while (*ptr!='\"' && *ptr) 167 | { 168 | if (*ptr!='\\') *ptr2++=*ptr++; 169 | else 170 | { 171 | ptr++; 172 | switch (*ptr) 173 | { 174 | case 'b': *ptr2++='\b'; break; 175 | case 'f': *ptr2++='\f'; break; 176 | case 'n': *ptr2++='\n'; break; 177 | case 'r': *ptr2++='\r'; break; 178 | case 't': *ptr2++='\t'; break; 179 | case 'u': /* transcode utf16 to utf8. */ 180 | uc=parse_hex4(ptr+1);ptr+=4; /* get the unicode char. */ 181 | 182 | if ((uc>=0xDC00 && uc<=0xDFFF) || uc==0) break; /* check for invalid. */ 183 | 184 | if (uc>=0xD800 && uc<=0xDBFF) /* UTF16 surrogate pairs. */ 185 | { 186 | if (ptr[1]!='\\' || ptr[2]!='u') break; /* missing second-half of surrogate. */ 187 | uc2=parse_hex4(ptr+3);ptr+=6; 188 | if (uc2<0xDC00 || uc2>0xDFFF) break; /* invalid second-half of surrogate. */ 189 | uc=0x10000 + (((uc&0x3FF)<<10) | (uc2&0x3FF)); 190 | } 191 | 192 | len=4;if (uc<0x80) len=1;else if (uc<0x800) len=2;else if (uc<0x10000) len=3; ptr2+=len; 193 | 194 | switch (len) { 195 | case 4: *--ptr2 =((uc | 0x80) & 0xBF); uc >>= 6; 196 | case 3: *--ptr2 =((uc | 0x80) & 0xBF); uc >>= 6; 197 | case 2: *--ptr2 =((uc | 0x80) & 0xBF); uc >>= 6; 198 | case 1: *--ptr2 =(uc | firstByteMark[len]); 199 | } 200 | ptr2+=len; 201 | break; 202 | default: *ptr2++=*ptr; break; 203 | } 204 | ptr++; 205 | } 206 | } 207 | *ptr2=0; 208 | if (*ptr=='\"') ptr++; 209 | item->valuestring=out; 210 | item->type=cJSON_String; 211 | return ptr; 212 | } 213 | 214 | /* Render the cstring provided to an escaped version that can be printed. 
*/ 215 | static char *print_string_ptr(const char *str) 216 | { 217 | const char *ptr;char *ptr2,*out;int len=0;unsigned char token; 218 | 219 | if (!str) return cJSON_strdup(""); 220 | ptr=str;while ((token=*ptr) && ++len) {if (strchr("\"\\\b\f\n\r\t",token)) len++; else if (token<32) len+=5;ptr++;} 221 | 222 | out=(char*)cJSON_malloc(len+3); 223 | if (!out) return 0; 224 | 225 | ptr2=out;ptr=str; 226 | *ptr2++='\"'; 227 | while (*ptr) 228 | { 229 | if ((unsigned char)*ptr>31 && *ptr!='\"' && *ptr!='\\') *ptr2++=*ptr++; 230 | else 231 | { 232 | *ptr2++='\\'; 233 | switch (token=*ptr++) 234 | { 235 | case '\\': *ptr2++='\\'; break; 236 | case '\"': *ptr2++='\"'; break; 237 | case '\b': *ptr2++='b'; break; 238 | case '\f': *ptr2++='f'; break; 239 | case '\n': *ptr2++='n'; break; 240 | case '\r': *ptr2++='r'; break; 241 | case '\t': *ptr2++='t'; break; 242 | default: sprintf(ptr2,"u%04x",token);ptr2+=5; break; /* escape and print */ 243 | } 244 | } 245 | } 246 | *ptr2++='\"';*ptr2++=0; 247 | return out; 248 | } 249 | /* Invote print_string_ptr (which is useful) on an item. */ 250 | static char *print_string(cJSON *item) {return print_string_ptr(item->valuestring);} 251 | 252 | /* Predeclare these prototypes. */ 253 | static const char *parse_value(cJSON *item,const char *value); 254 | static char *print_value(cJSON *item,int depth,int fmt); 255 | static const char *parse_array(cJSON *item,const char *value); 256 | static char *print_array(cJSON *item,int depth,int fmt); 257 | static const char *parse_object(cJSON *item,const char *value); 258 | static char *print_object(cJSON *item,int depth,int fmt); 259 | 260 | /* Utility to jump whitespace and cr/lf */ 261 | static const char *skip(const char *in) {while (in && *in && (unsigned char)*in<=32) in++; return in;} 262 | 263 | /* Parse an object - create a new root, and populate. 
*/ 264 | cJSON *cJSON_ParseWithOpts(const char *value,const char **return_parse_end,int require_null_terminated) 265 | { 266 | const char *end=0; 267 | cJSON *c=cJSON_New_Item(); 268 | ep=0; 269 | if (!c) return 0; /* memory fail */ 270 | 271 | end=parse_value(c,skip(value)); 272 | if (!end) {cJSON_Delete(c);return 0;} /* parse failure. ep is set. */ 273 | 274 | /* if we require null-terminated JSON without appended garbage, skip and then check for a null terminator */ 275 | if (require_null_terminated) {end=skip(end);if (*end) {cJSON_Delete(c);ep=end;return 0;}} 276 | if (return_parse_end) *return_parse_end=end; 277 | return c; 278 | } 279 | /* Default options for cJSON_Parse */ 280 | cJSON *cJSON_Parse(const char *value) {return cJSON_ParseWithOpts(value,0,0);} 281 | 282 | /* Render a cJSON item/entity/structure to text. */ 283 | char *cJSON_Print(cJSON *item) {return print_value(item,0,1);} 284 | char *cJSON_PrintUnformatted(cJSON *item) {return print_value(item,0,0);} 285 | 286 | /* Parser core - when encountering text, process appropriately. */ 287 | static const char *parse_value(cJSON *item,const char *value) 288 | { 289 | if (!value) return 0; /* Fail on null. */ 290 | if (!strncmp(value,"null",4)) { item->type=cJSON_NULL; return value+4; } 291 | if (!strncmp(value,"false",5)) { item->type=cJSON_False; return value+5; } 292 | if (!strncmp(value,"true",4)) { item->type=cJSON_True; item->valueint=1; return value+4; } 293 | if (*value=='\"') { return parse_string(item,value); } 294 | if (*value=='-' || (*value>='0' && *value<='9')) { return parse_number(item,value); } 295 | if (*value=='[') { return parse_array(item,value); } 296 | if (*value=='{') { return parse_object(item,value); } 297 | 298 | ep=value;return 0; /* failure. */ 299 | } 300 | 301 | /* Render a value to text. 
*/ 302 | static char *print_value(cJSON *item,int depth,int fmt) 303 | { 304 | char *out=0; 305 | if (!item) return 0; 306 | switch ((item->type)&255) 307 | { 308 | case cJSON_NULL: out=cJSON_strdup("null"); break; 309 | case cJSON_False: out=cJSON_strdup("false");break; 310 | case cJSON_True: out=cJSON_strdup("true"); break; 311 | case cJSON_Number: out=print_number(item);break; 312 | case cJSON_String: out=print_string(item);break; 313 | case cJSON_Array: out=print_array(item,depth,fmt);break; 314 | case cJSON_Object: out=print_object(item,depth,fmt);break; 315 | } 316 | return out; 317 | } 318 | 319 | /* Build an array from input text. */ 320 | static const char *parse_array(cJSON *item,const char *value) 321 | { 322 | cJSON *child; 323 | if (*value!='[') {ep=value;return 0;} /* not an array! */ 324 | 325 | item->type=cJSON_Array; 326 | value=skip(value+1); 327 | if (*value==']') return value+1; /* empty array. */ 328 | 329 | item->child=child=cJSON_New_Item(); 330 | if (!item->child) return 0; /* memory fail */ 331 | value=skip(parse_value(child,skip(value))); /* skip any spacing, get the value. */ 332 | if (!value) return 0; 333 | 334 | while (*value==',') 335 | { 336 | cJSON *new_item; 337 | if (!(new_item=cJSON_New_Item())) return 0; /* memory fail */ 338 | child->next=new_item;new_item->prev=child;child=new_item; 339 | value=skip(parse_value(child,skip(value+1))); 340 | if (!value) return 0; /* memory fail */ 341 | } 342 | 343 | if (*value==']') return value+1; /* end of array */ 344 | ep=value;return 0; /* malformed. */ 345 | } 346 | 347 | /* Render an array to text */ 348 | static char *print_array(cJSON *item,int depth,int fmt) 349 | { 350 | char **entries; 351 | char *out=0,*ptr,*ret;int len=5; 352 | cJSON *child=item->child; 353 | int numentries=0,i=0,fail=0; 354 | 355 | /* How many entries in the array? 
*/ 356 | while (child) numentries++,child=child->next; 357 | /* Explicitly handle numentries==0 */ 358 | if (!numentries) 359 | { 360 | out=(char*)cJSON_malloc(3); 361 | if (out) strcpy(out,"[]"); 362 | return out; 363 | } 364 | /* Allocate an array to hold the values for each */ 365 | entries=(char**)cJSON_malloc(numentries*sizeof(char*)); 366 | if (!entries) return 0; 367 | memset(entries,0,numentries*sizeof(char*)); 368 | /* Retrieve all the results: */ 369 | child=item->child; 370 | while (child && !fail) 371 | { 372 | ret=print_value(child,depth+1,fmt); 373 | entries[i++]=ret; 374 | if (ret) len+=strlen(ret)+2+(fmt?1:0); else fail=1; 375 | child=child->next; 376 | } 377 | 378 | /* If we didn't fail, try to malloc the output string */ 379 | if (!fail) out=(char*)cJSON_malloc(len); 380 | /* If that fails, we fail. */ 381 | if (!out) fail=1; 382 | 383 | /* Handle failure. */ 384 | if (fail) 385 | { 386 | for (i=0;itype=cJSON_Object; 412 | value=skip(value+1); 413 | if (*value=='}') return value+1; /* empty array. */ 414 | 415 | item->child=child=cJSON_New_Item(); 416 | if (!item->child) return 0; 417 | value=skip(parse_string(child,skip(value))); 418 | if (!value) return 0; 419 | child->string=child->valuestring;child->valuestring=0; 420 | if (*value!=':') {ep=value;return 0;} /* fail! */ 421 | value=skip(parse_value(child,skip(value+1))); /* skip any spacing, get the value. */ 422 | if (!value) return 0; 423 | 424 | while (*value==',') 425 | { 426 | cJSON *new_item; 427 | if (!(new_item=cJSON_New_Item())) return 0; /* memory fail */ 428 | child->next=new_item;new_item->prev=child;child=new_item; 429 | value=skip(parse_string(child,skip(value+1))); 430 | if (!value) return 0; 431 | child->string=child->valuestring;child->valuestring=0; 432 | if (*value!=':') {ep=value;return 0;} /* fail! */ 433 | value=skip(parse_value(child,skip(value+1))); /* skip any spacing, get the value. 
*/ 434 | if (!value) return 0; 435 | } 436 | 437 | if (*value=='}') return value+1; /* end of array */ 438 | ep=value;return 0; /* malformed. */ 439 | } 440 | 441 | /* Render an object to text. */ 442 | static char *print_object(cJSON *item,int depth,int fmt) 443 | { 444 | char **entries=0,**names=0; 445 | char *out=0,*ptr,*ret,*str;int len=7,i=0,j; 446 | cJSON *child=item->child; 447 | int numentries=0,fail=0; 448 | /* Count the number of entries. */ 449 | while (child) numentries++,child=child->next; 450 | /* Explicitly handle empty object case */ 451 | if (!numentries) 452 | { 453 | out=(char*)cJSON_malloc(fmt?depth+4:3); 454 | if (!out) return 0; 455 | ptr=out;*ptr++='{'; 456 | if (fmt) {*ptr++='\n';for (i=0;ichild;depth++;if (fmt) len+=depth; 470 | while (child) 471 | { 472 | names[i]=str=print_string_ptr(child->string); 473 | entries[i++]=ret=print_value(child,depth,fmt); 474 | if (str && ret) len+=strlen(ret)+strlen(str)+2+(fmt?2+depth:0); else fail=1; 475 | child=child->next; 476 | } 477 | 478 | /* Try to allocate the output string */ 479 | if (!fail) out=(char*)cJSON_malloc(len); 480 | if (!out) fail=1; 481 | 482 | /* Handle failure */ 483 | if (fail) 484 | { 485 | for (i=0;ichild;int i=0;while(c)i++,c=c->next;return i;} 511 | cJSON *cJSON_GetArrayItem(cJSON *array,int item) {cJSON *c=array->child; while (c && item>0) item--,c=c->next; return c;} 512 | cJSON *cJSON_GetObjectItem(cJSON *object,const char *string) {cJSON *c=object->child; while (c && cJSON_strcasecmp(c->string,string)) c=c->next; return c;} 513 | 514 | /* Utility for array list handling. */ 515 | static void suffix_object(cJSON *prev,cJSON *item) {prev->next=item;item->prev=prev;} 516 | /* Utility for handling references. */ 517 | static cJSON *create_reference(cJSON *item) {cJSON *ref=cJSON_New_Item();if (!ref) return 0;memcpy(ref,item,sizeof(cJSON));ref->string=0;ref->type|=cJSON_IsReference;ref->next=ref->prev=0;return ref;} 518 | 519 | /* Add item to array/object. 
*/ 520 | void cJSON_AddItemToArray(cJSON *array, cJSON *item) {cJSON *c=array->child;if (!item) return; if (!c) {array->child=item;} else {while (c && c->next) c=c->next; suffix_object(c,item);}} 521 | void cJSON_AddItemToObject(cJSON *object,const char *string,cJSON *item) {if (!item) return; if (item->string) cJSON_free(item->string);item->string=cJSON_strdup(string);cJSON_AddItemToArray(object,item);} 522 | void cJSON_AddItemReferenceToArray(cJSON *array, cJSON *item) {cJSON_AddItemToArray(array,create_reference(item));} 523 | void cJSON_AddItemReferenceToObject(cJSON *object,const char *string,cJSON *item) {cJSON_AddItemToObject(object,string,create_reference(item));} 524 | 525 | cJSON *cJSON_DetachItemFromArray(cJSON *array,int which) {cJSON *c=array->child;while (c && which>0) c=c->next,which--;if (!c) return 0; 526 | if (c->prev) c->prev->next=c->next;if (c->next) c->next->prev=c->prev;if (c==array->child) array->child=c->next;c->prev=c->next=0;return c;} 527 | void cJSON_DeleteItemFromArray(cJSON *array,int which) {cJSON_Delete(cJSON_DetachItemFromArray(array,which));} 528 | cJSON *cJSON_DetachItemFromObject(cJSON *object,const char *string) {int i=0;cJSON *c=object->child;while (c && cJSON_strcasecmp(c->string,string)) i++,c=c->next;if (c) return cJSON_DetachItemFromArray(object,i);return 0;} 529 | void cJSON_DeleteItemFromObject(cJSON *object,const char *string) {cJSON_Delete(cJSON_DetachItemFromObject(object,string));} 530 | 531 | /* Replace array/object items with new ones. 
*/ 532 | void cJSON_ReplaceItemInArray(cJSON *array,int which,cJSON *newitem) {cJSON *c=array->child;while (c && which>0) c=c->next,which--;if (!c) return; 533 | newitem->next=c->next;newitem->prev=c->prev;if (newitem->next) newitem->next->prev=newitem; 534 | if (c==array->child) array->child=newitem; else newitem->prev->next=newitem;c->next=c->prev=0;cJSON_Delete(c);} 535 | void cJSON_ReplaceItemInObject(cJSON *object,const char *string,cJSON *newitem){int i=0;cJSON *c=object->child;while(c && cJSON_strcasecmp(c->string,string))i++,c=c->next;if(c){newitem->string=cJSON_strdup(string);cJSON_ReplaceItemInArray(object,i,newitem);}} 536 | 537 | /* Create basic types: */ 538 | cJSON *cJSON_CreateNull(void) {cJSON *item=cJSON_New_Item();if(item)item->type=cJSON_NULL;return item;} 539 | cJSON *cJSON_CreateTrue(void) {cJSON *item=cJSON_New_Item();if(item)item->type=cJSON_True;return item;} 540 | cJSON *cJSON_CreateFalse(void) {cJSON *item=cJSON_New_Item();if(item)item->type=cJSON_False;return item;} 541 | cJSON *cJSON_CreateBool(int b) {cJSON *item=cJSON_New_Item();if(item)item->type=b?cJSON_True:cJSON_False;return item;} 542 | cJSON *cJSON_CreateNumber(double num) {cJSON *item=cJSON_New_Item();if(item){item->type=cJSON_Number;item->valuedouble=num;item->valueint=(int)num;}return item;} 543 | cJSON *cJSON_CreateString(const char *string) {cJSON *item=cJSON_New_Item();if(item){item->type=cJSON_String;item->valuestring=cJSON_strdup(string);}return item;} 544 | cJSON *cJSON_CreateArray(void) {cJSON *item=cJSON_New_Item();if(item)item->type=cJSON_Array;return item;} 545 | cJSON *cJSON_CreateObject(void) {cJSON *item=cJSON_New_Item();if(item)item->type=cJSON_Object;return item;} 546 | 547 | /* Create Arrays: */ 548 | cJSON *cJSON_CreateIntArray(const int *numbers,int count) {int i;cJSON *n=0,*p=0,*a=cJSON_CreateArray();for(i=0;a && ichild=n;else suffix_object(p,n);p=n;}return a;} 549 | cJSON *cJSON_CreateFloatArray(const float *numbers,int count) {int i;cJSON 
*n=0,*p=0,*a=cJSON_CreateArray();for(i=0;a && ichild=n;else suffix_object(p,n);p=n;}return a;} 550 | cJSON *cJSON_CreateDoubleArray(const double *numbers,int count) {int i;cJSON *n=0,*p=0,*a=cJSON_CreateArray();for(i=0;a && ichild=n;else suffix_object(p,n);p=n;}return a;} 551 | cJSON *cJSON_CreateStringArray(const char **strings,int count) {int i;cJSON *n=0,*p=0,*a=cJSON_CreateArray();for(i=0;a && ichild=n;else suffix_object(p,n);p=n;}return a;} 552 | 553 | /* Duplication */ 554 | cJSON *cJSON_Duplicate(cJSON *item,int recurse) 555 | { 556 | cJSON *newitem,*cptr,*nptr=0,*newchild; 557 | /* Bail on bad ptr */ 558 | if (!item) return 0; 559 | /* Create new item */ 560 | newitem=cJSON_New_Item(); 561 | if (!newitem) return 0; 562 | /* Copy over all vars */ 563 | newitem->type=item->type&(~cJSON_IsReference),newitem->valueint=item->valueint,newitem->valuedouble=item->valuedouble; 564 | if (item->valuestring) {newitem->valuestring=cJSON_strdup(item->valuestring); if (!newitem->valuestring) {cJSON_Delete(newitem);return 0;}} 565 | if (item->string) {newitem->string=cJSON_strdup(item->string); if (!newitem->string) {cJSON_Delete(newitem);return 0;}} 566 | /* If non-recursive, then we're done! */ 567 | if (!recurse) return newitem; 568 | /* Walk the ->next chain for the child. 
*/ 569 | cptr=item->child; 570 | while (cptr) 571 | { 572 | newchild=cJSON_Duplicate(cptr,1); /* Duplicate (with recurse) each item in the ->next chain */ 573 | if (!newchild) {cJSON_Delete(newitem);return 0;} 574 | if (nptr) {nptr->next=newchild,newchild->prev=nptr;nptr=newchild;} /* If newitem->child already set, then crosswire ->prev and ->next and move on */ 575 | else {newitem->child=newchild;nptr=newchild;} /* Set newitem->child and move to it */ 576 | cptr=cptr->next; 577 | } 578 | return newitem; 579 | } 580 | 581 | void cJSON_Minify(char *json) 582 | { 583 | char *into=json; 584 | while (*json) 585 | { 586 | if (*json==' ') json++; 587 | else if (*json=='\t') json++; // Whitespace characters. 588 | else if (*json=='\r') json++; 589 | else if (*json=='\n') json++; 590 | else if (*json=='/' && json[1]=='/') while (*json && *json!='\n') json++; // double-slash comments, to end of line. 591 | else if (*json=='/' && json[1]=='*') {while (*json && !(*json=='*' && json[1]=='/')) json++;json+=2;} // multiline comments. 592 | else if (*json=='\"'){*into++=*json++;while (*json && *json!='\"'){if (*json=='\\') *into++=*json++;*into++=*json++;}*into++=*json++;} // string literals, which are \" sensitive. 593 | else *into++=*json++; // All other characters. 594 | } 595 | *into=0; // and null-terminate. 
/*
  Copyright (c) 2009 Dave Gamble

  Permission is hereby granted, free of charge, to any person obtaining a copy
  of this software and associated documentation files (the "Software"), to deal
  in the Software without restriction, including without limitation the rights
  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  copies of the Software, and to permit persons to whom the Software is
  furnished to do so, subject to the following conditions:

  The above copyright notice and this permission notice shall be included in
  all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  THE SOFTWARE.
*/

#ifndef cJSON__h
#define cJSON__h

#include <stddef.h>	/* size_t, used by cJSON_Hooks; keeps this header self-contained */

#ifdef __cplusplus
extern "C"
{
#endif

/* cJSON Types: */
#define cJSON_False 0
#define cJSON_True 1
#define cJSON_NULL 2
#define cJSON_Number 3
#define cJSON_String 4
#define cJSON_Array 5
#define cJSON_Object 6

#define cJSON_IsReference 256

/* The cJSON structure: */
typedef struct cJSON {
	struct cJSON *next,*prev;	/* next/prev allow you to walk array/object chains. Alternatively, use GetArraySize/GetArrayItem/GetObjectItem */
	struct cJSON *child;		/* An array or object item will have a child pointer pointing to a chain of the items in the array/object. */

	int type;					/* The type of the item, as above. */

	char *valuestring;			/* The item's string, if type==cJSON_String */
	int valueint;				/* The item's number, if type==cJSON_Number */
	double valuedouble;			/* The item's number, if type==cJSON_Number */

	char *string;				/* The item's name string, if this item is the child of, or is in the list of subitems of an object. */
} cJSON;

/* Allocator hooks: a malloc-style and a free-style function. */
typedef struct cJSON_Hooks {
      void *(*malloc_fn)(size_t sz);
      void (*free_fn)(void *ptr);
} cJSON_Hooks;

/* Supply malloc and free functions to cJSON */
extern void cJSON_InitHooks(cJSON_Hooks* hooks);


/* Supply a block of JSON, and this returns a cJSON object you can interrogate. Call cJSON_Delete when finished. */
extern cJSON *cJSON_Parse(const char *value);
/* Render a cJSON entity to text for transfer/storage. Free the char* when finished. */
extern char  *cJSON_Print(cJSON *item);
/* Render a cJSON entity to text for transfer/storage without any formatting. Free the char* when finished. */
extern char  *cJSON_PrintUnformatted(cJSON *item);
/* Delete a cJSON entity and all subentities. */
extern void   cJSON_Delete(cJSON *c);

/* Returns the number of items in an array (or object). */
extern int	  cJSON_GetArraySize(cJSON *array);
/* Retrieve item number "item" from array "array". Returns NULL if unsuccessful. */
extern cJSON *cJSON_GetArrayItem(cJSON *array,int item);
/* Get item "string" from object. Case insensitive. */
extern cJSON *cJSON_GetObjectItem(cJSON *object,const char *string);

/* For analysing failed parses. This returns a pointer to the parse error. You'll probably need to look a few chars back to make sense of it. Defined when cJSON_Parse() returns 0. 0 when cJSON_Parse() succeeds. */
extern const char *cJSON_GetErrorPtr(void);

/* These calls create a cJSON item of the appropriate type. */
extern cJSON *cJSON_CreateNull(void);
extern cJSON *cJSON_CreateTrue(void);
extern cJSON *cJSON_CreateFalse(void);
extern cJSON *cJSON_CreateBool(int b);
extern cJSON *cJSON_CreateNumber(double num);
extern cJSON *cJSON_CreateString(const char *string);
extern cJSON *cJSON_CreateArray(void);
extern cJSON *cJSON_CreateObject(void);

/* These utilities create an Array of count items. */
extern cJSON *cJSON_CreateIntArray(const int *numbers,int count);
extern cJSON *cJSON_CreateFloatArray(const float *numbers,int count);
extern cJSON *cJSON_CreateDoubleArray(const double *numbers,int count);
extern cJSON *cJSON_CreateStringArray(const char **strings,int count);

/* Append item to the specified array/object. */
extern void cJSON_AddItemToArray(cJSON *array, cJSON *item);
extern void	cJSON_AddItemToObject(cJSON *object,const char *string,cJSON *item);
/* Append reference to item to the specified array/object. Use this when you want to add an existing cJSON to a new cJSON, but don't want to corrupt your existing cJSON. */
extern void cJSON_AddItemReferenceToArray(cJSON *array, cJSON *item);
extern void	cJSON_AddItemReferenceToObject(cJSON *object,const char *string,cJSON *item);

/* Remove/Detach items from Arrays/Objects. */
extern cJSON *cJSON_DetachItemFromArray(cJSON *array,int which);
extern void   cJSON_DeleteItemFromArray(cJSON *array,int which);
extern cJSON *cJSON_DetachItemFromObject(cJSON *object,const char *string);
extern void   cJSON_DeleteItemFromObject(cJSON *object,const char *string);

/* Update array items. */
extern void cJSON_ReplaceItemInArray(cJSON *array,int which,cJSON *newitem);
extern void cJSON_ReplaceItemInObject(cJSON *object,const char *string,cJSON *newitem);

/* Duplicate a cJSON item */
extern cJSON *cJSON_Duplicate(cJSON *item,int recurse);
/* Duplicate will create a new, identical cJSON item to the one you pass, in new memory that will
need to be released. With recurse!=0, it will duplicate any children connected to the item.
The item->next and ->prev pointers are always zero on return from Duplicate. */

/* ParseWithOpts allows you to require (and check) that the JSON is null terminated, and to retrieve the pointer to the final byte parsed. */
extern cJSON *cJSON_ParseWithOpts(const char *value,const char **return_parse_end,int require_null_terminated);

/* Remove whitespace and comments from a JSON text, in place. */
extern void cJSON_Minify(char *json);

/* Macros for creating things quickly. */
#define cJSON_AddNullToObject(object,name)		cJSON_AddItemToObject(object, name, cJSON_CreateNull())
#define cJSON_AddTrueToObject(object,name)		cJSON_AddItemToObject(object, name, cJSON_CreateTrue())
#define cJSON_AddFalseToObject(object,name)		cJSON_AddItemToObject(object, name, cJSON_CreateFalse())
#define cJSON_AddBoolToObject(object,name,b)	cJSON_AddItemToObject(object, name, cJSON_CreateBool(b))
#define cJSON_AddNumberToObject(object,name,n)	cJSON_AddItemToObject(object, name, cJSON_CreateNumber(n))
#define cJSON_AddStringToObject(object,name,s)	cJSON_AddItemToObject(object, name, cJSON_CreateString(s))

/* When assigning an integer value, it needs to be propagated to valuedouble too. */
#define cJSON_SetIntValue(object,val)			((object)?(object)->valueint=(object)->valuedouble=(val):(val))

#ifdef __cplusplus
}
#endif

#endif
*/ 137 | #define cJSON_SetIntValue(object,val) ((object)?(object)->valueint=(object)->valuedouble=(val):(val)) 138 | 139 | #ifdef __cplusplus 140 | } 141 | #endif 142 | 143 | #endif 144 | -------------------------------------------------------------------------------- /vad_baidu/http_request.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include "http_request.h" 6 | 7 | #define MAX_BYTES_RECV (102400) 8 | static char header_buffer[CURL_ERROR_SIZE]; 9 | 10 | static size_t write_data(void *ptr, size_t size, size_t nmemb, void *stream) 11 | { 12 | if(strlen((char *)stream) + strlen((char *)ptr) > MAX_BYTES_RECV) 13 | return 0; 14 | strcat(stream, (char *)ptr); 15 | return size*nmemb; 16 | } 17 | 18 | static size_t http_get_header(void *ptr, size_t size, size_t nmemb, void *stream) 19 | { 20 | if(strstr((char*)(ptr), "HTTP") != NULL) 21 | sprintf(header_buffer,"%s",(char*)(ptr)); 22 | return size*nmemb; 23 | } 24 | 25 | static int http_request(int dopost, char *url, char *fields, 26 | struct curl_slist *headers, char *cookie, char *response) 27 | { 28 | CURL *curl; 29 | CURLcode res; 30 | int ret = -1; 31 | char curl_errbuf[CURL_ERROR_SIZE]; 32 | 33 | curl_global_init(CURL_GLOBAL_DEFAULT); 34 | 35 | curl = curl_easy_init(); 36 | if(!curl) 37 | goto failed; 38 | 39 | if(dopost) 40 | curl_easy_setopt(curl, CURLOPT_POST, 1); 41 | curl_easy_setopt(curl, CURLOPT_URL, url); 42 | if(headers) 43 | curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers); 44 | if(fields) { 45 | curl_easy_setopt(curl, CURLOPT_POSTFIELDS, fields); 46 | curl_easy_setopt(curl,CURLOPT_POSTFIELDSIZE, strlen(fields)); 47 | } 48 | curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_data); 49 | curl_easy_setopt(curl, CURLOPT_WRITEDATA, response); 50 | 51 | // set http connection timeout 52 | curl_easy_setopt( curl, CURLOPT_CONNECTTIMEOUT, 60); 53 | // set http receive timeout 54 | curl_easy_setopt( curl, CURLOPT_TIMEOUT, 
60); 55 | curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, *http_get_header); 56 | curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, curl_errbuf); 57 | curl_easy_setopt(curl,CURLOPT_VERBOSE,0L); 58 | 59 | if(cookie) { 60 | curl_easy_setopt(curl, CURLOPT_COOKIEJAR, cookie); 61 | curl_easy_setopt(curl, CURLOPT_COOKIEFILE, cookie); 62 | } 63 | //curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L); 64 | 65 | res = curl_easy_perform(curl); 66 | curl_easy_cleanup(curl); 67 | if(res == CURLE_OK) 68 | ret = 0; 69 | else 70 | sprintf(header_buffer,"%s",curl_errbuf); 71 | 72 | failed: 73 | curl_global_cleanup(); 74 | return ret; 75 | } 76 | 77 | int http_doPost(char *url, char *fields, 78 | struct curl_slist *headers, char *cookie, char *response) 79 | { 80 | return http_request(1, url, fields, headers, cookie, response); 81 | } 82 | 83 | int http_doGet(char *url, char *fields, 84 | struct curl_slist *headers, char *cookie, char *response) 85 | { 86 | return http_request(0, url, fields, headers, cookie, response); 87 | } 88 | 89 | int http_getHeader(char *header) 90 | { 91 | return sprintf(header,"%s",header_buffer); 92 | } 93 | -------------------------------------------------------------------------------- /vad_baidu/http_request.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef HTTP_REQUEST_HEADER 3 | #define HTTP_REQUEST_HEADER 4 | #include 5 | 6 | int http_doPost(char *url, char *fields, 7 | struct curl_slist *headers, char *cookie, char *response); 8 | int http_doGet(char *url, char *fields, 9 | struct curl_slist *headers, char *cookie, char *response); 10 | int http_getHeader(char *header); 11 | #endif //!HTTP_REQUEST_HEADER 12 | -------------------------------------------------------------------------------- /vad_baidu/https_request.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include "https_request.h" 6 | //#include "log.h" 7 | 8 | #define 
MAX_BYTES_RECV (10240) 9 | static char header_buffer[CURL_ERROR_SIZE]; 10 | 11 | static size_t https_write_data(void *ptr, size_t size, size_t nmemb, void *stream) 12 | { 13 | if(strlen((char *)stream) + strlen((char *)ptr) > MAX_BYTES_RECV) 14 | return 0; 15 | strcat(stream, (char *)ptr); 16 | return size*nmemb; 17 | } 18 | 19 | static size_t https_get_header(void *ptr, size_t size, size_t nmemb, void *stream) 20 | { 21 | if(strstr((char*)(ptr), "HTTP") != NULL) 22 | sprintf(header_buffer,"%s",(char*)(ptr)); 23 | return size*nmemb; 24 | } 25 | 26 | static int https_request(int dopost, char *url, char *fields, 27 | struct curl_slist *headers, char *cookie, char *response, char* pClientCert, char* pRootCert) 28 | { 29 | CURL *curl; 30 | CURLcode res; 31 | int ret = -1; 32 | char curl_errbuf[CURL_ERROR_SIZE]; 33 | 34 | curl_global_init(CURL_GLOBAL_DEFAULT); 35 | 36 | curl = curl_easy_init(); 37 | if(!curl) 38 | goto failed; 39 | 40 | if(dopost) 41 | curl_easy_setopt(curl, CURLOPT_POST, 1); 42 | curl_easy_setopt(curl, CURLOPT_URL, url); 43 | if(headers) 44 | curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers); 45 | if(fields) { 46 | curl_easy_setopt(curl, CURLOPT_POSTFIELDS, fields); 47 | curl_easy_setopt(curl,CURLOPT_POSTFIELDSIZE, strlen(fields)); 48 | } 49 | curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, https_write_data); 50 | curl_easy_setopt(curl, CURLOPT_WRITEDATA, response); 51 | // set https connection timeout 52 | curl_easy_setopt( curl, CURLOPT_CONNECTTIMEOUT, 60); 53 | // set https receive timeout 54 | curl_easy_setopt( curl, CURLOPT_TIMEOUT, 60); 55 | // for https only 56 | curl_easy_setopt(curl,CURLOPT_SSLCERT,pClientCert); 57 | curl_easy_setopt(curl,CURLOPT_CAINFO,pRootCert); 58 | curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L); 59 | curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 1L); 60 | curl_easy_setopt(curl,CURLOPT_VERBOSE,0L); 61 | curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, *https_get_header); 62 | curl_easy_setopt(curl, CURLOPT_ERRORBUFFER, 
curl_errbuf); 63 | 64 | if(cookie) { 65 | curl_easy_setopt(curl, CURLOPT_COOKIEJAR, cookie); 66 | curl_easy_setopt(curl, CURLOPT_COOKIEFILE, cookie); 67 | } 68 | //curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L); 69 | memset(header_buffer,0,sizeof(header_buffer)); 70 | res = curl_easy_perform(curl); 71 | curl_easy_cleanup(curl); 72 | if(res == CURLE_OK) 73 | ret = 0; 74 | else 75 | sprintf(header_buffer,"%s",curl_errbuf); 76 | failed: 77 | curl_global_cleanup(); 78 | return ret; 79 | } 80 | 81 | int https_doPost(char *url, char *fields, 82 | struct curl_slist *headers, char *cookie, char *response, char *pClientCert, char *pRootCert) 83 | { 84 | //log_debug2("url:%s\ndata:%s\n", (url)?url:"null", (fields)?fields:"null"); 85 | return https_request(1, url, fields, headers, cookie, response, pClientCert, pRootCert); 86 | } 87 | 88 | int https_doGet(char *url, char *fields, 89 | struct curl_slist *headers, char *cookie, char *response, char *pClientCert, char *pRootCert) 90 | { 91 | return https_request(0, url, fields, headers, cookie, response, pClientCert, pRootCert); 92 | } 93 | 94 | int https_getHeader(char *header) 95 | { 96 | return sprintf(header,"%s",header_buffer); 97 | } 98 | -------------------------------------------------------------------------------- /vad_baidu/https_request.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef HTTPS_REQUEST_HEADER 3 | #define HTTPS_REQUEST_HEADER 4 | #include 5 | 6 | int https_doPost(char *url, char *fields, 7 | struct curl_slist *headers, char *cookie, char *response, char* pClientCert, char* pRootCert); 8 | int https_doGet(char *url, char *fields, 9 | struct curl_slist *headers, char *cookie, char *response, char* pClientCert, char* pRootCert); 10 | int https_getHeader(char *header); 11 | #endif //!HTTPS_REQUEST_HEADER 12 | -------------------------------------------------------------------------------- /vad_baidu/typedef.h: 
-------------------------------------------------------------------------------- 1 | /* 2 | *=================================================================== 3 | * 3GPP AMR Wideband Floating-point Speech Codec 4 | *=================================================================== 5 | */ 6 | #ifndef typedef_h 7 | #define typedef_h 8 | 9 | /* change these typedef declarations to correspond with your platform */ 10 | typedef char Word8; 11 | typedef unsigned char UWord8; 12 | typedef short Word16; 13 | typedef unsigned short UWord16; 14 | typedef long Word32; 15 | typedef double Float64; 16 | typedef float Float32; 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /vad_baidu/vadtest.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "wb_vad.h" 3 | /* Use the newer ALSA API */ 4 | #define ALSA_PCM_NEW_HW_PARAMS_API 5 | #include 6 | #include 7 | 8 | #include 9 | #include "cJSON.h" 10 | #include "http_request.h" 11 | #include "https_request.h" 12 | #include "base64.h" 13 | #define MAX_BUFFER_SIZE 512 14 | #define BUFFER_MAX_LENGTH FRAME_LEN*1024 15 | 16 | int getToken(char* cuid, char* apikey, char* secretkey, char* token) 17 | { 18 | int ret = -1; 19 | 20 | char host[MAX_BUFFER_SIZE]; 21 | char buffer[512]; 22 | 23 | cJSON *root = NULL; 24 | cJSON *item = NULL; 25 | 26 | memset(host,0,sizeof(host)); 27 | memset(buffer,0,sizeof(buffer)); 28 | snprintf(host, sizeof(host), 29 | "https://openapi.baidu.com/oauth/2.0/token?grant_type=client_credentials&client_id=%s&client_secret=%s", 30 | apikey, secretkey); 31 | https_doGet(host, NULL, NULL, NULL, buffer,NULL,NULL); 32 | 33 | root = cJSON_Parse(buffer); 34 | if(root == NULL) 35 | goto failed; 36 | item = cJSON_GetObjectItem(root, "access_token"); 37 | if(item != NULL){ 38 | strcpy(token, item->valuestring); 39 | ret = 0; 40 | } 41 | 42 | failed: 43 | return ret; 44 | } 45 | 46 | int getRecognitionResult(char* 
cuid, char* token, int rate, char* lan, char* audiodata, int content_len, char* response) 47 | { 48 | char host[MAX_BUFFER_SIZE]; 49 | memset(host, 0, sizeof(host)); 50 | sprintf(host, "%s", "http://vop.baidu.com/server_api"); 51 | 52 | char* encode_data = base64_encode((const char *)audiodata, content_len); 53 | if (0 == strlen(encode_data)) 54 | { 55 | printf("base64 encoded data is empty.\n"); 56 | return -1; 57 | } 58 | 59 | cJSON *root = NULL; 60 | char *body = NULL; 61 | int body_len; 62 | root = cJSON_CreateObject(); 63 | cJSON_AddStringToObject(root, "format", "pcm"); 64 | cJSON_AddNumberToObject(root, "rate", rate); 65 | cJSON_AddStringToObject(root, "lan", lan); 66 | cJSON_AddNumberToObject(root, "channel", 1); 67 | cJSON_AddStringToObject(root, "token", token); 68 | cJSON_AddStringToObject(root, "cuid", cuid); 69 | cJSON_AddNumberToObject(root, "len", content_len); 70 | cJSON_AddStringToObject(root, "speech", encode_data); 71 | body = cJSON_Print(root); 72 | body_len = strlen(body); 73 | cJSON_Delete(root); 74 | root = NULL; 75 | 76 | char tmp[MAX_BUFFER_SIZE]; 77 | memset(tmp, 0, sizeof(tmp)); 78 | struct curl_slist *headerlist = NULL; 79 | sprintf(tmp,"%s","application/json; charset=utf-8"); 80 | headerlist = curl_slist_append(headerlist, tmp); 81 | sprintf(tmp,"Content-Length: %d", body_len); 82 | headerlist = curl_slist_append(headerlist, tmp); 83 | 84 | http_doPost(host,body,headerlist,NULL,response); 85 | curl_slist_free_all(headerlist); 86 | return 0; 87 | } 88 | 89 | 90 | void main(int argc,char* argv[]) 91 | { 92 | char* pcm_device_name="hw:1,0"; 93 | if(argc == 1) 94 | { 95 | printf("use default pcm device: %s\n",pcm_device_name); 96 | } 97 | else if(argc == 2) 98 | { 99 | pcm_device_name= argv[1]; 100 | } 101 | else 102 | { 103 | printf("usage: alsa_vad_yuyin pcm_device\n"); 104 | return; 105 | } 106 | 107 | // for baidu yuyin, change it by your username and password 108 | char *cuid = "5692369"; 109 | char *apikey = 
"7IAD225W24bOdZwxqzpO9MSR"; 110 | char *secretkey = "vhC88sPg2VASSSYx1EPuNFd3GUuYDbA5"; 111 | char token[64]; 112 | char *lan = "zh"; 113 | int ret; 114 | char response[1024]; 115 | 116 | // for alsa 117 | int size; 118 | int size_one_channel; 119 | snd_pcm_t *handle; 120 | snd_pcm_hw_params_t *params; 121 | int dir; 122 | char *buffer; 123 | char *buffer_one_channel; 124 | int i; 125 | 126 | snd_pcm_stream_t stream = SND_PCM_STREAM_CAPTURE; 127 | snd_pcm_access_t mode = SND_PCM_ACCESS_RW_INTERLEAVED; 128 | snd_pcm_format_t format = SND_PCM_FORMAT_U16_LE; 129 | unsigned int channels = 2; 130 | unsigned int rate = 16000; 131 | snd_pcm_uframes_t frames = FRAME_LEN; 132 | int bit_per_sample,bit_per_frame,chunk_bytes; 133 | 134 | //for vda 135 | float indata[FRAME_LEN]; 136 | short outdata[FRAME_LEN]; 137 | VadVars *vadstate; 138 | int temp,vad; 139 | int recording = -1; 140 | char* audio_buffer; 141 | char* tmp_buffer; 142 | int content_len = 0; 143 | tmp_buffer = audio_buffer =(char*)malloc(BUFFER_MAX_LENGTH); 144 | // alsa init 145 | int rc = snd_pcm_open(&handle, pcm_device_name,stream, 0); 146 | if (rc < 0) { 147 | fprintf(stderr,"unable to open pcm device: %s\n",snd_strerror(rc)); 148 | exit(1); 149 | } 150 | snd_pcm_hw_params_alloca(¶ms); 151 | snd_pcm_hw_params_any(handle, params); 152 | snd_pcm_hw_params_set_access(handle, params,mode); 153 | snd_pcm_hw_params_set_format(handle, params,format); 154 | snd_pcm_hw_params_set_channels(handle, params, channels); 155 | snd_pcm_hw_params_set_rate_near(handle, params,&rate, 0); 156 | snd_pcm_hw_params_set_period_size_near(handle,params, &frames, 0); 157 | rc = snd_pcm_hw_params(handle, params); 158 | if (rc < 0) { 159 | fprintf(stderr,"unable to set hw parameters: %s\n",snd_strerror(rc)); 160 | exit(1); 161 | } 162 | snd_pcm_hw_params_get_period_size(params,&frames, 0); 163 | size = frames * 4; /* 2 bytes/sample, 2 channels */ 164 | size_one_channel = frames * 2; 165 | buffer = (char*) malloc(size); 166 | 167 | //vad 
init 168 | wb_vad_init(&(vadstate)); 169 | 170 | while (1) { 171 | rc = snd_pcm_readi(handle, buffer, frames); 172 | if (rc == -EPIPE) { 173 | /* EPIPE means overrun */ 174 | fprintf(stderr, "overrun occurred\n"); 175 | snd_pcm_prepare(handle); 176 | } else if (rc < 0) { 177 | fprintf(stderr, "error from read: %s\n",snd_strerror(rc)); 178 | } else if (rc != (int)frames) { 179 | fprintf(stderr, "short read, read %d frames\n", rc); 180 | } 181 | for(i = 0; i< frames; i++) 182 | { 183 | indata[i]=0; 184 | temp = 0; 185 | memcpy(&temp,buffer+4*i,2); 186 | indata[i]=(float)temp; 187 | outdata[i]=temp; 188 | if(indata[i]>65535/2) 189 | indata[i]=indata[i]-65536; 190 | } 191 | if(recording == -1) 192 | { 193 | tmp_buffer= audio_buffer; 194 | content_len = 0; 195 | memset(audio_buffer,0,BUFFER_MAX_LENGTH); 196 | memset(response,0,1024); 197 | } 198 | 199 | vad=wb_vad(vadstate,indata); 200 | if(vad == 1){ 201 | recording = 1; 202 | } 203 | else if(vad == 0 && recording == 1){ 204 | ret = getToken(cuid,apikey,secretkey,token); 205 | ret = getRecognitionResult(cuid, token, rate, lan, audio_buffer, content_len, response); 206 | if(strstr(response,"success") != NULL) 207 | printf("-------%s\n",response); 208 | recording = -1; 209 | } 210 | 211 | if(recording == 1) 212 | { 213 | memcpy(tmp_buffer,outdata,2*FRAME_LEN); 214 | tmp_buffer =tmp_buffer+2*FRAME_LEN; 215 | content_len =content_len+2*FRAME_LEN; 216 | } 217 | 218 | } 219 | free(buffer); 220 | free(audio_buffer); 221 | } 222 | -------------------------------------------------------------------------------- /vad_baidu/wb_vad.c: -------------------------------------------------------------------------------- 1 | /****************************************************************************** 2 | * INCLUDE FILES 3 | ******************************************************************************/ 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "wb_vad.h" 9 | 
/****************************************************************************** 10 | * PRIVATE PROGRAM CODE 11 | ******************************************************************************/ 12 | /****************************************************************************** 13 | * 14 | * Function : filter5 15 | * Purpose : Fifth-order half-band lowpass/highpass filter pair with 16 | * decimation. 17 | * 18 | ******************************************************************************* 19 | */ 20 | static void filter5( 21 | float *in0, /* i/o : input values; output low-pass part */ 22 | float *in1, /* i/o : input values; output high-pass part */ 23 | float data[] /* i/o : updated filter memory */ 24 | ) 25 | { 26 | float temp0, temp1, temp2; 27 | temp0 = *in0 - COEFF5_1 * data[0]; 28 | temp1 = data[0] + COEFF5_1 * temp0; 29 | data[0] = temp0; 30 | temp0 = *in1 - COEFF5_2 * data[1]; 31 | temp2 = data[1] + COEFF5_2 * temp0; 32 | data[1] = temp0; 33 | *in0 = (temp1 + temp2)/2.0f; 34 | *in1 = (temp1 - temp2)/2.0f; 35 | } 36 | /****************************************************************************** 37 | * 38 | * Function : filter3 39 | * Purpose : Third-order half-band lowpass/highpass filter pair with 40 | * decimation. 41 | * 42 | ******************************************************************************* 43 | */ 44 | static void filter3( 45 | float *in0, /* i/o : input values; output low-pass part */ 46 | float *in1, /* i/o : input values; output high-pass part */ 47 | float *data /* i/o : updated filter memory */ 48 | ) 49 | { 50 | float temp1, temp2; 51 | temp1 = *in1 - COEFF3 * *data; 52 | temp2 = *data + COEFF3 * temp1; 53 | *data = temp1; 54 | *in1 = (*in0 - temp2)/2.0f; 55 | *in0 = (*in0 + temp2)/2.0f; 56 | } 57 | /****************************************************************************** 58 | * 59 | * Function : level_calculation 60 | * Purpose : Calculate signal level in a sub-band. 
Level is calculated 61 | * by summing absolute values of the input data. 62 | * 63 | * Because speech coder has a lookahead, signal level calculated 64 | * over the lookahead (data[count1 - count2]) is stored (*sub_level) 65 | * and added to the level of the next frame. Additionally, group 66 | * delay and decimation of the filter bank is taken into the count 67 | * for the values of the counters (count1, count2). 68 | * 69 | ******************************************************************************* 70 | */ 71 | static float level_calculation( /* return: signal level */ 72 | float data[], /* i : signal buffer */ 73 | float *sub_level, /* i : level calculated at the end of the previous frame*/ 74 | /* o : level of signal calculated from the last */ 75 | /* (count2 - count1) samples */ 76 | Word16 count1, /* i : number of samples to be counted */ 77 | Word16 count2, /* i : number of samples to be counted */ 78 | Word16 ind_m, /* i : step size for the index of the data buffer */ 79 | Word16 ind_a, /* i : starting index of the data buffer */ 80 | float scale /* i : scaling for the level calculation */ 81 | ) 82 | { 83 | double l_temp1, l_temp2; 84 | float level; 85 | Word16 i; 86 | l_temp1 = 0.0; 87 | for (i = count1; i < count2; i++) 88 | { 89 | l_temp1 += fabs(data[ind_m*i+ind_a]); 90 | } 91 | l_temp1 *= 2.0f; 92 | l_temp2 = l_temp1 + *sub_level/scale; 93 | *sub_level = (float)(l_temp1*scale); 94 | for (i = 0; i < count1; i++) 95 | { 96 | l_temp2 += 2.0f*fabs(data[ind_m*i+ind_a]); 97 | } 98 | level = (float)(l_temp2*scale); 99 | return level; 100 | } 101 | /****************************************************************************** 102 | * 103 | * Function : filter_bank 104 | * Purpose : Divide input signal into bands and calculate level of 105 | * the signal in each band 106 | * 107 | ******************************************************************************* 108 | */ 109 | static void filter_bank( 110 | VadVars *st, /* i/o : State struct */ 111 | float 
in[], /* i : input frame */ 112 | float level[] /* 0 : signal levels at each band */ 113 | ) 114 | { 115 | Word16 i; 116 | float tmp_buf[FRAME_LEN]; 117 | /* shift input 1 bit down for safe scaling */ 118 | for (i = 0; i < FRAME_LEN; i++) { 119 | tmp_buf[i] = in[i]/2.0f; 120 | } 121 | /* run the filter bank */ 122 | for (i = 0;i < FRAME_LEN/2; i++) { 123 | filter5(&tmp_buf[2*i],&tmp_buf[2*i+1],st->a_data5[0]); 124 | } 125 | for (i = 0;i < FRAME_LEN/4; i++) { 126 | filter5(&tmp_buf[4*i],&tmp_buf[4*i+2],st->a_data5[1]); 127 | filter5(&tmp_buf[4*i+1],&tmp_buf[4*i+3],st->a_data5[2]); 128 | } 129 | for (i = 0; i < FRAME_LEN/8; i++) 130 | { 131 | filter5(&tmp_buf[8*i], &tmp_buf[8*i+4], st->a_data5[3]); 132 | filter5(&tmp_buf[8*i+2], &tmp_buf[8*i+6], st->a_data5[4]); 133 | filter3(&tmp_buf[8*i+3],&tmp_buf[8*i+7],&st->a_data3[0]); 134 | } 135 | for (i = 0; i < FRAME_LEN/16; i++) 136 | { 137 | filter3(&tmp_buf[16*i+0], &tmp_buf[16*i+8], &st->a_data3[1]); 138 | filter3(&tmp_buf[16*i+4], &tmp_buf[16*i+12], &st->a_data3[2]); 139 | filter3(&tmp_buf[16*i+6], &tmp_buf[16*i+14], &st->a_data3[3]); 140 | } 141 | for (i = 0; i < FRAME_LEN/32; i++) 142 | { 143 | filter3(&tmp_buf[32*i+0], &tmp_buf[32*i+16], &st->a_data3[4]); 144 | filter3(&tmp_buf[32*i+8], &tmp_buf[32*i+24], &st->a_data3[5]); 145 | } 146 | /* calculate levels in each frequency band */ 147 | /* 4800 - 6400 Hz*/ 148 | level[11] = level_calculation(tmp_buf, &st->sub_level[11], 149 | FRAME_LEN/4-48, FRAME_LEN/4, 4, 1, 0.25); 150 | /* 4000 - 4800 Hz*/ 151 | level[10] = level_calculation(tmp_buf, &st->sub_level[10], 152 | FRAME_LEN/8-24, FRAME_LEN/8, 8, 7, 0.5); 153 | /* 3200 - 4000 Hz*/ 154 | level[9] = level_calculation(tmp_buf, &st->sub_level[9], 155 | FRAME_LEN/8-24, FRAME_LEN/8, 8, 3, 0.5); 156 | /* 2400 - 3200 Hz*/ 157 | level[8] = level_calculation(tmp_buf, &st->sub_level[8], 158 | FRAME_LEN/8-24, FRAME_LEN/8, 8, 2, 0.5); 159 | /* 2000 - 2400 Hz*/ 160 | level[7] = level_calculation(tmp_buf, &st->sub_level[7], 161 | 
FRAME_LEN/16-12, FRAME_LEN/16, 16, 14, 1.0); 162 | /* 1600 - 2000 Hz*/ 163 | level[6] = level_calculation(tmp_buf, &st->sub_level[6], 164 | FRAME_LEN/16-12, FRAME_LEN/16, 16, 6, 1.0); 165 | /* 1200 - 1600 Hz*/ 166 | level[5] = level_calculation(tmp_buf, &st->sub_level[5], 167 | FRAME_LEN/16-12, FRAME_LEN/16, 16, 4, 1.0); 168 | /* 800 - 1200 Hz*/ 169 | level[4] = level_calculation(tmp_buf, &st->sub_level[4], 170 | FRAME_LEN/16-12, FRAME_LEN/16, 16, 12, 1.0); 171 | /* 600 - 800 Hz*/ 172 | level[3] = level_calculation(tmp_buf, &st->sub_level[3], 173 | FRAME_LEN/32-6, FRAME_LEN/32, 32, 8, 2.0); 174 | /* 400 - 600 Hz*/ 175 | level[2] = level_calculation(tmp_buf, &st->sub_level[2], 176 | FRAME_LEN/32-6, FRAME_LEN/32, 32, 24, 2.0); 177 | /* 200 - 400 Hz*/ 178 | level[1] = level_calculation(tmp_buf, &st->sub_level[1], 179 | FRAME_LEN/32-6, FRAME_LEN/32, 32, 16, 2.0); 180 | /* 0 - 200 Hz*/ 181 | level[0] = level_calculation(tmp_buf, &st->sub_level[0], 182 | FRAME_LEN/32-6, FRAME_LEN/32, 32, 0, 2.0); 183 | } 184 | /****************************************************************************** 185 | * 186 | * Function : update_cntrl 187 | * Purpose : Control update of the background noise estimate. 
188 | * 189 | ******************************************************************************* 190 | */ 191 | static void update_cntrl( 192 | VadVars *st, /* i/o : State structure */ 193 | float level[] /* i : sub-band levels of the input frame */ 194 | ) 195 | { 196 | Word16 i; 197 | float stat_rat; 198 | float num, denom; 199 | float alpha; 200 | /* if fullband pitch or tone have been detected for a while, initialize stat_count */ 201 | if ((st->pitch_tone & 0x7c00) == 0x7c00) 202 | { 203 | st->stat_count = STAT_COUNT; 204 | } 205 | else 206 | { 207 | /* if 8 last vad-decisions have been "0", reinitialize stat_count */ 208 | if ((st->vadreg & 0x7f80) == 0) 209 | { 210 | st->stat_count = STAT_COUNT; 211 | } 212 | else 213 | { 214 | stat_rat = 0; 215 | for (i = 0; i < COMPLEN; i++) 216 | { 217 | if (level[i] > st->ave_level[i]) 218 | { 219 | num = level[i]; 220 | denom = st->ave_level[i]; 221 | } 222 | else 223 | { 224 | num = st->ave_level[i]; 225 | denom = level[i]; 226 | } 227 | /* Limit nimimum value of num and denom to STAT_THR_LEVEL */ 228 | if (num < STAT_THR_LEVEL) 229 | { 230 | num = STAT_THR_LEVEL; 231 | } 232 | if (denom < STAT_THR_LEVEL) 233 | { 234 | denom = STAT_THR_LEVEL; 235 | } 236 | stat_rat += num/denom * 64; 237 | } 238 | /* compare stat_rat with a threshold and update stat_count */ 239 | if (stat_rat > STAT_THR) 240 | { 241 | st->stat_count = STAT_COUNT; 242 | } 243 | else 244 | { 245 | if ((st->vadreg & 0x4000) != 0) 246 | { 247 | if (st->stat_count != 0) 248 | { 249 | st->stat_count--; 250 | } 251 | } 252 | } 253 | } 254 | } 255 | /* Update average amplitude estimate for stationarity estimation */ 256 | alpha = ALPHA4; 257 | if (st->stat_count == STAT_COUNT) 258 | { 259 | alpha = 1.0; 260 | } 261 | else if ((st->vadreg & 0x4000) == 0) 262 | { 263 | alpha = ALPHA5; 264 | } 265 | for (i = 0; i < COMPLEN; i++) 266 | { 267 | st->ave_level[i] += alpha *(level[i]- st->ave_level[i]); 268 | } 269 | } 270 | 
/****************************************************************************** 271 | * 272 | * Function : hangover_addition 273 | * Purpose : Add hangover after speech bursts 274 | * 275 | ******************************************************************************* 276 | */ 277 | static Word16 hangover_addition( /* return: VAD_flag indicating final VAD decision */ 278 | VadVars *st, /* i/o : State structure */ 279 | Word16 low_power, /* i : flag power of the input frame */ 280 | Word16 hang_len, /* i : hangover length */ 281 | Word16 burst_len /* i : minimum burst length for hangover addition */ 282 | ) 283 | { 284 | /* if the input power (pow_sum) is lower than a threshold, clear 285 | counters and set VAD_flag to "0" "fast exit" */ 286 | if (low_power != 0) 287 | { 288 | st->burst_count = 0; 289 | st->hang_count = 0; 290 | return 0; 291 | } 292 | /* update the counters (hang_count, burst_count) */ 293 | if ((st->vadreg & 0x4000) != 0) 294 | { 295 | st->burst_count++; 296 | if (st->burst_count >= burst_len) 297 | { 298 | st->hang_count = hang_len; 299 | } 300 | return 1; 301 | } 302 | else 303 | { 304 | st->burst_count = 0; 305 | if (st->hang_count > 0) 306 | { 307 | st->hang_count--; 308 | return 1; 309 | } 310 | } 311 | return 0; 312 | } 313 | /****************************************************************************** 314 | * 315 | * Function : noise_estimate_update 316 | * Purpose : Update of background noise estimate 317 | * 318 | ******************************************************************************* 319 | */ 320 | static void noise_estimate_update( 321 | VadVars *st, /* i/o : State structure */ 322 | float level[] /* i : sub-band levels of the input frame */ 323 | ) 324 | { 325 | Word16 i; 326 | float alpha_up, alpha_down, bckr_add, temp; 327 | /* Control update of bckr_est[] */ 328 | update_cntrl(st, level); 329 | /* Choose update speed */ 330 | bckr_add = 2.0; 331 | if ((0x7800 & st->vadreg) == 0) 332 | { 333 | alpha_up = ALPHA_UP1; 334 | 
alpha_down = ALPHA_DOWN1; 335 | } 336 | else 337 | { 338 | if (st->stat_count == 0) 339 | { 340 | alpha_up = ALPHA_UP2; 341 | alpha_down = ALPHA_DOWN2; 342 | } 343 | else 344 | { 345 | alpha_up = 0.0; 346 | alpha_down = ALPHA3; 347 | bckr_add = 0.0; 348 | } 349 | } 350 | /* Update noise estimate (bckr_est) */ 351 | for (i = 0; i < COMPLEN; i++) 352 | { 353 | temp = st->old_level[i] - st->bckr_est[i]; 354 | if (temp < 0.0) 355 | { /* update downwards*/ 356 | st->bckr_est[i] += -2 + (alpha_down * temp); 357 | /* limit minimum value of the noise estimate to NOISE_MIN */ 358 | if (st->bckr_est[i] < NOISE_MIN) 359 | { 360 | st->bckr_est[i] = NOISE_MIN; 361 | } 362 | } 363 | else 364 | { /* update upwards */ 365 | st->bckr_est[i] += bckr_add +(alpha_up * temp); 366 | /* limit maximum value of the noise estimate to NOISE_MAX */ 367 | if (st->bckr_est[i] > NOISE_MAX) 368 | { 369 | st->bckr_est[i] = NOISE_MAX; 370 | } 371 | } 372 | } 373 | /* Update signal levels of the previous frame (old_level) */ 374 | for(i = 0; i < COMPLEN; i++) 375 | { 376 | st->old_level[i] = level[i]; 377 | } 378 | } 379 | /****************************************************************************** 380 | * 381 | * Function : vad_decision 382 | * Purpose : Calculates VAD_flag 383 | * 384 | ******************************************************************************* 385 | */ 386 | static Word16 vad_decision( /*return value : VAD_flag */ 387 | VadVars *st, /* i/o : State structure */ 388 | float level[COMPLEN], /* i : sub-band levels of the input frame */ 389 | double pow_sum /* i : power of the input frame */ 390 | ) 391 | { 392 | Word16 i; 393 | double L_snr_sum; 394 | double L_temp; 395 | float vad_thr, temp, noise_level; 396 | Word16 low_power_flag; 397 | Word16 hang_len,burst_len; 398 | float ilog2_speech_level,ilog2_noise_level; 399 | float temp2; 400 | /* 401 | Calculate squared sum of the input levels (level) 402 | divided by the background noise components (bckr_est). 
403 | */ 404 | L_snr_sum = 0.0; 405 | for (i = 0; i < COMPLEN; i++) 406 | { 407 | temp = level[i]/st->bckr_est[i]; 408 | L_snr_sum += temp * temp; 409 | } 410 | /* Calculate average level of estimated background noise */ 411 | L_temp = 0.0; 412 | for (i = 1; i < COMPLEN; i++) /* ignore lowest band */ 413 | { 414 | L_temp += st->bckr_est[i]; 415 | } 416 | noise_level = (float)(L_temp/16.0f); 417 | /* 418 | if SNR is lower than a threshold (MIN_SPEECH_SNR), 419 | and increase speech_level 420 | */ 421 | temp = noise_level*MIN_SPEECH_SNR*8; 422 | if (st->speech_level < temp) { 423 | st->speech_level = temp; 424 | } 425 | ilog2_noise_level = (float)(-1024.0f*log10(noise_level / 2147483648.0f)/log10(2.0f)); 426 | /* 427 | If SNR is very poor, speech_level is probably corrupted by noise level. This 428 | is correctred by subtracting -MIN_SPEECH_SNR*noise_level from speech level 429 | */ 430 | ilog2_speech_level = (float)(-1024.0f*log10((st->speech_level-temp) / 2147483648.0f)/log10(2.0f)); 431 | /*ilog2_speech_level = ilog2(st->speech_level);*/ 432 | temp = NO_SLOPE * (ilog2_noise_level- NO_P1)+ THR_HIGH; 433 | temp2 = SP_CH_MIN + SP_SLOPE*(ilog2_speech_level - SP_P1); 434 | if (temp2 < SP_CH_MIN) { 435 | temp2 = SP_CH_MIN; 436 | } 437 | if (temp2 > SP_CH_MAX) { 438 | temp2 = SP_CH_MAX; 439 | } 440 | vad_thr = temp + temp2; 441 | if (vad_thr < THR_MIN) 442 | { 443 | vad_thr = THR_MIN; 444 | } 445 | /* Shift VAD decision register */ 446 | st->vadreg = (short)((st->vadreg)>>1); 447 | /* Make intermediate VAD decision */ 448 | if (L_snr_sum > (vad_thr*(float)COMPLEN/128.0f)) 449 | { 450 | st->vadreg = (Word16)(st->vadreg | 0x4000); 451 | } 452 | /* primary vad decsion made */ 453 | /* check if the input power (pow_sum) is lower than a threshold" */ 454 | if (pow_sum < VAD_POW_LOW) 455 | { 456 | low_power_flag = 1; 457 | } 458 | else 459 | { 460 | low_power_flag = 0; 461 | } 462 | /* Update speech subband background noise estimates */ 463 | noise_estimate_update(st, level); 
464 | hang_len = (Word16)((Word16)(HANG_SLOPE * (vad_thr - HANG_P1) - 0.5) + HANG_HIGH); 465 | if (hang_len < HANG_LOW) { 466 | hang_len = HANG_LOW; 467 | }; 468 | burst_len = (Word16)((Word16)(BURST_SLOPE * (vad_thr - BURST_P1) - 0.5) + BURST_HIGH); 469 | return(hangover_addition(st, low_power_flag, hang_len,burst_len)); 470 | } 471 | /****************************************************************************** 472 | * 473 | * Estimate_Speech() 474 | * Purpose : Estimate speech level 475 | * 476 | * Maximum signal level is searched and stored to the variable sp_max. 477 | * The speech frames must locate within SP_EST_COUNT number of frames to be counted. 478 | * Thus, noisy frames having occasional VAD = "1" decisions will not 479 | * affect to the estimated speech_level. 480 | * 481 | ******************************************************************************* 482 | */ 483 | static void Estimate_Speech( 484 | VadVars *st, /* i/o : State structure */ 485 | float in_level /* level of the input frame */ 486 | ) 487 | { 488 | float alpha, tmp; 489 | /* if the required activity count cannot be achieved, reset counters */ 490 | if (SP_ACTIVITY_COUNT > (SP_EST_COUNT - st->sp_est_cnt + st->sp_max_cnt)) 491 | { 492 | st->sp_est_cnt = 0; 493 | st->sp_max = 0.0; 494 | st->sp_max_cnt = 0; 495 | } 496 | st->sp_est_cnt++; 497 | if (((st->vadreg & 0x4000) || (in_level > st->speech_level)) 498 | && (in_level > MIN_SPEECH_LEVEL1)) 499 | { 500 | if (in_level > st->sp_max) { 501 | st->sp_max = in_level; 502 | } 503 | st->sp_max_cnt++; 504 | if (st->sp_max_cnt >= SP_ACTIVITY_COUNT) { 505 | tmp = st->sp_max/2.0f; /* scale to get "average" speech level*/ 506 | if (tmp > st->speech_level) { 507 | alpha = ALPHA_SP_UP; 508 | } 509 | else { 510 | alpha = ALPHA_SP_DOWN; 511 | } 512 | if (tmp > MIN_SPEECH_LEVEL2) { 513 | st->speech_level += alpha*(tmp - st->speech_level); 514 | } 515 | st->sp_max = 0.0; 516 | st->sp_max_cnt = 0; 517 | st->sp_est_cnt = 0; 518 | } 519 | } 520 | } 521 | 
/****************************************************************************** 522 | * PUBLIC PROGRAM CODE 523 | ******************************************************************************/ 524 | /****************************************************************************** 525 | * 526 | * Function: wb_vad_init 527 | * Purpose: Allocates state memory and initializes state memory 528 | * 529 | ******************************************************************************* 530 | */ 531 | int wb_vad_init ( /* return: non-zero with error, zero for ok. */ 532 | VadVars **state /* i/o : State structure */ 533 | ) 534 | { 535 | VadVars* s; 536 | if (state == (VadVars **) NULL){ 537 | fprintf(stderr, "vad_init: invalid parameter\n"); 538 | return -1; 539 | } 540 | *state = NULL; 541 | /* allocate memory */ 542 | if ((s = (VadVars *) malloc(sizeof(VadVars))) == NULL){ 543 | fprintf(stderr, "vad_init: can not malloc state structure\n"); 544 | return -1; 545 | } 546 | wb_vad_reset(s); 547 | *state = s; 548 | return 0; 549 | } 550 | /****************************************************************************** 551 | * 552 | * Function: wb_vad_reset 553 | * Purpose: Initializes state memory to zero 554 | * 555 | ******************************************************************************* 556 | ******************************************************************************* 557 | */ 558 | int wb_vad_reset ( /* return: non-zero with error, zero for ok. 
*/ 559 | VadVars *state /* i/o : State structure */ 560 | ) 561 | { 562 | Word16 i, j; 563 | if (state == (VadVars *) NULL){ 564 | fprintf(stderr, "vad_reset: invalid parameter\n"); 565 | return -1; 566 | } 567 | /* Initialize pitch detection variables */ 568 | state->pitch_tone = 0; 569 | state->vadreg = 0; 570 | state->hang_count = 0; 571 | state->burst_count = 0; 572 | state->hang_count = 0; 573 | /* initialize memory used by the filter bank */ 574 | for (i = 0; i < F_5TH_CNT; i++) 575 | { 576 | for (j = 0; j < 2; j++) 577 | { 578 | state->a_data5[i][j] = 0.0; 579 | } 580 | } 581 | for (i = 0; i < F_3TH_CNT; i++) 582 | { 583 | state->a_data3[i] = 0.0; 584 | } 585 | /* initialize the rest of the memory */ 586 | for (i = 0; i < COMPLEN; i++) 587 | { 588 | state->bckr_est[i] = NOISE_INIT; 589 | state->old_level[i] = NOISE_INIT; 590 | state->ave_level[i] = NOISE_INIT; 591 | state->sub_level[i] = 0; 592 | state->level[i] = 0.0; 593 | state->prevLevel[i] = 0.0; 594 | } 595 | state->sp_est_cnt = 0; 596 | state->sp_max = 0; 597 | state->sp_max_cnt = 0; 598 | state->speech_level = SPEECH_LEVEL_INIT; 599 | state->prev_pow_sum = 0; 600 | return 0; 601 | } 602 | /****************************************************************************** 603 | * 604 | * Function: wb_vad_exit 605 | * Purpose: The memory used for state memory is freed 606 | * 607 | ******************************************************************************* 608 | ******************************************************************************* 609 | */ 610 | void wb_vad_exit ( 611 | VadVars **state /* i/o : State structure */ 612 | ) 613 | { 614 | if (state == NULL || *state == NULL) 615 | return; 616 | /* deallocate memory */ 617 | free(*state); 618 | *state = NULL; 619 | return; 620 | } 621 | /****************************************************************************** 622 | * 623 | * Function : wb_vad_tone_detection 624 | * Purpose : Set tone flag if pitch gain is high. 
This is used to detect 625 | * signaling tones and other signals with high pitch gain. 626 | * 627 | ******************************************************************************* 628 | */ 629 | void wb_vad_pitch_tone_detection ( 630 | VadVars *st, /* i/o : State struct */ 631 | float p_gain /* pitch gain */ 632 | ) 633 | { 634 | /* update tone flag and pitch flag */ 635 | st->pitch_tone = (Word16)((st->pitch_tone)>>1); 636 | /* if (pitch_gain > TONE_THR) 637 | set tone flag 638 | */ 639 | if (p_gain > TONE_THR) 640 | { 641 | st->pitch_tone = (Word16)(st->pitch_tone | 0x4000); 642 | } 643 | } 644 | /****************************************************************************** 645 | * 646 | * Function : wb_vad 647 | * Purpose : Main program for Voice Activity Detection (VAD) for AMR 648 | * 649 | ******************************************************************************* 650 | */ 651 | Word16 wb_vad( /* Return value : VAD Decision, 1 = speech, 0 = noise */ 652 | VadVars *st, /* i/o : State structure */ 653 | float in_buf[] /* i : samples of the input frame */ 654 | ) 655 | { 656 | Word16 i; 657 | Word16 VAD_flag; 658 | float temp; 659 | double L_temp, pow_sum; 660 | for(i=0;iprevLevel[i] = st->level[i]; 662 | } 663 | /* Calculate power of the input frame. 
*/ 664 | L_temp = 0.0; 665 | for (i = 0; i < FRAME_LEN; i++) 666 | { 667 | L_temp += in_buf[i] * in_buf[i]; 668 | } 669 | L_temp *= 2.0; 670 | /* pow_sum = power of current frame and previous frame */ 671 | pow_sum = L_temp + st->prev_pow_sum; 672 | /* save power of current frame for next call */ 673 | st->prev_pow_sum = L_temp; 674 | /* If input power is very low, clear tone flag */ 675 | if (pow_sum < POW_PITCH_TONE_THR) 676 | { 677 | st->pitch_tone = (Word16)(st->pitch_tone & 0x1fff); 678 | } 679 | /* Run the filter bank and calculate signal levels at each band */ 680 | filter_bank(st, in_buf, st->level); 681 | /* compute VAD decision */ 682 | VAD_flag = vad_decision(st, st->level, pow_sum); 683 | /* Calculate input level */ 684 | L_temp = 0.0; 685 | for (i = 1; i < COMPLEN; i++) /* ignore lowest band */ 686 | { 687 | L_temp += st->level[i]; 688 | } 689 | temp = (float)(L_temp/16.0f); 690 | Estimate_Speech(st, temp); /* Estimate speech level */ 691 | return(VAD_flag); 692 | } 693 | -------------------------------------------------------------------------------- /vad_baidu/wb_vad.h: -------------------------------------------------------------------------------- 1 | #ifndef wb_vad_h 2 | #define wb_vad_h 3 | 4 | /****************************************************************************** 5 | * INCLUDE FILES 6 | ******************************************************************************/ 7 | #include "typedef.h" 8 | #include "wb_vad_c.h" 9 | 10 | /****************************************************************************** 11 | * DEFINITION OF DATA TYPES 12 | ******************************************************************************/ 13 | 14 | typedef struct 15 | { 16 | float bckr_est[COMPLEN]; /* background noise estimate */ 17 | float ave_level[COMPLEN]; /* averaged input components for stationary */ 18 | /* estimation */ 19 | float old_level[COMPLEN]; /* input levels of the previous frame */ 20 | float sub_level[COMPLEN]; /* input levels calculated 
at the end of a frame (lookahead) */ 21 | float a_data5[F_5TH_CNT][2]; /* memory for the filter bank */ 22 | float a_data3[F_3TH_CNT]; /* memory for the filter bank */ 23 | 24 | Word16 burst_count; /* counts length of a speech burst */ 25 | Word16 hang_count; /* hangover counter */ 26 | Word16 stat_count; /* stationary counter */ 27 | 28 | /* Note that each of the following two variables holds 15 flags. Each flag reserves 1 bit of the variable. The newest flag is 29 | * in the bit 15 (assuming that LSB is bit 1 and MSB is bit 16). */ 30 | Word16 vadreg; /* flags for intermediate VAD decisions */ 31 | Word16 pitch_tone; /* flags for pitch and tone detection */ 32 | 33 | Word16 sp_est_cnt; /* counter for speech level estimation */ 34 | float sp_max; /* maximum level */ 35 | Word16 sp_max_cnt; /* counts frames that contain speech */ 36 | float speech_level; /* estimated speech level */ 37 | double prev_pow_sum; /* power of previous frame */ 38 | 39 | float level[COMPLEN]; /* sub-band levels of the current frame, written by the filter bank in wb_vad() */ 40 | float prevLevel[COMPLEN]; /* copy of level[] taken at the start of wb_vad(), i.e. the previous frame's levels */ 41 | } VadVars; 42 | 43 | /* 44 | ******************************************************************************** 45 | * DECLARATION OF PROTOTYPES 46 | ******************************************************************************** 47 | */ 48 | int wb_vad_init(VadVars ** st); 49 | int wb_vad_reset(VadVars * st); 50 | void wb_vad_exit(VadVars ** st); 51 | void wb_vad_pitch_tone_detection(VadVars * st, float p_gain); 52 | Word16 wb_vad(VadVars * st, float in_buf[]); 53 | 54 | #endif 55 | -------------------------------------------------------------------------------- /vad_baidu/wb_vad_c.h: -------------------------------------------------------------------------------- 1 | #ifndef wb_vad_c_h 2 | #define wb_vad_c_h 3 | 4 | /* Frame and sub-band dimensions */ 5 | #define FRAME_LEN 256 /* Length (samples) of the input frame */ 6 | #define COMPLEN 12 /* Number of sub-bands used by VAD */ 7 | 8 | #define UNIRSHFT 7 /* = log2(MAX_16/UNITY), UNITY = 256 */ 9 | #define SCALE 128 /* (UNITY*UNITY)/512 */ 10
| 11 | #define TONE_THR 0.65f /* Threshold for tone detection */ 12 | 13 | /* constants for speech level estimation */ 14 | #define SP_EST_COUNT 80 15 | #define SP_ACTIVITY_COUNT 25 16 | #define LOG2_SP_ACTIVITY_COUNT 5 17 | #define ALPHA_SP_UP (1.0f - 0.85f) 18 | #define ALPHA_SP_DOWN (1.0f - 0.85f) 19 | 20 | #define NOM_LEVEL 2050.0f /* about -26 dBov */ 21 | #define SPEECH_LEVEL_INIT NOM_LEVEL 22 | #define MIN_SPEECH_LEVEL1 (NOM_LEVEL * 0.063f) /* NOM_LEVEL -24 dB */ 23 | #define MIN_SPEECH_LEVEL2 (NOM_LEVEL * 0.2f) /* NOM_LEVEL -14 dB */ 24 | #define MIN_SPEECH_SNR 0.125f /* 0 dB, lowest SNR estimation */ 25 | 26 | /* Constants for background spectrum update */ 27 | #define ALPHA_UP1 (1.0f - 0.95f) /* Normal update, upwards */ 28 | #define ALPHA_DOWN1 (1.0f - 0.936f) /* Normal update, downwards */ 29 | #define ALPHA_UP2 (1.0f - 0.985f) /* Forced update, upwards */ 30 | #define ALPHA_DOWN2 (1.0f - 0.943f) /* Forced update, downwards */ 31 | #define ALPHA3 (1.0f - 0.95f) /* Update downwards */ 32 | #define ALPHA4 (1.0f - 0.9f) /* For stationary estimation */ 33 | #define ALPHA5 (1.0f - 0.5f) /* For stationary estimation */ 34 | 35 | /* Constants for VAD threshold */ 36 | #define THR_MIN (1.6f*SCALE) /* Minimum threshold */ 37 | 38 | #define THR_HIGH (6.0f*SCALE) /* Highest threshold */ 39 | #define THR_LOW (1.7f*SCALE) /* Lowest threshold */ 40 | 41 | #define NO_P1 31744.0f /* ilog2(1), Noise level for highest threshold */ 42 | #define NO_P2 19786.0f /* ilog2(0.1), Noise level for lowest threshold */ 43 | 44 | #define NO_SLOPE ((float)(THR_LOW - THR_HIGH)/(float)(NO_P2 - NO_P1)) 45 | 46 | #define SP_CH_MIN (-0.75f*SCALE) 47 | #define SP_CH_MAX (0.75f*SCALE) 48 | 49 | #define SP_P1 22527.0f /* ilog2(NOM_LEVEL/4) */ 50 | #define SP_P2 17832.0f /* ilog2(NOM_LEVEL*4) */ 51 | 52 | #define SP_SLOPE ((float)(SP_CH_MAX - SP_CH_MIN)/(float)(SP_P2 - SP_P1)) 53 | 54 | /* Constants for hangover length */ 55 | #define HANG_HIGH 12 /* longest hangover */ 56 | #define HANG_LOW
2 /* shortest hangover */ 57 | #define HANG_P1 THR_LOW /* threshold for longest hangover */ 58 | #define HANG_P2 (4*SCALE) /* threshold for shortest hangover */ 59 | #define HANG_SLOPE ((float)(HANG_LOW-HANG_HIGH)/(float)(HANG_P2-HANG_P1)) 60 | 61 | /* Constants for burst length */ 62 | #define BURST_HIGH 8 /* longest burst length */ 63 | #define BURST_LOW 3 /* shortest burst length */ 64 | #define BURST_P1 THR_HIGH /* threshold for longest burst */ 65 | #define BURST_P2 THR_LOW /* threshold for shortest burst */ 66 | #define BURST_SLOPE ((float)(BURST_LOW-BURST_HIGH)/(float)(BURST_P2-BURST_P1)) 67 | 68 | /* Parameters for background spectrum recovery function */ 69 | #define STAT_COUNT 20 /* threshold of stationary detection counter */ 70 | 71 | #define STAT_THR_LEVEL 184 /* Threshold level for stationarity detection */ 72 | #define STAT_THR 1000 /* Threshold for stationarity detection */ 73 | 74 | /* Limits for background noise estimate */ 75 | 76 | #define NOISE_MIN 40 /* minimum */ 77 | #define NOISE_MAX 20000 /* maximum */ 78 | #define NOISE_INIT 150 /* initial */ 79 | 80 | /* Thresholds for signal power (now calculated on 2 frames) */ 81 | #define VAD_POW_LOW 30000.0f /* If input power is lower than this, VAD is set to 0 */ 82 | #define POW_PITCH_TONE_THR 686080.0f /* If input power is lower, pitch */ 83 | /* detection is ignored */ 84 | 85 | /* Constants for the filter bank */ 86 | #define COEFF3 0.407806f /* coefficient for the 3rd order filter */ 87 | #define COEFF5_1 0.670013f /* 1st coefficient for the 5th order filter */ 88 | #define COEFF5_2 0.195007f /* 2nd coefficient for the 5th order filter */ 89 | #define F_5TH_CNT 5 /* number of 5th order filters */ 90 | #define F_3TH_CNT 6 /* number of 3rd order filters */ 91 | 92 | #endif 93 | 94 | --------------------------------------------------------------------------------