├── LICENSE ├── Makefile ├── README.md ├── asr-html ├── index.html └── res │ ├── audiodisplay.js │ ├── main.js │ ├── mic128.png │ ├── recorder.js │ └── recorderWorker.js ├── configure └── src ├── Decoder.h ├── FcgiDecodingApp.cc ├── FcgiDecodingApp.h ├── Makefile ├── Nnet3LatgenFasterDecoder.cc ├── Nnet3LatgenFasterDecoder.h ├── OnlineDecoder.cc ├── OnlineDecoder.h ├── QueryStringParser.cc ├── QueryStringParser.h ├── QueryStringParserTests.cc ├── Request.h ├── RequestRawReader.cc ├── RequestRawReader.h ├── Response.cc ├── Response.h ├── ResponseJsonWriter.cc ├── ResponseJsonWriter.h ├── ResponseMultipartJsonWriter.cc ├── ResponseMultipartJsonWriter.h ├── Timing.cc ├── Timing.h └── fcgi-nnet3-decoder.cc /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, and 10 | distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by the copyright 13 | owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all other entities 16 | that control, are controlled by, or are under common control with that entity. 17 | For the purposes of this definition, "control" means (i) the power, direct or 18 | indirect, to cause the direction or management of such entity, whether by 19 | contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the 20 | outstanding shares, or (iii) beneficial ownership of such entity. 21 | 22 | "You" (or "Your") shall mean an individual or Legal Entity exercising 23 | permissions granted by this License. 
24 | 25 | "Source" form shall mean the preferred form for making modifications, including 26 | but not limited to software source code, documentation source, and configuration 27 | files. 28 | 29 | "Object" form shall mean any form resulting from mechanical transformation or 30 | translation of a Source form, including but not limited to compiled object code, 31 | generated documentation, and conversions to other media types. 32 | 33 | "Work" shall mean the work of authorship, whether in Source or Object form, made 34 | available under the License, as indicated by a copyright notice that is included 35 | in or attached to the work (an example is provided in the Appendix below). 36 | 37 | "Derivative Works" shall mean any work, whether in Source or Object form, that 38 | is based on (or derived from) the Work and for which the editorial revisions, 39 | annotations, elaborations, or other modifications represent, as a whole, an 40 | original work of authorship. For the purposes of this License, Derivative Works 41 | shall not include works that remain separable from, or merely link (or bind by 42 | name) to the interfaces of, the Work and Derivative Works thereof. 43 | 44 | "Contribution" shall mean any work of authorship, including the original version 45 | of the Work and any modifications or additions to that Work or Derivative Works 46 | thereof, that is intentionally submitted to Licensor for inclusion in the Work 47 | by the copyright owner or by an individual or Legal Entity authorized to submit 48 | on behalf of the copyright owner. 
For the purposes of this definition, 49 | "submitted" means any form of electronic, verbal, or written communication sent 50 | to the Licensor or its representatives, including but not limited to 51 | communication on electronic mailing lists, source code control systems, and 52 | issue tracking systems that are managed by, or on behalf of, the Licensor for 53 | the purpose of discussing and improving the Work, but excluding communication 54 | that is conspicuously marked or otherwise designated in writing by the copyright 55 | owner as "Not a Contribution." 56 | 57 | "Contributor" shall mean Licensor and any individual or Legal Entity on behalf 58 | of whom a Contribution has been received by Licensor and subsequently 59 | incorporated within the Work. 60 | 61 | 2. Grant of Copyright License. 62 | 63 | Subject to the terms and conditions of this License, each Contributor hereby 64 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, 65 | irrevocable copyright license to reproduce, prepare Derivative Works of, 66 | publicly display, publicly perform, sublicense, and distribute the Work and such 67 | Derivative Works in Source or Object form. 68 | 69 | 3. Grant of Patent License. 70 | 71 | Subject to the terms and conditions of this License, each Contributor hereby 72 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, 73 | irrevocable (except as stated in this section) patent license to make, have 74 | made, use, offer to sell, sell, import, and otherwise transfer the Work, where 75 | such license applies only to those patent claims licensable by such Contributor 76 | that are necessarily infringed by their Contribution(s) alone or by combination 77 | of their Contribution(s) with the Work to which such Contribution(s) was 78 | submitted. 
If You institute patent litigation against any entity (including a 79 | cross-claim or counterclaim in a lawsuit) alleging that the Work or a 80 | Contribution incorporated within the Work constitutes direct or contributory 81 | patent infringement, then any patent licenses granted to You under this License 82 | for that Work shall terminate as of the date such litigation is filed. 83 | 84 | 4. Redistribution. 85 | 86 | You may reproduce and distribute copies of the Work or Derivative Works thereof 87 | in any medium, with or without modifications, and in Source or Object form, 88 | provided that You meet the following conditions: 89 | 90 | You must give any other recipients of the Work or Derivative Works a copy of 91 | this License; and 92 | You must cause any modified files to carry prominent notices stating that You 93 | changed the files; and 94 | You must retain, in the Source form of any Derivative Works that You distribute, 95 | all copyright, patent, trademark, and attribution notices from the Source form 96 | of the Work, excluding those notices that do not pertain to any part of the 97 | Derivative Works; and 98 | If the Work includes a "NOTICE" text file as part of its distribution, then any 99 | Derivative Works that You distribute must include a readable copy of the 100 | attribution notices contained within such NOTICE file, excluding those notices 101 | that do not pertain to any part of the Derivative Works, in at least one of the 102 | following places: within a NOTICE text file distributed as part of the 103 | Derivative Works; within the Source form or documentation, if provided along 104 | with the Derivative Works; or, within a display generated by the Derivative 105 | Works, if and wherever such third-party notices normally appear. The contents of 106 | the NOTICE file are for informational purposes only and do not modify the 107 | License. 
You may add Your own attribution notices within Derivative Works that 108 | You distribute, alongside or as an addendum to the NOTICE text from the Work, 109 | provided that such additional attribution notices cannot be construed as 110 | modifying the License. 111 | You may add Your own copyright statement to Your modifications and may provide 112 | additional or different license terms and conditions for use, reproduction, or 113 | distribution of Your modifications, or for any such Derivative Works as a whole, 114 | provided Your use, reproduction, and distribution of the Work otherwise complies 115 | with the conditions stated in this License. 116 | 117 | 5. Submission of Contributions. 118 | 119 | Unless You explicitly state otherwise, any Contribution intentionally submitted 120 | for inclusion in the Work by You to the Licensor shall be under the terms and 121 | conditions of this License, without any additional terms or conditions. 122 | Notwithstanding the above, nothing herein shall supersede or modify the terms of 123 | any separate license agreement you may have executed with Licensor regarding 124 | such Contributions. 125 | 126 | 6. Trademarks. 127 | 128 | This License does not grant permission to use the trade names, trademarks, 129 | service marks, or product names of the Licensor, except as required for 130 | reasonable and customary use in describing the origin of the Work and 131 | reproducing the content of the NOTICE file. 132 | 133 | 7. Disclaimer of Warranty. 134 | 135 | Unless required by applicable law or agreed to in writing, Licensor provides the 136 | Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, 137 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, 138 | including, without limitation, any warranties or conditions of TITLE, 139 | NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. 
You are 140 | solely responsible for determining the appropriateness of using or 141 | redistributing the Work and assume any risks associated with Your exercise of 142 | permissions under this License. 143 | 144 | 8. Limitation of Liability. 145 | 146 | In no event and under no legal theory, whether in tort (including negligence), 147 | contract, or otherwise, unless required by applicable law (such as deliberate 148 | and grossly negligent acts) or agreed to in writing, shall any Contributor be 149 | liable to You for damages, including any direct, indirect, special, incidental, 150 | or consequential damages of any character arising as a result of this License or 151 | out of the use or inability to use the Work (including but not limited to 152 | damages for loss of goodwill, work stoppage, computer failure or malfunction, or 153 | any and all other commercial damages or losses), even if such Contributor has 154 | been advised of the possibility of such damages. 155 | 156 | 9. Accepting Warranty or Additional Liability. 157 | 158 | While redistributing the Work or Derivative Works thereof, You may choose to 159 | offer, and charge a fee for, acceptance of support, warranty, indemnity, or 160 | other liability obligations and/or rights consistent with this License. However, 161 | in accepting such obligations, You may act only on Your own behalf and on Your 162 | sole responsibility, not on behalf of any other Contributor, and only if You 163 | agree to indemnify, defend, and hold each Contributor harmless for any liability 164 | incurred by, or claims asserted against, such Contributor by reason of your 165 | accepting any such warranty or additional liability. 166 | 167 | END OF TERMS AND CONDITIONS 168 | 169 | APPENDIX: How to apply the Apache License to your work 170 | 171 | To apply the Apache License to your work, attach the following boilerplate 172 | notice, with the fields enclosed by brackets "[]" replaced with your own 173 | identifying information. 
(Don't include the brackets!) The text should be 174 | enclosed in the appropriate comment syntax for the file format. We also 175 | recommend that a file or class name and description of purpose be included on 176 | the same "printed page" as the copyright notice for easier identification within 177 | third-party archives. 178 | 179 | Copyright [yyyy] [name of copyright owner] 180 | 181 | Licensed under the Apache License, Version 2.0 (the "License"); 182 | you may not use this file except in compliance with the License. 183 | You may obtain a copy of the License at 184 | 185 | http://www.apache.org/licenses/LICENSE-2.0 186 | 187 | Unless required by applicable law or agreed to in writing, software 188 | distributed under the License is distributed on an "AS IS" BASIS, 189 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 190 | See the License for the specific language governing permissions and 191 | limitations under the License. 192 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | $(MAKE) -C src 3 | ln -fs src/fcgi-nnet3-decoder . 4 | clean: 5 | $(MAKE) -C src clean 6 | rm fcgi-nnet3-decoder 7 | test: 8 | $(MAKE) -C src test -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | About 2 | ====== 3 | FastCGI support for [Kaldi](http://kaldi-asr.org/doc/). It allows Kaldi based speech recognition to be used through Apache, Nginx, or any other HTTP server that supports FastCGI. It also contains a simple HTML-based client that allows testing Kaldi speech recognition from a web page. 
4 | 5 | Licence 6 | ------- 7 | Apache 2.0 8 | 9 | Installation guide 10 | ================== 11 | 12 | Summary 13 | ------- 14 | 15 | This guide will help you to download and build your own simple ASR 16 | web-service based on Kaldi ASR code. 17 | 18 | Preparing prerequisites 19 | ----------------------- 20 | 21 | ### Creating a working dir 22 | 23 | Let's create a directory where all data will be downloaded and built. 24 | 25 | mkdir ~/apiai 26 | cd ~/apiai 27 | 28 | You are free to choose any other name and path you wish, but you will 29 | have to keep in mind that your name then differs from the name used in this 30 | guide. 31 | 32 | Because the server code is based on Kaldi, almost all prerequisites match 33 | Kaldi's own. In addition, a FastCGI library is required to communicate 34 | with the HTTP server. 35 | 36 | ### Getting Kaldi 37 | 38 | As a first step you have to clone the Kaldi source tree available at 39 | <https://github.com/kaldi-asr/kaldi>: 40 | 41 | git clone https://github.com/kaldi-asr/kaldi 42 | 43 | This command will clone the source tree to the `kaldi` directory. 44 | To configure and build Kaldi please refer to the `kaldi/INSTALL` file. 45 | For detailed information please see the official Kaldi instructions: 46 | <http://kaldi-asr.org/doc/install.html> 47 | 48 | ### Installing libraries 49 | 50 | There are some extra libraries required. You may install them using 51 | your system package manager. 52 | 53 | In openSuSE you may run: 54 | 55 | $ sudo zypper install FastCGI-devel 56 | 57 | If you have Debian or Ubuntu: 58 | 59 | $ sudo apt-get install libfcgi-dev 60 | 61 | Getting the code 62 | -------------- 63 | 64 | Return to your working directory where you put Kaldi sources 65 | 66 | $ cd ~/apiai 67 | 68 | and then clone server source code 69 | 70 | $ git clone https://github.com/api-ai/asr-server asr-server 71 | 72 | It is recommended to check out the code to the same directory where 73 | `kaldi` is located to allow the `configure` tool to detect the Kaldi 74 | location automatically. 
75 | 76 | Building the app 77 | -------------- 78 | 79 | $ cd asr-server 80 | 81 | Before running a make process you have to configure build scripts 82 | by running a special utility: 83 | 84 | $ ./configure 85 | 86 | It will check that all required libraries are installed on your system and 87 | will also look for Kaldi libraries in the `../kaldi` folder. If you 88 | have Kaldi installed somewhere else you may explicitly pass the 89 | path via the --kaldi-root option: 90 | 91 | $ ./configure --kaldi-root=<path_to_kaldi> 92 | 93 | If the configuration process has finished successfully you may begin 94 | the building process by running the make script: 95 | 96 | $ make 97 | 98 | Getting a recognition model 99 | ------------------------ 100 | 101 | When the application build is complete you need to download language specific 102 | data. 103 | 104 | Return to your working directory where you put Kaldi sources 105 | 106 | $ cd ~/apiai 107 | 108 | The built ASR application uses Kaldi nnet3 models, which you can get 109 | by training a neural network with your personal data set, or you can use a 110 | pretrained network provided by us. Currently only an English model is available 111 | at <https://github.com/api-ai/api-ai-english-asr-model>. 112 | 113 | $ wget https://github.com/api-ai/api-ai-english-asr-model/releases/download/1.0/api.ai-kaldi-asr-model.zip 114 | 115 | Unzip the archive to `asr-server` directory. 116 | 117 | $ unzip api.ai-kaldi-asr-model.zip 118 | 119 | Running the app 120 | -------------- 121 | 122 | Set the model directory as a working dir: 123 | 124 | $ cd api.ai-kaldi-asr-model 125 | 126 | There are several ways to run the application. The first one is 127 | to run it as a standalone app listening on the socket defined with the 128 | `--fcgi-socket` option: 129 | 130 | $ ../asr-server/fcgi-nnet3-decoder --fcgi-socket=:8000 131 | 132 | This command runs the application listening on any IP address and port 8000. 133 | You are also free to define a path to a Unix socket, or an explicit IP 134 | address (in an A.B.C.D:PORT form). 
135 | 136 | As an alternative you may use the special spawn-fcgi utility: 137 | 138 | $ spawn-fcgi -n -p 8000 -- ../asr-server/fcgi-nnet3-decoder 139 | 140 | Configuring HTTP service 141 | --------------------- 142 | 143 | You may use any web server that has FastCGI support: Apache, Nginx, Lighttpd etc. 144 | 145 | ### Installing Apache2 146 | 147 | openSuSE: 148 | 149 | $ sudo zypper in apache2 150 | 151 | Debian and Ubuntu: 152 | 153 | $ sudo apt-get install apache2 154 | 155 | ### Configuring Apache2 156 | 157 | Enable FastCGI proxy module with `a2enmod`: 158 | 159 | $ sudo a2enmod proxy_fcgi 160 | 161 | Then you have to add the following line to the Apache2 configuration file: 162 | 163 | ProxyPass "/asr" "fcgi://localhost:8000/" 164 | 165 | If your Apache is configured to include all .conf files from the /etc/apache2/conf.d folder you may 166 | create a separate asr_proxy.conf file with the following content: 167 | 168 | ProxyPass "/asr" "fcgi://localhost:8000/" 169 | Alias /asr-html/ "/home/username/apiai/asr-server/asr-html/" 170 | <Directory "/home/username/apiai/asr-server/asr-html/"> 171 | Options Indexes MultiViews 172 | AllowOverride None 173 | Require all granted 174 | </Directory> 175 | 176 | Now restart Apache: 177 | 178 | $ sudo /etc/init.d/apache2 restart 179 | 180 | ### Installing Nginx 181 | 182 | You can download the latest sources from the official website and build Nginx 183 | yourself or use your system package manager. 
184 | 185 | openSuSE: 186 | 187 | $ sudo zypper install nginx 188 | 189 | Debian and Ubuntu: 190 | 191 | $ sudo apt-get install nginx 192 | 193 | ### Configuring Nginx 194 | 195 | Open nginx.conf and write down the following code: 196 | 197 | http { 198 | server { 199 | location /asr { 200 | fastcgi_pass 127.0.0.1:8000; 201 | # Disabling this option invokes immediate sending replies to client 202 | fastcgi_buffering off; 203 | # Disabling this option invokes immediate decoding incoming audio data 204 | fastcgi_request_buffering off; 205 | include fastcgi_params; 206 | } 207 | 208 | location /asr-html { 209 | root /home/username/apiai/asr-server/; 210 | index index.html; 211 | } 212 | } 213 | } 214 | 215 | This will set up Nginx to pass all requests coming to the URL /asr directly 216 | to the ASR service listening on port 8000 via the FastCGI gateway. For detailed 217 | information please refer to the nginx documentation 218 | (e.g. <http://nginx.org/en/docs/http/ngx_http_fastcgi_module.html>) 219 | 220 | Speech Recognition 221 | ---------------- 222 | 223 | The server accepts raw mono 16-bit 16 kHz PCM data. You can convert your audio 224 | using any popular encoding utility; for instance, you can use ffmpeg: 225 | 226 | $ ffmpeg -i audio.wav -f s16le -ar 16000 -ac 1 audio.raw 227 | 228 | ### Recognition using web browser 229 | 230 | There is a simple JS implementation that allows you to recognize speech using the system mic. 231 | Open in your browser: 232 | 233 | http://localhost/asr-html/ 234 | 235 | and follow the instructions on the page. 
236 | 237 | ### Recognition from command line using curl 238 | 239 | Now, let’s recognize `audio.raw` by calling web-service with `curl` 240 | utility: 241 | 242 | $ curl -H "Content-Type: application/octet-stream" --data-binary @audio.raw http://localhost/asr 243 | 244 | On successful recognition the command will return something like this: 245 | 246 | { 247 | "status":"ok", 248 | "data":[{"confidence":0.900359,"text":"HELLO WORLD"}] 249 | } 250 | 251 | On error the return value will be like this: 252 | 253 | {"status":"error","data":[{"text":"Failed to decode"}]} 254 | 255 | ### Recognition request parameters 256 | 257 | There are several parameters to tune up recognition process. All parameters are expected to be passed via query string as web-form fields enumeration (e.g. `?name1=value1&name2=value2`). 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 277 | 278 | 279 | 280 | 281 | 282 | 295 | 296 | 297 | 298 | 299 | 300 | 318 | 319 | 320 | 321 | 322 | 323 | 356 | 357 | 358 | 359 |
Parameter | Description | Acceptable values | Default value
nbest | Set the number of possible returned values 269 |
{
270 | 	"status":"ok",
271 | 	"data":[
272 | 		{"confidence":0.900359,"text":"HELLO WORLD"},
273 | 		{"confidence":0.89012,"text":"HELLO WORD"}
274 | 	]
275 | }
276 |
1-10 | 1
endofspeech | Enable or disable end-of-speech point detection during recognition. If an endpoint is 283 | detected, the current result is returned and the rest of the data is 284 | skipped. Also, when recognition is interrupted this way, 2 fields are added 285 | to the response: "interrupted" with value "endofspeech", and "time" with the 286 | number of milliseconds that have been processed. 287 | 288 |
{
289 | 	"status":"ok",
290 | 	"data":[{"confidence":0.900359,"text":"HELLO WORLD"}],
291 | 	"interrupted":"endofspeech",
292 | 	"time":3800
293 | }
294 |
true or false | true
intermediate | Set the time interval in milliseconds between intermediate results while 301 | recognition is in progress. 302 | 303 | The result is returned as a simple sequence of JSON documents. 304 | Each intermediate document has its "status" field set to "intermediate"; 305 | the last one has "status" set to "ok". 306 |

307 | {"status":"intermediate","data":[
308 | 	{"confidence":0.908981,"text":"HELLO"}
309 | ]}
310 | {"status":"intermediate","data":[
311 | 	{"confidence":0.903025,"text":"HELLO WORLD"}
312 | ]}
313 | {"status":"ok","data":[
314 | 	{"confidence":0.903025,"text":"HELLO WORLD"}
315 | ]}
316 | 
317 |
>5000
multipart | If enabled, the result is returned as an 324 | HTTP multipart response with "content-type" 325 | set to "multipart/x-mixed-replace", and each response part 326 | has a "Content-Disposition" header value equal to "form-data". 327 | Intermediate parts are named "partial" and the final part is named "result". 328 |

329 | --ResponseBoundary
330 | Content-Disposition: form-data; name="partial"
331 | Content-type: application/json
332 | 
333 | {"status":"intermediate","data":[
334 | 	{"confidence":0.908981,"text":"HELLO"}
335 | ]}
336 | 
337 | --ResponseBoundary
338 | Content-Disposition: form-data; name="partial"
339 | Content-type: application/json
340 | 
341 | {"status":"intermediate","data":[
342 | 	{"confidence":0.903025,"text":"HELLO WORLD"}
343 | ]}
344 | 
345 | --ResponseBoundary
346 | Content-Disposition: form-data; name="result"
347 | Content-type: application/json
348 | 
349 | {"status":"ok","data":[
350 | 	{"confidence":0.903025,"text":"HELLO WORLD"}
351 | ]}
352 | 
353 | --ResponseBoundary--
354 | 
355 |
true or false | false
360 | -------------------------------------------------------------------------------- /asr-html/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | ASR Test console 5 | 6 | 7 | 8 | 50 | 51 | 52 |
53 | 54 | 55 |
56 |
57 | 66 | 67 |
68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /asr-html/res/audiodisplay.js: -------------------------------------------------------------------------------- 1 | function drawBuffer( width, height, context, data ) { 2 | var step = Math.ceil( data.length / width ); 3 | var amp = height / 2; 4 | context.fillStyle = "silver"; 5 | context.clearRect(0,0,width,height); 6 | for(var i=0; i < width; i++){ 7 | var min = 1.0; 8 | var max = -1.0; 9 | for (j=0; j<step; j++) { 10 | var datum = data[(i*step)+j]; 11 | if (datum < min) 12 | min = datum; 13 | if (datum > max) 14 | max = datum; 15 | } 16 | context.fillRect(i,(1+min)*amp,1,Math.max(1,(max-min)*amp)); 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /asr-html/res/main.js: -------------------------------------------------------------------------------- 1 | /* Copyright 2013 Chris Wilson 2 | 2016 Api.ai (author: Ilya Platonov) 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 
15 | */ 16 | 17 | window.AudioContext = window.AudioContext || window.webkitAudioContext; 18 | 19 | var URL = "/asr" 20 | 21 | var audioContext = new AudioContext(); 22 | var audioInput = null, 23 | realAudioInput = null, 24 | inputPoint = null, 25 | audioRecorder = null; 26 | var rafID = null; 27 | var analyserContext = null; 28 | var canvasWidth, canvasHeight; 29 | var recIndex = 0; 30 | 31 | 32 | 33 | 34 | function sendBlob(blob) { 35 | var request = new XMLHttpRequest(); 36 | 37 | request.open("POST", URL); 38 | 39 | updateStatus("Sending data to " + URL) 40 | request.onreadystatechange=function() { 41 | updateStatus(request.responseText); 42 | } 43 | request.send(blob); 44 | } 45 | 46 | function updateStatus(status) { 47 | var statusP = document.getElementById( "status" ); 48 | statusP.innerHTML = status; 49 | } 50 | /* TODO: 51 | 52 | - offer mono option 53 | - "Monitor input" switch 54 | */ 55 | 56 | function saveAudio() { 57 | audioRecorder.exportWAV( doneEncoding ); 58 | // could get mono instead by saying 59 | // audioRecorder.exportMonoWAV( doneEncoding ); 60 | } 61 | 62 | 63 | function gotBuffers( buffers ) { 64 | var canvas = document.getElementById( "wavedisplay" ); 65 | 66 | drawBuffer( canvas.width, canvas.height, canvas.getContext('2d'), buffers[0] ); 67 | 68 | // the ONLY time gotBuffers is called is right after a new recording is completed - 69 | // so here's where we should set up the download. 
70 | audioRecorder.exportMonoWAV( sendBlob ); 71 | //apiaiSend(); 72 | } 73 | 74 | 75 | function toggleRecording( e ) { 76 | if (e.classList.contains("recording")) { 77 | // stop recording 78 | audioRecorder.stop(); 79 | e.classList.remove("recording"); 80 | audioRecorder.getBuffers( gotBuffers ); 81 | 82 | } else { 83 | // start recording 84 | if (!audioRecorder) 85 | return; 86 | e.classList.add("recording"); 87 | audioRecorder.clear(); 88 | audioRecorder.record(); 89 | } 90 | } 91 | 92 | function convertToMono( input ) { 93 | var splitter = audioContext.createChannelSplitter(2); 94 | var merger = audioContext.createChannelMerger(2); 95 | 96 | input.connect( splitter ); 97 | splitter.connect( merger, 0, 0 ); 98 | splitter.connect( merger, 0, 1 ); 99 | return merger; 100 | } 101 | 102 | function cancelAnalyserUpdates() { 103 | window.cancelAnimationFrame( rafID ); 104 | rafID = null; 105 | } 106 | 107 | function updateAnalysers(time) { 108 | if (!analyserContext) { 109 | var canvas = document.getElementById("analyser"); 110 | canvasWidth = canvas.width; 111 | canvasHeight = canvas.height; 112 | analyserContext = canvas.getContext('2d'); 113 | } 114 | 115 | // analyzer draw code here 116 | { 117 | var SPACING = 3; 118 | var BAR_WIDTH = 1; 119 | var numBars = Math.round(canvasWidth / SPACING); 120 | var freqByteData = new Uint8Array(analyserNode.frequencyBinCount); 121 | 122 | analyserNode.getByteFrequencyData(freqByteData); 123 | 124 | analyserContext.clearRect(0, 0, canvasWidth, canvasHeight); 125 | analyserContext.fillStyle = '#F6D565'; 126 | analyserContext.lineCap = 'round'; 127 | var multiplier = analyserNode.frequencyBinCount / numBars; 128 | 129 | // Draw rectangle for each frequency bin. 
130 | for (var i = 0; i < numBars; ++i) { 131 | var magnitude = 0; 132 | var offset = Math.floor( i * multiplier ); 133 | // gotta sum/average the block, or we miss narrow-bandwidth spikes 134 | for (var j = 0; j< multiplier; j++) 135 | magnitude += freqByteData[offset + j]; 136 | magnitude = magnitude / multiplier; 137 | var magnitude2 = freqByteData[i * multiplier]; 138 | analyserContext.fillStyle = "hsl( " + Math.round((i*360)/numBars) + ", 100%, 50%)"; 139 | analyserContext.fillRect(i * SPACING, canvasHeight, BAR_WIDTH, -magnitude); 140 | } 141 | } 142 | 143 | rafID = window.requestAnimationFrame( updateAnalysers ); 144 | } 145 | 146 | function toggleMono() { 147 | if (audioInput != realAudioInput) { 148 | audioInput.disconnect(); 149 | realAudioInput.disconnect(); 150 | audioInput = realAudioInput; 151 | } else { 152 | realAudioInput.disconnect(); 153 | audioInput = convertToMono( realAudioInput ); 154 | } 155 | 156 | audioInput.connect(inputPoint); 157 | } 158 | 159 | function gotStream(stream) { 160 | inputPoint = audioContext.createGain(); 161 | 162 | // Create an AudioNode from the stream. 
163 | realAudioInput = audioContext.createMediaStreamSource(stream); 164 | audioInput = realAudioInput; 165 | audioInput.connect(inputPoint); 166 | 167 | // audioInput = convertToMono( input ); 168 | 169 | analyserNode = audioContext.createAnalyser(); 170 | analyserNode.fftSize = 2048; 171 | inputPoint.connect( analyserNode ); 172 | 173 | audioRecorder = new Recorder( inputPoint ); 174 | 175 | zeroGain = audioContext.createGain(); 176 | zeroGain.gain.value = 0.0; 177 | inputPoint.connect( zeroGain ); 178 | zeroGain.connect( audioContext.destination ); 179 | updateAnalysers(); 180 | } 181 | 182 | function initAudio() { 183 | if (!navigator.getUserMedia) 184 | navigator.getUserMedia = navigator.webkitGetUserMedia || navigator.mozGetUserMedia; 185 | if (!navigator.cancelAnimationFrame) 186 | navigator.cancelAnimationFrame = navigator.webkitCancelAnimationFrame || navigator.mozCancelAnimationFrame; 187 | if (!navigator.requestAnimationFrame) 188 | navigator.requestAnimationFrame = navigator.webkitRequestAnimationFrame || navigator.mozRequestAnimationFrame; 189 | 190 | navigator.getUserMedia( 191 | { 192 | "audio": { 193 | "mandatory": { 194 | "googEchoCancellation": "false", 195 | "googAutoGainControl": "false", 196 | "googNoiseSuppression": "false", 197 | "googHighpassFilter": "false" 198 | }, 199 | "optional": [] 200 | }, 201 | }, gotStream, function(e) { 202 | alert('Error getting audio'); 203 | console.log(e); 204 | }); 205 | } 206 | 207 | window.addEventListener('load', initAudio ); 208 | -------------------------------------------------------------------------------- /asr-html/res/mic128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dialogflow/asr-server/d551d7a471300360a846009a31852662d0ba7b23/asr-html/res/mic128.png -------------------------------------------------------------------------------- /asr-html/res/recorder.js: 
-------------------------------------------------------------------------------- 1 | /*License (MIT) 2 | 3 | Copyright © 2013 Matt Diamond 4 | 2016 Api.ai (author: Ilya Platonov) 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 7 | documentation files (the "Software"), to deal in the Software without restriction, including without limitation 8 | the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and 9 | to permit persons to whom the Software is furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of 12 | the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 15 | THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 17 | CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 18 | DEALINGS IN THE SOFTWARE. 
19 | */ 20 | 21 | (function(window){ 22 | 23 | var WORKER_PATH = 'res/recorderWorker.js'; 24 | 25 | var Recorder = function(source, cfg){ 26 | var config = cfg || { }; 27 | var bufferLen = config.bufferLen || 4096; 28 | this.context = source.context; 29 | if(!this.context.createScriptProcessor){ 30 | this.node = this.context.createJavaScriptNode(bufferLen, 2, 2); 31 | } else { 32 | this.node = this.context.createScriptProcessor(bufferLen, 2, 2); 33 | } 34 | 35 | var worker = new Worker(config.workerPath || WORKER_PATH); 36 | worker.postMessage({ 37 | command: 'init', 38 | config: { 39 | sampleRate: this.context.sampleRate 40 | } 41 | }); 42 | var recording = false, 43 | currCallback; 44 | 45 | this.node.onaudioprocess = function(e){ 46 | if (!recording) return; 47 | worker.postMessage({ 48 | command: 'record', 49 | buffer: [ 50 | e.inputBuffer.getChannelData(0), 51 | e.inputBuffer.getChannelData(1) 52 | ] 53 | }); 54 | } 55 | 56 | this.configure = function(cfg){ 57 | for (var prop in cfg){ 58 | if (cfg.hasOwnProperty(prop)){ 59 | config[prop] = cfg[prop]; 60 | } 61 | } 62 | } 63 | 64 | this.record = function(){ 65 | recording = true; 66 | } 67 | 68 | this.stop = function(){ 69 | recording = false; 70 | } 71 | 72 | this.clear = function(){ 73 | worker.postMessage({ command: 'clear' }); 74 | } 75 | 76 | this.getBuffers = function(cb) { 77 | currCallback = cb || config.callback; 78 | worker.postMessage({ command: 'getBuffers' }) 79 | } 80 | 81 | this.exportWAV = function(cb, type){ 82 | currCallback = cb || config.callback; 83 | type = type || config.type || 'audio/wav'; 84 | if (!currCallback) throw new Error('Callback not set'); 85 | worker.postMessage({ 86 | command: 'exportWAV', 87 | type: type 88 | }); 89 | } 90 | 91 | this.exportMonoWAV = function(cb, type){ 92 | currCallback = cb || config.callback; 93 | type = type || config.type || 'audio/wav'; 94 | if (!currCallback) throw new Error('Callback not set'); 95 | worker.postMessage({ 96 | command: 
'exportMonoWAV', 97 | type: type 98 | }); 99 | } 100 | 101 | worker.onmessage = function(e){ 102 | var blob = e.data; 103 | currCallback(blob); 104 | } 105 | 106 | source.connect(this.node); 107 | this.node.connect(this.context.destination); // if the script node is not connected to an output the "onaudioprocess" event is not triggered in chrome. 108 | }; 109 | 110 | Recorder.setupDownload = function(blob, filename){ 111 | var url = (window.URL || window.webkitURL).createObjectURL(blob); 112 | var link = document.getElementById("save"); 113 | link.href = url; 114 | link.download = filename || 'output.wav'; 115 | } 116 | 117 | window.Recorder = Recorder; 118 | 119 | })(window); 120 | -------------------------------------------------------------------------------- /asr-html/res/recorderWorker.js: -------------------------------------------------------------------------------- 1 | /*License (MIT) 2 | 3 | Copyright © 2013 Matt Diamond 4 | 2016 Api.ai (author: Ilya Platonov) 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 7 | documentation files (the "Software"), to deal in the Software without restriction, including without limitation 8 | the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and 9 | to permit persons to whom the Software is furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of 12 | the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 15 | THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 17 | CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 18 | DEALINGS IN THE SOFTWARE. 19 | */ 20 | 21 | var recLength = 0, 22 | recBuffersL = [], 23 | recBuffersR = [], 24 | sampleRate; 25 | 26 | this.onmessage = function(e){ 27 | switch(e.data.command){ 28 | case 'init': 29 | init(e.data.config); 30 | break; 31 | case 'record': 32 | record(e.data.buffer); 33 | break; 34 | case 'exportWAV': 35 | exportWAV(e.data.type); 36 | break; 37 | case 'exportMonoWAV': 38 | exportMonoWAV(e.data.type); 39 | break; 40 | case 'getBuffers': 41 | getBuffers(); 42 | break; 43 | case 'clear': 44 | clear(); 45 | break; 46 | } 47 | }; 48 | 49 | function init(config){ 50 | sampleRate = config.sampleRate; 51 | } 52 | 53 | function record(inputBuffer){ 54 | recBuffersL.push(inputBuffer[0]); 55 | recBuffersR.push(inputBuffer[1]); 56 | recLength += inputBuffer[0].length; 57 | } 58 | 59 | function exportWAV(type){ 60 | var bufferL = mergeBuffers(recBuffersL, recLength); 61 | var bufferR = mergeBuffers(recBuffersR, recLength); 62 | var interleaved = interleave(bufferL, bufferR); 63 | var downsampledBuffer = downsampleBuffer(interleaved, 16000); 64 | var dataview = encodeWAV(downsampledBuffer); 65 | var audioBlob = new Blob([dataview], { type: type }); 66 | 67 | this.postMessage(audioBlob); 68 | } 69 | 70 | function downsampleBuffer(buffer, rate) { 71 | var sampleRate = 44100; 72 | if (rate == sampleRate) { 73 | return buffer; 74 | } 75 | if (rate > sampleRate) { 76 | throw "downsampling rate show be smaller than original sample rate"; 77 | } 78 | var sampleRateRatio = sampleRate / rate; 79 | var newLength = Math.round(buffer.length / sampleRateRatio); 80 | var result = new Float32Array(newLength); 81 | var offsetResult = 0; 82 | var offsetBuffer = 0; 83 | while (offsetResult < result.length) { 84 | var 
nextOffsetBuffer = Math.round((offsetResult + 1) * sampleRateRatio); 85 | var accum = 0, count = 0; 86 | for (var i = offsetBuffer; i < nextOffsetBuffer && i < buffer.length; i++) { 87 | accum += buffer[i]; 88 | count++; 89 | } 90 | result[offsetResult] = accum / count; 91 | offsetResult++; 92 | offsetBuffer = nextOffsetBuffer; 93 | } 94 | return result; 95 | } 96 | 97 | function exportMonoWAV(type){ 98 | var bufferL = mergeBuffers(recBuffersL, recLength); 99 | //var dataview = encodeWAV(bufferL, true); 100 | var d = downsampleBuffer(bufferL, 16000); 101 | var buffer = new ArrayBuffer(d.length * 2); 102 | var view = new DataView(buffer); 103 | floatTo16BitPCM(view, 0, d); 104 | var audioBlob = new Blob([view], { type: type }); 105 | this.postMessage(audioBlob); 106 | } 107 | 108 | function getBuffers() { 109 | var buffers = []; 110 | buffers.push( mergeBuffers(recBuffersL, recLength) ); 111 | buffers.push( mergeBuffers(recBuffersR, recLength) ); 112 | this.postMessage(buffers); 113 | } 114 | 115 | function clear(){ 116 | recLength = 0; 117 | recBuffersL = []; 118 | recBuffersR = []; 119 | } 120 | 121 | function mergeBuffers(recBuffers, recLength){ 122 | var result = new Float32Array(recLength); 123 | var offset = 0; 124 | for (var i = 0; i < recBuffers.length; i++){ 125 | result.set(recBuffers[i], offset); 126 | offset += recBuffers[i].length; 127 | } 128 | return result; 129 | } 130 | 131 | function interleave(inputL, inputR){ 132 | var length = inputL.length + inputR.length; 133 | var result = new Float32Array(length); 134 | 135 | var index = 0, 136 | inputIndex = 0; 137 | 138 | while (index < length){ 139 | result[index++] = inputL[inputIndex]; 140 | result[index++] = inputR[inputIndex]; 141 | inputIndex++; 142 | } 143 | return result; 144 | } 145 | 146 | function floatTo16BitPCM(output, offset, input){ 147 | for (var i = 0; i < input.length; i++, offset+=2){ 148 | var s = Math.max(-1, Math.min(1, input[i])); 149 | output.setInt16(offset, s < 0 ? 
s * 0x8000 : s * 0x7FFF, true); 150 | } 151 | } 152 | 153 | function writeString(view, offset, string){ 154 | for (var i = 0; i < string.length; i++){ 155 | view.setUint8(offset + i, string.charCodeAt(i)); 156 | } 157 | } 158 | 159 | function encodeWAV(samples, mono){ 160 | var buffer = new ArrayBuffer(44 + samples.length * 2); 161 | var view = new DataView(buffer); 162 | 163 | /* RIFF identifier */ 164 | writeString(view, 0, 'RIFF'); 165 | /* file length */ 166 | view.setUint32(4, 32 + samples.length * 2, true); 167 | /* RIFF type */ 168 | writeString(view, 8, 'WAVE'); 169 | /* format chunk identifier */ 170 | writeString(view, 12, 'fmt '); 171 | /* format chunk length */ 172 | view.setUint32(16, 16, true); 173 | /* sample format (raw) */ 174 | view.setUint16(20, 1, true); 175 | /* channel count */ 176 | view.setUint16(22, mono?1:2, true); 177 | /* sample rate */ 178 | view.setUint32(24, sampleRate, true); 179 | /* byte rate (sample rate * block align) */ 180 | view.setUint32(28, sampleRate * 4, true); 181 | /* block align (channel count * bytes per sample) */ 182 | view.setUint16(32, 4, true); 183 | /* bits per sample */ 184 | view.setUint16(34, 16, true); 185 | /* data chunk identifier */ 186 | writeString(view, 36, 'data'); 187 | /* data chunk length */ 188 | view.setUint32(40, samples.length * 2, true); 189 | 190 | floatTo16BitPCM(view, 44, samples); 191 | 192 | return view; 193 | } 194 | -------------------------------------------------------------------------------- /configure: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # This configure script is hand-generated, not auto-generated. 4 | # It creates the file kaldi.mk, which is %included by the Makefiles 5 | # in the subdirectories. 
6 | # The file kaldi.mk is editable by hand-- for example, you may want to 7 | # remove the options -g -O0 -DKALDI_PARANOID, or edit the 8 | # -DKALDI_DOUBLE_PRECISION option (to be 1 not 0), 9 | 10 | 11 | # Example command lines: 12 | # ./configure 13 | # ./configure --shared ## shared libraries. 14 | # ./configure --mkl-root=/opt/intel/mkl 15 | # ./configure --mkl-root=/opt/intel/mkl --threaded-math=yes 16 | # ./configure --mkl-root=/opt/intel/mkl --threaded-math=yes --mkl-threading=tbb 17 | # This is for MKL 11.3 -- which does not seem to provide Intel OMP libs 18 | # ./configure --openblas-root=../tools/OpenBLAS/install # before doing 19 | # # this, cd to ../tools and type "make openblas". Note: 20 | # # this is not working correctly on all platforms, do "make test" 21 | # # and look out for segmentation faults. 22 | # ./configure --atlas-root=../tools/ATLAS/build 23 | 24 | #This should be incremented after every significant change of the configure script 25 | #I.e. after each change that affects the kaldi.mk or the build system as whole 26 | CONFIGURE_VERSION=1 27 | OUTPUT_MK=apiai.mk 28 | INCLUDE_PATHS="/usr/include /usr/local/include" 29 | LIBRARY_PATHS="/usr/lib /usr/local/lib /usr/local/lib64" 30 | 31 | APIAI_CXX_FLAGS= 32 | 33 | function rel2abs { 34 | if [ ! -z "$1" ]; then 35 | case "${1}" in 36 | [./]*) 37 | echo "$(cd ${1%/*}; pwd)/${1##*/}" 38 | ;; 39 | *) 40 | echo "${PWD}/${1}" 41 | ;; 42 | esac 43 | fi 44 | } 45 | 46 | function read_dirname { 47 | local dir_name=`expr "X$1" : '[^=]*=\(.*\)'`; 48 | local retval=`rel2abs $dir_name` 49 | [ -z $retval ] && echo "Bad option '$1': no such directory: $dir_name" >&2 && exit 1; 50 | echo $retval 51 | } 52 | 53 | function is_set { 54 | local myvar=${1:-notset} 55 | if [ "$myvar" == "notset" ]; then 56 | return 1 57 | else 58 | return 0 59 | fi 60 | } 61 | 62 | ## First do some checks. These verify that all the things are 63 | ## here that should be here. 64 | if ! 
[ -x "$PWD/configure" ]; then 65 | echo 'You must run "configure" from the src/ directory.' 66 | exit 1 67 | fi 68 | 69 | ## Default locations for Kaldi sources. 70 | KALDIROOT=$(rel2abs ../kaldi) 71 | 72 | function usage { 73 | echo 'Usage: ./configure [--kaldi-root=KALDIROOT]'; 74 | } 75 | 76 | cmd_line="$0 $@" # Save the command line to include in kaldi.mk 77 | 78 | while [ $# -gt 0 ]; 79 | do 80 | case "$1" in 81 | --help) 82 | usage; exit 0 ;; 83 | --version) 84 | echo $CONFIGURE_VERSION; exit 0 ;; 85 | --kaldi-root=*) 86 | KALDIROOT=$(read_dirname $1); 87 | shift ;; 88 | *) echo "Unknown argument: $1, exiting"; usage; exit 1 ;; 89 | esac 90 | done 91 | 92 | function failure { 93 | echo "***configure failed: $* ***" >&2 94 | if [ -f kaldi.mk ]; then rm kaldi.mk; fi 95 | exit 1; 96 | } 97 | 98 | function check_exists { 99 | if [ ! -f $1 ]; then failure "$1 not found."; fi 100 | } 101 | 102 | function exit_success { 103 | echo "SUCCESS" 104 | exit 0; 105 | } 106 | 107 | function check_sys_library { 108 | case $(uname -s) in 109 | Darwin) 110 | for file in $1; do 111 | for path in $LIBRARY_PATHS; do 112 | local result="${path}/lib${file}.a" 113 | echo -n "Checking ${result}..." >&2 114 | if [ -f "$result" ]; then 115 | echo "OK" >&2 116 | echo $result 117 | break 2 118 | else 119 | echo "Not found" >&2 120 | fi 121 | done 122 | done 123 | ;; 124 | *) 125 | echo -n "Looking for $1 library: " 126 | local response=$(whereis lib$1) 127 | local libpath=${response##*:} 128 | if [ -z "$libpath" ]; then 129 | echo "Not found" 130 | failure "Library $1 not found" 131 | else 132 | echo $libpath 133 | fi 134 | ;; 135 | esac 136 | } 137 | 138 | function check_header_file { 139 | for file in $1; do 140 | for path in $INCLUDE_PATHS; do 141 | local result="${path}/${file}" 142 | echo -n "Checking ${result}..." 
>&2 143 | if [ -f "$result" ]; then 144 | echo "OK" >&2 145 | echo $result 146 | break 2 147 | else 148 | echo "Not found" >&2 149 | fi 150 | done 151 | done 152 | } 153 | 154 | echo "Configuring ..." 155 | 156 | echo "Looking for Kaldi sources in \"$KALDIROOT\"..." 157 | check_exists "$KALDIROOT/src/kaldi.mk" 158 | 159 | check_sys_library fcgi 160 | check_sys_library fcgi++ 161 | 162 | FCGIO_H=$(check_header_file "fcgio.h") 163 | if [ -z "$FCGIO_H" ]; then 164 | FCGIO_H=$(check_header_file "fastcgi/fcgio.h") 165 | if [ -z "$FCGIO_H" ]; then 166 | failure "fcgio.h not found" 167 | else 168 | APIAI_CXX_FLAGS="$APIAI_CXX_FLAGS -I$(dirname $FCGIO_H)" 169 | fi 170 | fi 171 | 172 | # back up the old one in case we modified it 173 | if [ -f "$OUTPUT_MK" ]; then 174 | echo "Backing up $OUTPUT_MK to $OUTPUT_MK.bak" 175 | cp $OUTPUT_MK ${OUTPUT_MK}.bak 176 | fi 177 | 178 | printf "# This file was generated using the following command:\n# $cmd_line\n\n" > $OUTPUT_MK 179 | 180 | printf "KALDI_PATH = $KALDIROOT/src\n" >> $OUTPUT_MK 181 | printf "APIAI_CXX_FLAGS = $APIAI_CXX_FLAGS\n" >> $OUTPUT_MK 182 | 183 | exit_success 184 | -------------------------------------------------------------------------------- /src/Decoder.h: -------------------------------------------------------------------------------- 1 | // Decoder.h 2 | 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 10 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 11 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 12 | // MERCHANTABLITY OR NON-INFRINGEMENT. 13 | // See the Apache 2 License for the specific language governing permissions and 14 | // limitations under the License. 
15 | 16 | #ifndef APIAI_DECODER_DECODER_H_ 17 | #define APIAI_DECODER_DECODER_H_ 18 | 19 | #include "Request.h" 20 | #include "Response.h" 21 | #include "util/parse-options.h" 22 | 23 | namespace apiai { 24 | 25 | /** 26 | * ASR decoder basic interface 27 | */ 28 | class Decoder { 29 | public: 30 | virtual ~Decoder() {}; 31 | 32 | /** Create decoder clone */ 33 | virtual Decoder *Clone() const = 0; 34 | /** Register options which can be defined via command line arguments */ 35 | virtual void RegisterOptions(kaldi::OptionsItf &po) = 0; 36 | /** Initialize decoder */ 37 | virtual bool Initialize(kaldi::OptionsItf &po) = 0; 38 | /** Perform decoding routine */ 39 | virtual void Decode(Request &request, Response &response) = 0; 40 | }; 41 | 42 | } /* namespace apiai */ 43 | 44 | #endif /* APIAI_DECODER_DECODER_H_ */ 45 | -------------------------------------------------------------------------------- /src/FcgiDecodingApp.cc: -------------------------------------------------------------------------------- 1 | // FcgiDecodingApp.cc 2 | 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 10 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 11 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 12 | // MERCHANTABLITY OR NON-INFRINGEMENT. 13 | // See the Apache 2 License for the specific language governing permissions and 14 | // limitations under the License. 
15 | 16 | #include "RequestRawReader.h" 17 | #include "ResponseJsonWriter.h" 18 | #include "ResponseMultipartJsonWriter.h" 19 | #include "FcgiDecodingApp.h" 20 | #include "QueryStringParser.h" 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | 33 | namespace apiai { 34 | 35 | const std::string PARAMETER_NAME_NBEST = "nbest"; 36 | const std::string PARAMETER_NAME_INTERMEDIATE = "intermediate"; 37 | const std::string PARAMETER_NAME_END_OF_SPEECH = "endofspeech"; 38 | const std::string PARAMETER_MULTIPART = "multipart"; 39 | 40 | class ResponseParams { 41 | public: 42 | bool multipart; 43 | 44 | static bool default_multipart; 45 | static bool default_endofspeech; 46 | 47 | ResponseParams() : multipart(default_multipart) {}; 48 | }; 49 | 50 | // Multipart option default value 51 | bool ResponseParams::default_multipart = false; 52 | 53 | // End-of-speech detection option default value 54 | bool ResponseParams::default_endofspeech = true; 55 | 56 | bool to_bool(std::string &str) { 57 | std::transform(str.begin(), str.end(), str.begin(), ::tolower); 58 | std::istringstream is(str); 59 | bool b; 60 | is >> std::boolalpha >> b; 61 | return b; 62 | } 63 | 64 | bool to_bool(const char *chars) { 65 | std::string str(chars); 66 | return to_bool(str); 67 | } 68 | 69 | void apply_request_parameters(FCGX_Request &request, RequestRawReader &reader, ResponseParams ¶ms) { 70 | char *queryString = FCGX_GetParam("QUERY_STRING", request.envp); 71 | if (queryString) { 72 | QueryStringParser queryStringParser(queryString); 73 | std::string name, value; 74 | while (queryStringParser.Next(&name, &value)) { 75 | if (PARAMETER_NAME_NBEST == name) { 76 | reader.BestCount(atoi(value.data())); 77 | KALDI_VLOG(1) << "Setting n-best: " << reader.BestCount(); 78 | } else if (PARAMETER_NAME_INTERMEDIATE == name) { 79 | reader.IntermediateIntervalMillisec(atoi(value.data())); 80 | 
KALDI_VLOG(1) << "Setting intermediate interval: " << reader.IntermediateIntervalMillisec() << " ms"; 81 | } else if (PARAMETER_NAME_END_OF_SPEECH == name) { 82 | reader.DoEndpointing(to_bool(value.data())); 83 | KALDI_VLOG(1) << "Setting end-of-speech: " << (reader.DoEndpointing() ? "enabled" : "disabled"); 84 | } else if (PARAMETER_MULTIPART == name) { 85 | params.multipart = to_bool(value.data()); 86 | KALDI_VLOG(1) << "Setting multipart: " << (params.multipart ? "enabled" : "disabled"); 87 | } else { 88 | KALDI_VLOG(1) << "Skipping unknown parameter \"" << name << "\""; 89 | } 90 | } 91 | } 92 | } 93 | 94 | void FcgiDecodingApp::RegisterOptions(kaldi::OptionsItf &po) { 95 | po.Register("fcgi-socket", &fcgi_socket_path_, "FastCGI connection string, if undefined then stdin and stdout will be used"); 96 | po.Register("fcgi-socket.backlog", &fcgi_socket_backlog_, "FastCGI socket backlog size."); 97 | po.Register("fcgi-threads-number", &fcgi_threads_number_, "Number of FastCGI working threads"); 98 | po.Register("fcgi-multipart", &ResponseParams::default_multipart, "Enable or disable multipart responses by default"); 99 | po.Register("fcgi-endofspeech", &ResponseParams::default_endofspeech, "Enable or disable end-of-speech detection by default"); 100 | } 101 | 102 | void *FcgiDecodingApp::RunChildThread(void *arg) { 103 | FcgiDecodingApp *app = (FcgiDecodingApp*)arg; 104 | Decoder *decoder = app->decoder_.Clone(); 105 | app->ProcessingRoutine(*decoder); 106 | delete decoder; 107 | return NULL; 108 | } 109 | 110 | void FcgiDecodingApp::ProcessingRoutine(Decoder &decoder) { 111 | if (socket_id_ < 0) { 112 | KALDI_WARN << "Socket not opened"; 113 | return; 114 | } 115 | 116 | FCGX_Request request; 117 | FCGX_InitRequest(&request, socket_id_, 0); 118 | 119 | while (FCGX_Accept_r(&request) == 0) { 120 | fcgi_streambuf cin_fcgi_streambuf(request.in); 121 | fcgi_streambuf cout_fcgi_streambuf(request.out); 122 | fcgi_streambuf cerr_fcgi_streambuf(request.err); 123 | 124 | 
std::istream fcgiin(&cin_fcgi_streambuf); 125 | std::ostream fcgiout(&cout_fcgi_streambuf); 126 | std::ostream fcgierr(&cerr_fcgi_streambuf); 127 | 128 | try { 129 | RequestRawReader reader(&fcgiin); 130 | 131 | reader.DoEndpointing(ResponseParams::default_endofspeech); 132 | 133 | ResponseParams params; 134 | apply_request_parameters(request, reader, params); 135 | 136 | std::auto_ptr writer_ptr; 137 | if (params.multipart) { 138 | writer_ptr.reset(new ResponseMultipartJsonWriter(&fcgiout)); 139 | } else { 140 | writer_ptr.reset(new ResponseJsonWriter(&fcgiout)); 141 | } 142 | 143 | fcgiout << "Content-type: "<< writer_ptr.get()->GetContentType() <<"\r\n\r\n"; 144 | 145 | decoder.Decode(reader, *(writer_ptr.get())); 146 | } catch (std::exception &e) { 147 | KALDI_LOG << "Fatal exception: " << e.what(); 148 | } 149 | 150 | FCGX_Finish_r(&request); 151 | } 152 | } 153 | 154 | int FcgiDecodingApp::Run(int argc, char **argv) { 155 | 156 | if (running_) { 157 | KALDI_WARN << "Application already running"; 158 | return 1; 159 | } 160 | running_ = true; 161 | 162 | // Predefined configuration args 163 | const char *extra_args[] = { 164 | "--feature-type=mfcc", 165 | "--mfcc-config=mfcc.conf", 166 | "--frame-subsampling-factor=3", 167 | "--max-active=2000", 168 | "--beam=15.0", 169 | "--lattice-beam=6.0", 170 | "--acoustic-scale=1.0", 171 | "--endpoint.silence-phones=1", 172 | "--endpoint.rule1.min-trailing-silence=0.5", 173 | "--endpoint.rule2.min-trailing-silence=0.15", 174 | "--endpoint.rule3.min-trailing-silence=0.1", 175 | }; 176 | 177 | FCGX_Init(); 178 | 179 | kaldi::ParseOptions po(usage_.data()); 180 | RegisterOptions(po); 181 | decoder_.RegisterOptions(po); 182 | 183 | std::vector args; 184 | args.push_back(argv[0]); 185 | args.insert(args.end(), extra_args, extra_args + sizeof(extra_args) / sizeof(extra_args[0])); 186 | args.insert(args.end(), argv + 1, argv + argc); 187 | po.Read(args.size(), args.data()); 188 | 189 | if (fcgi_threads_number_ < 1) { 190 | 
KALDI_ERR << "Number of threads should be at least 1, but " << fcgi_threads_number_ << " given"; 191 | } 192 | 193 | if (fcgi_socket_path_.size() > 0) { 194 | socket_id_ = FCGX_OpenSocket(fcgi_socket_path_.data(), fcgi_socket_backlog_); 195 | if (socket_id_ < 0) { 196 | KALDI_WARN << "Error opening socket" << fcgi_socket_path_ << "(backlog: " << fcgi_socket_backlog_ << ")"; 197 | return 1; 198 | } else { 199 | KALDI_LOG << "Listening FastCGI data at \"" << fcgi_socket_path_ << "\""; 200 | } 201 | } else { 202 | KALDI_LOG << "Listening FastCGI data at stdin"; 203 | } 204 | 205 | if (!decoder_.Initialize(po)) { 206 | po.PrintUsage(); 207 | running_ = false; 208 | return 1; 209 | } 210 | 211 | if (fcgi_threads_number_ == 1) { 212 | KALDI_VLOG(1) << "Single thread running"; 213 | ProcessingRoutine(decoder_); 214 | } else { 215 | std::list thread_list; 216 | int errnumber; 217 | 218 | for (int i = 0; i < fcgi_threads_number_; i++) { 219 | pthread_t thread; 220 | if ((errnumber = pthread_create(&thread, NULL, RunChildThread, this)) != 0) { 221 | KALDI_WARN << "Failed to start thread: " << strerror(errnumber); 222 | break; 223 | } else { 224 | thread_list.push_back(thread); 225 | } 226 | } 227 | 228 | KALDI_VLOG(1) << "Threads ready: " << thread_list.size(); 229 | 230 | for (std::list::iterator i = thread_list.begin(); i != thread_list.end(); ++i) { 231 | if ((errnumber = pthread_join(*i, NULL)) != 0) { 232 | KALDI_WARN << "Failed to join thread: " << strerror(errnumber); 233 | } 234 | } 235 | KALDI_VLOG(1) << "Thread finished, threads left: " << thread_list.size(); 236 | } 237 | 238 | running_ = false; 239 | return 0; 240 | } 241 | } /* namespace apiai */ 242 | -------------------------------------------------------------------------------- /src/FcgiDecodingApp.h: -------------------------------------------------------------------------------- 1 | // FcgiDecodingApp.h 2 | 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this 
file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 10 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 11 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 12 | // MERCHANTABLITY OR NON-INFRINGEMENT. 13 | // See the Apache 2 License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #ifndef APIAI_DECODER_FCGIDECODINGAPP_H_ 17 | #define APIAI_DECODER_FCGIDECODINGAPP_H_ 18 | 19 | #include "Decoder.h" 20 | 21 | namespace apiai { 22 | 23 | /** 24 | * Decoding class with main routine defined. 25 | * Data IO implemented via FastCGI gate. 26 | * Input data expected as raw audio stream 27 | * Output data is JSON encoded objects 28 | */ 29 | class FcgiDecodingApp { 30 | public: 31 | /** Initialize with given decoder */ 32 | FcgiDecodingApp(Decoder &decoder) : decoder_(decoder), 33 | fcgi_threads_number_(1), fcgi_socket_backlog_(0), socket_id_(0), 34 | running_(false) {}; 35 | 36 | /** Get run specifications and allowed arguments list */ 37 | std::string &Usage() { return usage_; } 38 | /** Set run specifications and allowed arguments list */ 39 | void Usage(std::string &usage) { usage_ = usage; } 40 | 41 | /** Run main routine and pass all given arguments */ 42 | int Run(int argn, char **argv); 43 | private: 44 | void RegisterOptions(kaldi::OptionsItf &po); 45 | void ProcessingRoutine(Decoder &decoder); 46 | static void *RunChildThread(void *app); 47 | 48 | Decoder &decoder_; 49 | std::string usage_; 50 | 51 | int fcgi_threads_number_; 52 | std::string fcgi_socket_path_; 53 | int fcgi_socket_backlog_; 54 | int socket_id_; 55 | bool running_; 56 | }; 57 | 58 | } /* namespace apiai */ 59 | 60 | #endif /* APIAI_DECODER_FCGIDECODINGAPP_H_ */ 61 | 
--------------------------------------------------------------------------------
/src/Makefile:
--------------------------------------------------------------------------------

# Declared first so that "all" is the default goal.
all:

# apiai.mk is written by the top-level configure script and defines KALDI_PATH
# and APIAI_CXX_FLAGS.
include ../apiai.mk
include $(KALDI_PATH)/kaldi.mk

LDFLAGS += $(CUDA_LDFLAGS)
LDLIBS += -lfcgi -lfcgi++ $(CUDA_LDLIBS)
EXTRA_CXXFLAGS += -I$(KALDI_PATH) -L$(KALDI_PATH) $(APIAI_CXX_FLAGS)

OBJFILES = \
	Timing.o \
	Response.o \
	RequestRawReader.o \
	ResponseJsonWriter.o \
	ResponseMultipartJsonWriter.o \
	OnlineDecoder.o \
	Nnet3LatgenFasterDecoder.o \
	QueryStringParser.o \
	FcgiDecodingApp.o

LIBNAME = libstidecoder

BINFILES = fcgi-nnet3-decoder

TESTFILES = QueryStringParserTests

# Kaldi static libraries linked into the binaries.
ADDLIBS = \
	$(KALDI_PATH)/online2/kaldi-online2.a \
	$(KALDI_PATH)/ivector/kaldi-ivector.a \
	$(KALDI_PATH)/nnet2/kaldi-nnet2.a \
	$(KALDI_PATH)/nnet3/kaldi-nnet3.a \
	$(KALDI_PATH)/lat/kaldi-lat.a \
	$(KALDI_PATH)/decoder/kaldi-decoder.a \
	$(KALDI_PATH)/cudamatrix/kaldi-cudamatrix.a \
	$(KALDI_PATH)/feat/kaldi-feat.a \
	$(KALDI_PATH)/transform/kaldi-transform.a \
	$(KALDI_PATH)/gmm/kaldi-gmm.a \
	$(KALDI_PATH)/hmm/kaldi-hmm.a \
	$(KALDI_PATH)/tree/kaldi-tree.a \
	$(KALDI_PATH)/matrix/kaldi-matrix.a \
	$(KALDI_PATH)/fstext/kaldi-fstext.a \
	$(KALDI_PATH)/util/kaldi-util.a \
	$(KALDI_PATH)/base/kaldi-base.a

include $(KALDI_PATH)/makefiles/default_rules.mk
--------------------------------------------------------------------------------
/src/Nnet3LatgenFasterDecoder.cc:
--------------------------------------------------------------------------------
// Nnet3LatgenFasterDecoder.cc

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 10 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 11 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 12 | // MERCHANTABLITY OR NON-INFRINGEMENT. 13 | // See the Apache 2 License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #include "Nnet3LatgenFasterDecoder.h" 17 | 18 | namespace apiai { 19 | 20 | Nnet3LatgenFasterDecoder::Nnet3LatgenFasterDecoder() { 21 | online_ = true; 22 | decode_fst_ = NULL; 23 | trans_model_ = NULL; 24 | nnet_ = NULL; 25 | decodable_info_ = NULL; 26 | feature_info_ = NULL; 27 | nnet3_rxfilename_ = "final.mdl"; 28 | } 29 | 30 | Nnet3LatgenFasterDecoder::~Nnet3LatgenFasterDecoder() { 31 | delete decode_fst_; 32 | delete trans_model_; 33 | delete nnet_; 34 | delete decodable_info_; 35 | delete feature_info_; 36 | } 37 | 38 | Nnet3LatgenFasterDecoder *Nnet3LatgenFasterDecoder::Clone() const { 39 | return new Nnet3LatgenFasterDecoder(*this); 40 | } 41 | 42 | void Nnet3LatgenFasterDecoder::RegisterOptions(kaldi::OptionsItf &po) { 43 | OnlineDecoder::RegisterOptions(po); 44 | 45 | po.Register("nnet-in", &nnet3_rxfilename_, 46 | "Path to nnet"); 47 | po.Register("online", &online_, 48 | "You can set this to false to disable online iVector estimation " 49 | "and have all the data for each utterance used, even at " 50 | "utterance start. This is useful where you just want the best " 51 | "results and don't care about online operation. 
Setting this to " 52 | "false has the same effect as setting " 53 | "--use-most-recent-ivector=true and --greedy-ivector-extractor=true " 54 | "in the file given to --ivector-extraction-config, and " 55 | "--chunk-length=-1."); 56 | 57 | feature_config_.Register(&po); 58 | decoder_opts_.Register(&po); 59 | decodable_opts_.Register(&po); 60 | endpoint_config_.Register(&po); 61 | } 62 | 63 | bool Nnet3LatgenFasterDecoder::Initialize(kaldi::OptionsItf &po) { 64 | if (!OnlineDecoder::Initialize(po)) { 65 | return false; 66 | } 67 | 68 | if (fst_rxfilename_ == "") { 69 | return false; 70 | } 71 | 72 | if (nnet3_rxfilename_ == "") { 73 | return false; 74 | } 75 | 76 | feature_info_ = new kaldi::OnlineNnet2FeaturePipelineInfo(feature_config_); 77 | 78 | if (!online_) { 79 | feature_info_->ivector_extractor_info.use_most_recent_ivector = true; 80 | feature_info_->ivector_extractor_info.greedy_ivector_extractor = true; 81 | chunk_length_secs_ = -1.0; 82 | } 83 | 84 | trans_model_ = new kaldi::TransitionModel(); 85 | nnet_ = new kaldi::nnet3::AmNnetSimple(); 86 | { 87 | bool binary; 88 | kaldi::Input ki(nnet3_rxfilename_, &binary); 89 | trans_model_->Read(ki.Stream(), binary); 90 | nnet_->Read(ki.Stream(), binary); 91 | } 92 | 93 | // this object contains precomputed stuff that is used by all decodable 94 | // objects. It takes a pointer to nnet_ because if it has iVectors it has 95 | // to modify the nnet to accept iVectors at intervals. 
96 | decodable_info_ = new kaldi::nnet3::DecodableNnetSimpleLoopedInfo( 97 | decodable_opts_, nnet_); 98 | 99 | decode_fst_ = fst::ReadFstKaldiGeneric(fst_rxfilename_); 100 | 101 | fst::SymbolTable *word_syms = NULL; 102 | if (word_syms_rxfilename_ != "") 103 | if (!(word_syms = fst::SymbolTable::ReadText(word_syms_rxfilename_))) 104 | KALDI_ERR << "Could not read symbol table from file " 105 | << word_syms_rxfilename_; 106 | 107 | acoustic_scale_ = decodable_opts_.acoustic_scale; 108 | 109 | return true; 110 | } 111 | 112 | void Nnet3LatgenFasterDecoder::InputStarted() 113 | { 114 | adaptation_state_ = new kaldi::OnlineIvectorExtractorAdaptationState(feature_info_->ivector_extractor_info); 115 | 116 | feature_pipeline_ = new kaldi::OnlineNnet2FeaturePipeline (*feature_info_); 117 | feature_pipeline_->SetAdaptationState(*adaptation_state_); 118 | 119 | decoder_ = new kaldi::SingleUtteranceNnet3Decoder(decoder_opts_, 120 | *trans_model_, 121 | *decodable_info_, 122 | *decode_fst_, 123 | feature_pipeline_); 124 | } 125 | 126 | 127 | void Nnet3LatgenFasterDecoder::CleanUp() 128 | { 129 | delete decoder_; 130 | delete adaptation_state_; 131 | delete feature_pipeline_; 132 | 133 | decoder_ = NULL; 134 | adaptation_state_ = NULL; 135 | feature_pipeline_ = NULL; 136 | } 137 | 138 | bool Nnet3LatgenFasterDecoder::AcceptWaveform(kaldi::BaseFloat sampling_rate, 139 | const kaldi::VectorBase &waveform, 140 | const bool do_endpointing) 141 | { 142 | feature_pipeline_->AcceptWaveform(sampling_rate, waveform); 143 | 144 | if (do_endpointing && decoder_->EndpointDetected(endpoint_config_)) { 145 | return false; 146 | } 147 | 148 | decoder_->AdvanceDecoding(); 149 | 150 | return true; 151 | } 152 | 153 | void Nnet3LatgenFasterDecoder::InputFinished() 154 | { 155 | feature_pipeline_->InputFinished(); 156 | decoder_->AdvanceDecoding(); 157 | decoder_->FinalizeDecoding(); 158 | } 159 | 160 | void Nnet3LatgenFasterDecoder::GetLattice(kaldi::CompactLattice *clat, bool end_of_utterance) 
161 | { 162 | decoder_->GetLattice(end_of_utterance, clat); 163 | 164 | // In an application you might avoid updating the adaptation state if 165 | // you felt the utterance had low confidence. See lat/confidence.h 166 | feature_pipeline_->GetAdaptationState(adaptation_state_); 167 | 168 | if (acoustic_scale_ != 0) { 169 | ScaleLattice(fst::AcousticLatticeScale(1.0 / acoustic_scale_), clat); 170 | } 171 | } 172 | 173 | } /* namespace apiai */ 174 | -------------------------------------------------------------------------------- /src/Nnet3LatgenFasterDecoder.h: -------------------------------------------------------------------------------- 1 | // Nnet3LatgenFasterDecoder.h 2 | 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 10 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 11 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 12 | // MERCHANTABLITY OR NON-INFRINGEMENT. 13 | // See the Apache 2 License for the specific language governing permissions and 14 | // limitations under the License. 
15 | 16 | #ifndef APIAI_DECODER_NNET3LATGENFASTERDECODER_H_ 17 | #define APIAI_DECODER_NNET3LATGENFASTERDECODER_H_ 18 | 19 | #include "OnlineDecoder.h" 20 | #include "online2/online-nnet3-decoding.h" 21 | #include "online2/online-nnet2-feature-pipeline.h" 22 | 23 | namespace apiai { 24 | 25 | class Nnet3LatgenFasterDecoder: public OnlineDecoder { 26 | public: 27 | Nnet3LatgenFasterDecoder(); 28 | virtual ~Nnet3LatgenFasterDecoder(); 29 | 30 | virtual Nnet3LatgenFasterDecoder *Clone() const; 31 | virtual void RegisterOptions(kaldi::OptionsItf &po); 32 | virtual bool Initialize(kaldi::OptionsItf &po); 33 | protected: 34 | virtual bool AcceptWaveform(kaldi::BaseFloat sampling_rate, 35 | const kaldi::VectorBase &waveform, 36 | const bool do_endpointing); 37 | virtual void InputStarted(); 38 | virtual void InputFinished(); 39 | virtual void GetLattice(kaldi::CompactLattice *clat, bool end_of_utterance); 40 | virtual void CleanUp(); 41 | private: 42 | std::string nnet3_rxfilename_; 43 | 44 | bool online_; 45 | kaldi::OnlineEndpointConfig endpoint_config_; 46 | 47 | // feature_config includes configuration for the iVector adaptation, 48 | // as well as the basic features. 
49 | kaldi::OnlineNnet2FeaturePipelineConfig feature_config_; 50 | kaldi::nnet3::NnetSimpleLoopedComputationOptions decodable_opts_; 51 | kaldi::LatticeFasterDecoderConfig decoder_opts_; 52 | 53 | kaldi::OnlineNnet2FeaturePipelineInfo *feature_info_; 54 | fst::Fst *decode_fst_; 55 | kaldi::TransitionModel *trans_model_; 56 | kaldi::nnet3::AmNnetSimple *nnet_; 57 | kaldi::nnet3::DecodableNnetSimpleLoopedInfo *decodable_info_; 58 | 59 | kaldi::OnlineIvectorExtractorAdaptationState *adaptation_state_; 60 | kaldi::OnlineNnet2FeaturePipeline *feature_pipeline_; 61 | kaldi::SingleUtteranceNnet3Decoder *decoder_; 62 | }; 63 | 64 | } /* namespace apiai */ 65 | 66 | #endif /* APIAI_DECODER_NNET3LATGENFASTERDECODER_H_ */ 67 | -------------------------------------------------------------------------------- /src/OnlineDecoder.cc: -------------------------------------------------------------------------------- 1 | // OnlineDecoder.cc 2 | 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 10 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 11 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 12 | // MERCHANTABLITY OR NON-INFRINGEMENT. 13 | // See the Apache 2 License for the specific language governing permissions and 14 | // limitations under the License. 
15 | 16 | #include "OnlineDecoder.h" 17 | #include "Timing.h" 18 | 19 | namespace apiai { 20 | 21 | #define PAD_SIZE 400 22 | #define AUDIO_DATA_FREQUENCY 16000 23 | kaldi::BaseFloat padVector[PAD_SIZE]; 24 | 25 | struct OnlineDecoder::DecodedData { 26 | kaldi::LatticeWeight weight; 27 | std::vector words; 28 | std::vector alignment; 29 | std::vector weights; 30 | }; 31 | 32 | bool wordsEquals(std::vector &a, std::vector &b) { 33 | return (a.size() == b.size()) && (std::equal(a.begin(), a.end(), b.begin())); 34 | } 35 | 36 | bool getWeightMeasures(const kaldi::Lattice &fst, 37 | std::vector *weights_out) { 38 | typedef kaldi::LatticeArc::Label Label; 39 | typedef kaldi::LatticeArc::StateId StateId; 40 | typedef kaldi::LatticeArc::Weight Weight; 41 | 42 | std::vector weights; 43 | 44 | StateId cur_state = fst.Start(); 45 | if (cur_state == fst::kNoStateId) { // empty sequence. 46 | if (weights_out != NULL) weights_out->clear(); 47 | return true; 48 | } 49 | while (1) { 50 | Weight w = fst.Final(cur_state); 51 | if (w != Weight::Zero()) { // is final.. 52 | 53 | if (w.Value1() != 0 || w.Value2() != 0) { 54 | weights.push_back(w); 55 | } 56 | if (fst.NumArcs(cur_state) != 0) return false; 57 | if (weights_out != NULL) *weights_out = weights; 58 | return true; 59 | } else { 60 | if (fst.NumArcs(cur_state) != 1) return false; 61 | 62 | fst::ArcIterator > iter(fst, cur_state); // get the only arc. 
63 | const kaldi::LatticeArc &arc = iter.Value(); 64 | if (arc.weight.Value1() != 0 || arc.weight.Value2() != 0) { 65 | weights.push_back(arc.weight); 66 | } 67 | cur_state = arc.nextstate; 68 | } 69 | } 70 | } 71 | 72 | 73 | OnlineDecoder::OnlineDecoder() { 74 | lm_scale_ = 10; 75 | chunk_length_secs_ = 0.18; 76 | max_record_size_seconds_ = 0; 77 | max_lattice_unchanged_interval_seconds_ = 0; 78 | decoding_timeout_seconds_ = 0; 79 | 80 | word_syms_rxfilename_ = "words.txt"; 81 | fst_rxfilename_ = "HCLG.fst"; 82 | } 83 | 84 | OnlineDecoder::~OnlineDecoder() { 85 | } 86 | 87 | 88 | void OnlineDecoder::GetRecognitionResult(DecodedData &input, RecognitionResult *output) { 89 | // TODO move parameters to external file 90 | output->confidence = std::max(0.0, std::min(1.0, -0.0001466488 * (2.388449*float(input.weight.Value1()) + float(input.weight.Value2())) / (input.words.size() + 1) + 0.956)); 91 | 92 | std::ostringstream outss; 93 | 94 | for (size_t i = 0; i < input.words.size(); i++) { 95 | if (i) { 96 | outss << " "; 97 | } 98 | std::string s = word_syms_->Find(input.words[i]); 99 | if (s == "") { 100 | KALDI_WARN << "Word-id " << input.words[i] <<" not in symbol table."; 101 | } else { 102 | outss << s; 103 | } 104 | } 105 | output->text = outss.str(); 106 | } 107 | 108 | void OnlineDecoder::GetRecognitionResult(std::vector &input, std::vector *output) { 109 | for (int i = 0; i < input.size(); i++) { 110 | RecognitionResult result; 111 | GetRecognitionResult(input.at(i), &result); 112 | output->push_back(result); 113 | } 114 | } 115 | 116 | void OnlineDecoder::RegisterOptions(kaldi::OptionsItf &po) { 117 | po.Register("chunk-length", &chunk_length_secs_, 118 | "Length of chunk size in seconds, that we process."); 119 | po.Register("word-symbol-table", &word_syms_rxfilename_, 120 | "Symbol table for words [for debug output]"); 121 | po.Register("fst-in", &fst_rxfilename_, "Path to FST model file"); 122 | po.Register("lm-scale", &lm_scale_, "Scaling factor for LM 
probabilities. " 123 | "Note: the ratio acoustic-scale/lm-scale is all that matters."); 124 | 125 | po.Register("max-record-length", &max_record_size_seconds_, 126 | "Max length of record in seconds to be recognised. " 127 | "All records longer than given value will be truncated. Note: Non-positive value to deactivate."); 128 | 129 | po.Register("max-lattice-unchanged-interval", &max_lattice_unchanged_interval_seconds_, 130 | "Max interval length in seconds of lattice recognised unchanged. Note: Non-positive value to deactivate."); 131 | 132 | po.Register("decoding-timeout", &decoding_timeout_seconds_, 133 | "Decoding process timeout given in seconds. Timeout disabled if value is non-positive."); 134 | } 135 | 136 | bool OnlineDecoder::Initialize(kaldi::OptionsItf &po) { 137 | word_syms_ = NULL; 138 | if (word_syms_rxfilename_ == "") { 139 | return false; 140 | } 141 | if (!(word_syms_ = fst::SymbolTable::ReadText(word_syms_rxfilename_))) { 142 | KALDI_ERR << "Could not read symbol table from file " 143 | << word_syms_rxfilename_; 144 | } 145 | return true; 146 | } 147 | 148 | void OnlineDecoder::Decode(Request &request, Response &response) { 149 | try { 150 | KALDI_ASSERT(request.Frequency() == AUDIO_DATA_FREQUENCY); 151 | milliseconds_t start_time = getMilliseconds(); 152 | milliseconds_t progress_time = 0; 153 | 154 | KALDI_VLOG(1) << "Started @ " << start_time << " ms"; 155 | InputStarted(); 156 | 157 | int intermediate_counter = 1; 158 | int intermediate_samples_interval = request.IntermediateIntervalMillisec() > 0 ? request.IntermediateIntervalMillisec() * (request.Frequency() / 1000) : 0; 159 | int max_samples_limit = max_record_size_seconds_ > 0 ? 
max_record_size_seconds_ * request.Frequency() : 0; 160 | 161 | std::vector prev_words; 162 | int samples_per_chunk = int(chunk_length_secs_ * request.Frequency()); 163 | 164 | int samp_counter = 0; 165 | 166 | kaldi::SubVector *wave_part; 167 | 168 | bool do_endpointing = request.DoEndpointing(); 169 | std::string requestInterrupted = Response::NOT_INTERRUPTED; 170 | int samples_left = (max_samples_limit > 0) ? std::min(max_samples_limit, samples_per_chunk) : samples_per_chunk; 171 | const bool decoding_timeout_enabled = decoding_timeout_seconds_ > 0; 172 | const int decoding_timeout_ms = decoding_timeout_enabled ? decoding_timeout_seconds_ * 1000 : 0; 173 | 174 | int time_left_ms = decoding_timeout_ms; 175 | while ((wave_part = request.NextChunk(samples_left, time_left_ms)) != NULL) { 176 | 177 | samp_counter += wave_part->Dim(); 178 | 179 | if (AcceptWaveform(request.Frequency(), *wave_part, do_endpointing) == false && do_endpointing) { 180 | requestInterrupted = Response::INTERRUPTED_END_OF_SPEECH; 181 | KALDI_VLOG(1) << "End Point Detected @ " << (getMillisecondsSince(start_time)) << " ms"; 182 | break; 183 | } 184 | progress_time = getMillisecondsSince(start_time); 185 | 186 | if (max_samples_limit > 0) { 187 | if (samp_counter > max_samples_limit) { 188 | requestInterrupted = Response::INTERRUPTED_DATA_SIZE_LIMIT; 189 | KALDI_VLOG(1) << "Interrupted by record length @ " << progress_time << " ms"; 190 | break; 191 | } 192 | samples_left = std::min(max_samples_limit - samp_counter, samples_per_chunk); 193 | } 194 | 195 | if ((intermediate_samples_interval > 0) && (samp_counter > (intermediate_samples_interval * intermediate_counter))) { 196 | intermediate_counter++; 197 | std::vector decodeData; 198 | if (DecodeIntermediate(1, &decodeData) > 0) { 199 | DecodedData &data = decodeData.at(0); 200 | if (!wordsEquals(prev_words, data.words)) { 201 | RecognitionResult recognitionResult; 202 | GetRecognitionResult(data, &recognitionResult); 203 | 
response.SetIntermediateResult(recognitionResult, (samp_counter / (request.Frequency() / 1000))); 204 | prev_words = data.words; 205 | } 206 | } else { 207 | prev_words.clear(); 208 | } 209 | } 210 | if (decoding_timeout_enabled) { 211 | time_left_ms = decoding_timeout_ms - getMillisecondsSince(start_time); 212 | if (time_left_ms <= 0) { 213 | break; 214 | } 215 | } 216 | } 217 | if (wave_part != NULL && requestInterrupted.size() == 0) { 218 | if (decoding_timeout_enabled && (decoding_timeout_ms - getMillisecondsSince(start_time) <= 0)) { 219 | KALDI_VLOG(1) << "Timeout reached @ " << (getMillisecondsSince(start_time)) << " ms"; 220 | requestInterrupted = Response::INTERRUPTED_TIMEOUT; 221 | } else { 222 | requestInterrupted = Response::INTERRUPTED_UNEXPECTED; 223 | } 224 | } 225 | 226 | if (samp_counter == 0) { 227 | throw std::runtime_error("Got no data"); 228 | } 229 | 230 | if (samp_counter < PAD_SIZE) { 231 | KALDI_VLOG(1) << "Input too short, padding with " << (PAD_SIZE - samp_counter) << " zero samples"; 232 | kaldi::SubVector padding(padVector, PAD_SIZE - samp_counter); 233 | AcceptWaveform(request.Frequency(), padding, false); 234 | } 235 | 236 | KALDI_VLOG(1) << "Input finished @ " << getMillisecondsSince(start_time) << " ms (audio length: " << (samp_counter / (request.Frequency() / 1000)) << " ms)"; 237 | InputFinished(); 238 | 239 | std::vector result; 240 | 241 | int32 decoded = Decode(true, request.BestCount(), &result); 242 | 243 | if (decoded == 0) { 244 | response.SetError("Best-path failed"); 245 | KALDI_WARN << "Best-path failed"; 246 | } else { 247 | std::vector recognitionResults; 248 | GetRecognitionResult(result, &recognitionResults); 249 | response.SetResult(recognitionResults, requestInterrupted, (samp_counter / (request.Frequency() / 1000))); 250 | KALDI_VLOG(1) << "Recognized @ " << getMillisecondsSince(start_time) << " ms"; 251 | } 252 | 253 | CleanUp(); 254 | 255 | KALDI_VLOG(1) << "Decode subroutine done"; 256 | } catch 
(std::runtime_error &e) { 257 | response.SetError(e.what()); 258 | } 259 | }; 260 | 261 | int32 OnlineDecoder::DecodeIntermediate(int bestCount, std::vector *result) { 262 | return Decode(false, bestCount, result); 263 | } 264 | 265 | int32 OnlineDecoder::Decode(bool end_of_utterance, int bestCount, std::vector *result) { 266 | kaldi::CompactLattice clat; 267 | GetLattice(&clat, end_of_utterance); 268 | 269 | if (clat.NumStates() == 0) { 270 | return 0; 271 | } 272 | 273 | if (lm_scale_ != 0) { 274 | fst::ScaleLattice(fst::LatticeScale(lm_scale_, 1.0), &clat); 275 | } 276 | 277 | int32 resultsNumber = 0; 278 | 279 | if (bestCount > 1) { 280 | kaldi::Lattice _lat; 281 | fst::ConvertLattice(clat, &_lat); 282 | kaldi::Lattice nbest_lat; 283 | fst::ShortestPath(_lat, &nbest_lat, bestCount); 284 | std::vector nbest_lats; 285 | fst::ConvertNbestToVector(nbest_lat, &nbest_lats); 286 | if (!nbest_lats.empty()) { 287 | resultsNumber = static_cast(nbest_lats.size()); 288 | for (int32 k = 0; k < resultsNumber; k++) { 289 | kaldi::Lattice &nbest_lat = nbest_lats[k]; 290 | 291 | DecodedData decodeData; 292 | GetLinearSymbolSequence(nbest_lat, &(decodeData.alignment), &(decodeData.words), &(decodeData.weight)); 293 | getWeightMeasures(nbest_lat, &(decodeData.weights)); 294 | result->push_back(decodeData); 295 | } 296 | } 297 | } else { 298 | kaldi::CompactLattice best_path_clat; 299 | kaldi::CompactLatticeShortestPath(clat, &best_path_clat); 300 | 301 | kaldi::Lattice best_path_lat; 302 | fst::ConvertLattice(best_path_clat, &best_path_lat); 303 | DecodedData decodeData; 304 | GetLinearSymbolSequence(best_path_lat, &(decodeData.alignment), &(decodeData.words), &(decodeData.weight)); 305 | getWeightMeasures(best_path_lat, &(decodeData.weights)); 306 | result->push_back(decodeData); 307 | resultsNumber = 1; 308 | } 309 | 310 | return resultsNumber; 311 | } 312 | 313 | } /* namespace apiai */ 314 | -------------------------------------------------------------------------------- 
/src/OnlineDecoder.h: -------------------------------------------------------------------------------- 1 | // OnlineDecoder.h 2 | 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 10 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 11 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 12 | // MERCHANTABLITY OR NON-INFRINGEMENT. 13 | // See the Apache 2 License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #ifndef APIAI_DECODER_ONLINEDECODER_H_ 17 | #define APIAI_DECODER_ONLINEDECODER_H_ 18 | 19 | #include "Decoder.h" 20 | #include "online2/online-feature-pipeline.h" 21 | #include "online2/onlinebin-util.h" 22 | #include "online2/online-timing.h" 23 | #include "online2/online-endpoint.h" 24 | #include "fstext/fstext-lib.h" 25 | #include "lat/lattice-functions.h" 26 | #include 27 | 28 | namespace apiai { 29 | 30 | /** 31 | * Basic implementation of common code for all Kaldi online decoders 32 | */ 33 | class OnlineDecoder : public Decoder { 34 | public: 35 | OnlineDecoder(); 36 | virtual ~OnlineDecoder(); 37 | 38 | virtual void RegisterOptions(kaldi::OptionsItf &po); 39 | virtual bool Initialize(kaldi::OptionsItf &po); 40 | virtual void Decode(Request &request, Response &response); 41 | protected: 42 | struct DecodedData; 43 | 44 | /** 45 | * Process next data chunk 46 | */ 47 | virtual bool AcceptWaveform(kaldi::BaseFloat sampling_rate, 48 | const kaldi::VectorBase &waveform, 49 | const bool do_endpointing) = 0; 50 | 51 | /** 52 | * Preparare to decoding 53 | */ 54 | virtual void InputStarted() = 0; 55 | /** 56 | * Decoding finished, gets ready to get results 57 | */ 58 | 
virtual void InputFinished() = 0; 59 | /** 60 | * Put result lattice 61 | */ 62 | virtual void GetLattice(kaldi::CompactLattice *clat, bool end_of_utterance) = 0; 63 | /** 64 | * Clean all data 65 | */ 66 | virtual void CleanUp() = 0; 67 | /** 68 | * Calculate intermediate results 69 | */ 70 | virtual kaldi::int32 DecodeIntermediate(int bestCount, std::vector *result); 71 | 72 | std::string word_syms_rxfilename_; 73 | kaldi::BaseFloat chunk_length_secs_; 74 | kaldi::BaseFloat acoustic_scale_; 75 | kaldi::BaseFloat lm_scale_; 76 | 77 | 78 | /** 79 | * Max length of record in seconds to be recognised. 80 | * All records longer than given value will be truncated. Note: Non-positive value to deactivate. 81 | */ 82 | kaldi::BaseFloat max_record_size_seconds_; 83 | /** 84 | * Max interval length in seconds of lattice recognised unchanged. Non-positive value to deactivate 85 | */ 86 | kaldi::BaseFloat max_lattice_unchanged_interval_seconds_; 87 | 88 | /** Decoding process timeout given in seconds. 89 | * Timeout disabled if value is non-positive 90 | */ 91 | kaldi::BaseFloat decoding_timeout_seconds_; 92 | 93 | bool do_endpointing_; 94 | 95 | std::string fst_rxfilename_; 96 | private: 97 | fst::SymbolTable *word_syms_; 98 | 99 | kaldi::int32 Decode(bool end_of_utterance, int bestCount, std::vector *result); 100 | 101 | void GetRecognitionResult(DecodedData &input, RecognitionResult *output); 102 | void GetRecognitionResult(std::vector &input, std::vector *output); 103 | }; 104 | 105 | } /* namespace apiai */ 106 | 107 | #endif /* APIAI_DECODER_ONLINEDECODER_H_ */ 108 | -------------------------------------------------------------------------------- /src/QueryStringParser.cc: -------------------------------------------------------------------------------- 1 | // QueryStringParser.cc 2 | 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 10 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 11 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 12 | // MERCHANTABLITY OR NON-INFRINGEMENT. 13 | // See the Apache 2 License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #include "QueryStringParser.h" 17 | 18 | namespace apiai { 19 | 20 | enum ParseState { 21 | NameRead, ValueRead, Done 22 | }; 23 | 24 | QueryStringParser::QueryStringParser(const char *query) { 25 | query_.assign(query); 26 | Init(); 27 | } 28 | 29 | QueryStringParser::QueryStringParser(const std::string &query) { 30 | query_ = query; 31 | Init(); 32 | } 33 | 34 | QueryStringParser::~QueryStringParser() { 35 | 36 | } 37 | 38 | void QueryStringParser::Init() { 39 | 40 | 41 | if (query_.size() == 0 || query_ == "?") { 42 | has_next_ = false; 43 | return; 44 | } 45 | 46 | std::string::iterator index = query_.begin(); 47 | 48 | if ((*index) == '?') { 49 | ++index; 50 | } 51 | 52 | has_next_ = SeekNext(index); 53 | } 54 | 55 | bool QueryStringParser::SeekNext(std::string::iterator &from) { 56 | std::string::iterator index = from; 57 | 58 | ParseState state = NameRead; 59 | name_begin_ = index; 60 | value_begin_ = value_end_ = query_.end(); 61 | for (;(state != Done) && (index < query_.end()); index++) { 62 | switch (*index) { 63 | case '=': 64 | switch (state) { 65 | case NameRead: 66 | state = ValueRead; 67 | name_end_ = index; 68 | value_begin_ = value_end_ = index + 1; 69 | break; 70 | case ValueRead: 71 | // Do nothing 72 | break; 73 | case Done: 74 | // Do nothing 75 | break; 76 | } 77 | 78 | break; 79 | case '&': 80 | switch (state) { 81 | case NameRead: 82 | name_end_ = index; 83 | break; 84 | case ValueRead: 85 | value_end_ = index; 86 | 
break; 87 | case Done: 88 | // Do nothing 89 | break; 90 | } 91 | state = Done; 92 | break; 93 | default: 94 | break; 95 | }; 96 | } 97 | 98 | switch (state) { 99 | case NameRead: 100 | name_end_ = index;; 101 | break; 102 | case ValueRead: 103 | value_end_ = index; 104 | break; 105 | case Done: 106 | // Do nothing 107 | break; 108 | } 109 | 110 | bool result = index != from; 111 | 112 | return result; 113 | } 114 | 115 | bool QueryStringParser::Next(std::string *name, std::string *value) { 116 | if (!has_next_) { 117 | return false; 118 | } 119 | 120 | name->assign(name_begin_, name_end_); 121 | value->assign(value_begin_, value_end_); 122 | 123 | std::string::iterator index = value_end_; 124 | ++index; 125 | has_next_ = SeekNext(index); 126 | return true; 127 | } 128 | 129 | } /* namespace apiai */ 130 | -------------------------------------------------------------------------------- /src/QueryStringParser.h: -------------------------------------------------------------------------------- 1 | // QueryStringParser.h 2 | 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 10 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 11 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 12 | // MERCHANTABLITY OR NON-INFRINGEMENT. 13 | // See the Apache 2 License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #ifndef SRC_QUERYSTRINGPARSER_H_ 17 | #define SRC_QUERYSTRINGPARSER_H_ 18 | 19 | #include 20 | 21 | namespace apiai { 22 | 23 | /** 24 | * Query string parser. 
25 | * Iterates through name-value pairs of standard URI query string 26 | */ 27 | class QueryStringParser { 28 | public: 29 | /** Initialize parse with given query string */ 30 | QueryStringParser(const char *query); 31 | /** Initialize parse with given query string */ 32 | QueryStringParser(const std::string &query); 33 | virtual ~QueryStringParser(); 34 | 35 | /** Returns true if there is more unhandled name-value pairs */ 36 | bool HasNext() const { return has_next_; } 37 | /** 38 | * Get next name-value pair. 39 | * Returns false if there is no more pairs 40 | */ 41 | bool Next(std::string *name, std::string *value); 42 | private: 43 | void Init(); 44 | bool SeekNext(std::string::iterator &from); 45 | 46 | std::string query_; 47 | bool has_next_; 48 | 49 | std::string::iterator name_begin_; 50 | std::string::iterator name_end_; 51 | std::string::iterator value_begin_; 52 | std::string::iterator value_end_; 53 | }; 54 | 55 | } /* namespace apiai */ 56 | 57 | #endif /* SRC_QUERYSTRINGPARSER_H_ */ 58 | -------------------------------------------------------------------------------- /src/QueryStringParserTests.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * QueryStringParser.cpp 3 | * 4 | * Created on: Apr 6, 2016 5 | * Author: folomeev 6 | */ 7 | 8 | #include "QueryStringParser.h" 9 | #include "base/kaldi-error.h" 10 | 11 | namespace apiai { 12 | 13 | void TestEmptyString() { 14 | std::string name, value; 15 | QueryStringParser parser(""); 16 | 17 | KALDI_ASSERT(!parser.HasNext()); 18 | KALDI_ASSERT(!parser.Next(&name, &value)); 19 | } 20 | 21 | void TestQuestionMarkString() { 22 | std::string name, value; 23 | QueryStringParser parser("?"); 24 | 25 | KALDI_ASSERT(!parser.HasNext()); 26 | KALDI_ASSERT(!parser.Next(&name, &value)); 27 | } 28 | 29 | void TestSingleNameValue() { 30 | std::string name, value; 31 | QueryStringParser parser("?name=value"); 32 | 33 | KALDI_ASSERT(parser.HasNext()); 34 | 
KALDI_ASSERT(parser.Next(&name, &value)); 35 | KALDI_ASSERT(name == "name"); 36 | KALDI_ASSERT(value == "value"); 37 | KALDI_ASSERT(!parser.HasNext()); 38 | KALDI_ASSERT(!parser.Next(&name, &value)); 39 | } 40 | 41 | void TestSingleNameNoValue() { 42 | std::string name, value; 43 | QueryStringParser parser("?name="); 44 | 45 | KALDI_ASSERT(parser.HasNext()); 46 | KALDI_ASSERT(parser.Next(&name, &value)); 47 | KALDI_ASSERT(name == "name"); 48 | KALDI_ASSERT(value == ""); 49 | KALDI_ASSERT(!parser.HasNext()); 50 | KALDI_ASSERT(!parser.Next(&name, &value)); 51 | } 52 | 53 | void TestSingleNoNameValue() { 54 | std::string name, value; 55 | QueryStringParser parser("?=value"); 56 | 57 | KALDI_ASSERT(parser.HasNext()); 58 | KALDI_ASSERT(parser.Next(&name, &value)); 59 | KALDI_ASSERT(name == ""); 60 | KALDI_ASSERT(value == "value"); 61 | KALDI_ASSERT(!parser.HasNext()); 62 | KALDI_ASSERT(!parser.Next(&name, &value)); 63 | } 64 | 65 | void TestEndsWithAmpersand() { 66 | std::string name, value; 67 | QueryStringParser parser("?&"); 68 | 69 | KALDI_ASSERT(parser.HasNext()); 70 | KALDI_ASSERT(parser.Next(&name, &value)); 71 | KALDI_ASSERT(name == ""); 72 | KALDI_ASSERT(value == ""); 73 | KALDI_ASSERT(!parser.HasNext()); 74 | KALDI_ASSERT(!parser.Next(&name, &value)); 75 | } 76 | 77 | void TestEquatationInValue() { 78 | std::string name, value; 79 | QueryStringParser parser("?name=v=u"); 80 | 81 | KALDI_ASSERT(parser.HasNext()); 82 | KALDI_ASSERT(parser.Next(&name, &value)); 83 | KALDI_ASSERT(name == "name"); 84 | KALDI_ASSERT(value == "v=u"); 85 | KALDI_ASSERT(!parser.HasNext()); 86 | KALDI_ASSERT(!parser.Next(&name, &value)); 87 | } 88 | 89 | 90 | void TestSingleNoNameNoValue() { 91 | std::string name, value; 92 | QueryStringParser parser("?="); 93 | 94 | KALDI_ASSERT(parser.HasNext()); 95 | KALDI_ASSERT(parser.Next(&name, &value)); 96 | KALDI_ASSERT(name == ""); 97 | KALDI_ASSERT(value == ""); 98 | KALDI_ASSERT(!parser.HasNext()); 99 | KALDI_ASSERT(!parser.Next(&name, 
&value)); 100 | } 101 | 102 | void TestTwoNameValuePairs() { 103 | std::string name, value; 104 | QueryStringParser parser("?name1=value1&name2=value2"); 105 | 106 | KALDI_ASSERT(parser.HasNext()); 107 | KALDI_ASSERT(parser.Next(&name, &value)); 108 | KALDI_ASSERT(name == "name1"); 109 | KALDI_ASSERT(value == "value1"); 110 | KALDI_ASSERT(parser.HasNext()); 111 | KALDI_ASSERT(parser.Next(&name, &value)); 112 | KALDI_ASSERT(name == "name2"); 113 | KALDI_ASSERT(value == "value2"); 114 | KALDI_ASSERT(!parser.HasNext()); 115 | KALDI_ASSERT(!parser.Next(&name, &value)); 116 | } 117 | 118 | void TestEmptyNameAfterEquality() { 119 | std::string name, value; 120 | QueryStringParser parser("?name1=value1&=value2"); 121 | 122 | KALDI_ASSERT(parser.HasNext()); 123 | KALDI_ASSERT(parser.Next(&name, &value)); 124 | KALDI_ASSERT(name == "name1"); 125 | KALDI_ASSERT(value == "value1"); 126 | KALDI_ASSERT(parser.HasNext()); 127 | KALDI_ASSERT(parser.Next(&name, &value)); 128 | KALDI_ASSERT(name == ""); 129 | KALDI_ASSERT(value == "value2"); 130 | KALDI_ASSERT(!parser.HasNext()); 131 | KALDI_ASSERT(!parser.Next(&name, &value)); 132 | } 133 | 134 | 135 | } /* namespace apiai */ 136 | 137 | 138 | 139 | int main(int argn, char *argv[]) { 140 | using namespace apiai; 141 | 142 | TestEmptyString(); 143 | TestQuestionMarkString(); 144 | TestEndsWithAmpersand(); 145 | TestEquatationInValue(); 146 | TestSingleNameValue(); 147 | TestSingleNameNoValue(); 148 | TestSingleNoNameValue(); 149 | TestSingleNoNameNoValue(); 150 | TestTwoNameValuePairs(); 151 | TestEmptyNameAfterEquality(); 152 | return 0; 153 | } 154 | 155 | -------------------------------------------------------------------------------- /src/Request.h: -------------------------------------------------------------------------------- 1 | // Request.h 2 | 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 10 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 11 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 12 | // MERCHANTABLITY OR NON-INFRINGEMENT. 13 | // See the Apache 2 License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #ifndef SRC_REQUEST_H_ 17 | #define SRC_REQUEST_H_ 18 | 19 | #include "base/kaldi-types.h" 20 | #include "matrix/kaldi-vector.h" 21 | 22 | namespace apiai { 23 | 24 | /** 25 | * Request data holding interface 26 | */ 27 | class Request { 28 | public: 29 | virtual ~Request() {}; 30 | 31 | /** Get number of samples per second of audio data */ 32 | virtual kaldi::int32 Frequency(void) const = 0; 33 | 34 | /** Get max number of expected result variants */ 35 | virtual kaldi::int32 BestCount(void) const = 0; 36 | /** Get milliseconds interval between intermediate results. 37 | * If non-positive given then no intermediate results would be calculated */ 38 | virtual kaldi::int32 IntermediateIntervalMillisec(void) const = 0; 39 | 40 | /** Get end-of-speech points detection flag. */ 41 | virtual bool DoEndpointing(void) const = 0; 42 | 43 | /** 44 | * Get next chunk of audio data samples. 45 | * Max number of samples specified by samples_count value 46 | */ 47 | virtual kaldi::SubVector *NextChunk(kaldi::int32 samples_count) = 0; 48 | /** 49 | * Get next chunk of audio data samples. 50 | * Max number of samples specified by samples_count value. 51 | * Read timeout specified by timeout_ms. 
52 | */ 53 | virtual kaldi::SubVector *NextChunk(kaldi::int32 samples_count, kaldi::int32 timeout_ms) = 0; 54 | }; 55 | 56 | } /* namespace apiai */ 57 | 58 | #endif /* SRC_REQUEST_H_ */ 59 | -------------------------------------------------------------------------------- /src/RequestRawReader.cc: -------------------------------------------------------------------------------- 1 | // RequestRawReader.cc 2 | 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 10 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 11 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 12 | // MERCHANTABLITY OR NON-INFRINGEMENT. 13 | // See the Apache 2 License for the specific language governing permissions and 14 | // limitations under the License. 
15 | 16 | #include "RequestRawReader.h" 17 | 18 | #include "Timing.h" 19 | #include 20 | 21 | namespace apiai { 22 | 23 | const milliseconds_t data_wait_interval_ms = 500; 24 | 25 | kaldi::SubVector *RequestRawReader::NextChunk(kaldi::int32 samples_count) 26 | { 27 | return NextChunk(samples_count, 0); 28 | } 29 | 30 | kaldi::SubVector *RequestRawReader::NextChunk(kaldi::int32 samples_count, kaldi::int32 timeout_ms) { 31 | // TODO: timeout_ms is not supported because libfcgi do not provides "readsome" functionality 32 | if (samples_count <= 0) { 33 | return NULL; 34 | } 35 | 36 | if (fail_) { 37 | return NULL; 38 | } 39 | 40 | int frame_size = bytes_per_sample_ * channels_; 41 | kaldi::int32 chunk_size = samples_count * frame_size; 42 | 43 | int offset = channel_index_ * bytes_per_sample_; 44 | 45 | std::vector audioData(chunk_size); 46 | 47 | int bytes_read = 0; 48 | 49 | is_->read(audioData.data(), chunk_size); 50 | 51 | bytes_read = is_->gcount(); 52 | 53 | if (is_->gcount() == 0) { 54 | fail_ = true; 55 | last_error_message_ == "Failed to read any data"; 56 | return NULL; 57 | } 58 | 59 | buffer_.clear(); 60 | for (int index = 0; index < bytes_read; index += frame_size) { 61 | kaldi::int16 value = *reinterpret_cast(audioData.data() + index + offset); 62 | kaldi::BaseFloat fvalue = kaldi::BaseFloat(value); 63 | buffer_.push_back(fvalue); 64 | } 65 | 66 | if (current_chunk_ && (current_chunk_->Dim() != buffer_.size())) { 67 | delete current_chunk_; 68 | current_chunk_ = NULL; 69 | } 70 | 71 | if (!current_chunk_) { 72 | current_chunk_ = new kaldi::SubVector(buffer_.data(), buffer_.size()); 73 | } else { 74 | KALDI_ASSERT(buffer_.size() == current_chunk_->Dim()); 75 | std::copy(buffer_.begin(), buffer_.end(), current_chunk_->Data()); 76 | } 77 | 78 | return current_chunk_; 79 | } 80 | 81 | } /* namespace apiai */ 82 | -------------------------------------------------------------------------------- /src/RequestRawReader.h: 
-------------------------------------------------------------------------------- 1 | // RequestRawReader.h 2 | 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 10 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 11 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 12 | // MERCHANTABLITY OR NON-INFRINGEMENT. 13 | // See the Apache 2 License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #ifndef APIAI_DECODER_STIREQUESTREADER_H_ 17 | #define APIAI_DECODER_STIREQUESTREADER_H_ 18 | 19 | #include "Request.h" 20 | #include 21 | #include 22 | 23 | #define NBEST_MIN 1 24 | #define NBEST_MAX 10 25 | 26 | #define INTERMEDIATE_MIN 500 27 | 28 | namespace apiai { 29 | 30 | /** 31 | * Provides access to PCM data from input stream. 
32 | * Assumed that PCM is signed mono, 16 bits, 16 KHz 33 | */ 34 | class RequestRawReader : public Request { 35 | public: 36 | RequestRawReader(std::istream *is) 37 | { 38 | fail_ = false; 39 | current_chunk_ = NULL; 40 | 41 | is_ = is; 42 | frequency_ = 16000; 43 | bytes_per_sample_ = 16 / 8; 44 | channels_ = 1; 45 | channel_index_ = 0; 46 | 47 | bestCount_ = 1; 48 | intermediateMillisecondsInterval_ = 0; 49 | doEndpointing_ = false; 50 | } 51 | 52 | virtual ~RequestRawReader() { 53 | delete current_chunk_; 54 | } 55 | 56 | virtual kaldi::int32 Frequency(void) const { return frequency_; } 57 | 58 | /** Get errors flag */ 59 | bool HasErrors(void) { return fail_ || is_->fail(); } 60 | /** Get last error message */ 61 | const std::string &LastErrorMessage(void) const { return last_error_message_; } 62 | 63 | virtual kaldi::int32 BestCount(void) const { return bestCount_; } 64 | virtual kaldi::int32 IntermediateIntervalMillisec(void) const { return intermediateMillisecondsInterval_; } 65 | virtual bool DoEndpointing(void) const { return doEndpointing_; } 66 | 67 | /** Set number of suggested recognition result variants */ 68 | void BestCount(kaldi::int32 value) { bestCount_ = std::max(NBEST_MIN, std::min(NBEST_MAX, value)); } 69 | /** Set intermediate results interval in milliseconds */ 70 | void IntermediateIntervalMillisec(kaldi::int32 value) { 71 | intermediateMillisecondsInterval_ = value > 0 ? std::max(value, INTERMEDIATE_MIN) : 0; 72 | } 73 | /** Set end-of-speech points detection flag. 
*/ 74 | void DoEndpointing(bool value) { doEndpointing_ = value; } 75 | 76 | virtual kaldi::SubVector *NextChunk(kaldi::int32 samples_count); 77 | virtual kaldi::SubVector *NextChunk(kaldi::int32 samples_count, kaldi::int32 timeout_ms); 78 | private: 79 | bool fail_; 80 | kaldi::int32 frequency_; 81 | kaldi::int32 bytes_per_sample_; 82 | kaldi::int32 channels_; 83 | kaldi::int32 channel_index_; 84 | 85 | kaldi::int32 bestCount_; 86 | kaldi::int32 intermediateMillisecondsInterval_; 87 | bool doEndpointing_; 88 | 89 | std::istream *is_; 90 | std::vector buffer_; 91 | std::string last_error_message_; 92 | kaldi::SubVector *current_chunk_; 93 | }; 94 | 95 | } /* namespace apiai */ 96 | 97 | #endif /* APIAI_DECODER_STIREQUESTREADER_H_ */ 98 | -------------------------------------------------------------------------------- /src/Response.cc: -------------------------------------------------------------------------------- 1 | // Response.h 2 | 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 10 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 11 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 12 | // MERCHANTABLITY OR NON-INFRINGEMENT. 13 | // See the Apache 2 License for the specific language governing permissions and 14 | // limitations under the License. 
15 | 16 | #include "Response.h" 17 | 18 | namespace apiai { 19 | 20 | const std::string Response::NOT_INTERRUPTED = ""; 21 | const std::string Response::INTERRUPTED_UNEXPECTED="unexpected"; 22 | const std::string Response::INTERRUPTED_END_OF_SPEECH="endofspeech"; 23 | const std::string Response::INTERRUPTED_DATA_SIZE_LIMIT="sizelimit"; 24 | const std::string Response::INTERRUPTED_TIMEOUT="timeout"; 25 | 26 | } /* namespace apiai */ 27 | 28 | -------------------------------------------------------------------------------- /src/Response.h: -------------------------------------------------------------------------------- 1 | // Response.h 2 | 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 10 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 11 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 12 | // MERCHANTABLITY OR NON-INFRINGEMENT. 13 | // See the Apache 2 License for the specific language governing permissions and 14 | // limitations under the License. 
#ifndef RESPONSE_H_
#define RESPONSE_H_

#include <string>
#include <vector>

namespace apiai {

/**
 * Recognition results holder
 */
struct RecognitionResult {
    /**
     * Confidence value given in percents
     */
    float confidence;
    /**
     * Recognition result text
     */
    std::string text;
};

/**
 * Interface for recognition data collector
 */
class Response {
public:
    virtual ~Response() {}

    /** Get content type MIME string */
    virtual const std::string &GetContentType() = 0;

    /**
     * Set final results
     * @param data       recognition variants to report
     * @param timeMarkMs time mark in milliseconds
     */
    virtual void SetResult(std::vector<RecognitionResult> &data, int timeMarkMs) = 0;
    /**
     * Set final result.
     * @param data        recognition variants to report
     * @param interrupted reason the recognition stopped before all given data
     *                    had been read; expected to be one of the
     *                    INTERRUPTED_* constants, or NOT_INTERRUPTED when the
     *                    whole input was processed
     * @param timeMarkMs  time mark in milliseconds
     */
    virtual void SetResult(std::vector<RecognitionResult> &data, const std::string &interrupted, int timeMarkMs) = 0;
    /** Set intermediate result */
    virtual void SetIntermediateResult(RecognitionResult &decodedData, int timeMarkMs) = 0;
    /** Set error value */
    virtual void SetError(const std::string &message) = 0;

    static const std::string NOT_INTERRUPTED;
    static const std::string INTERRUPTED_UNEXPECTED;
    static const std::string INTERRUPTED_END_OF_SPEECH;
    static const std::string INTERRUPTED_DATA_SIZE_LIMIT;
    static const std::string INTERRUPTED_TIMEOUT;
};

} /* namespace apiai */

#endif /* RESPONSE_H_ */

// --------------------------------------------------------------------------------
// /src/ResponseJsonWriter.cc:
// --------------------------------------------------------------------------------
// ResponseJsonWriter.cc

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 10 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 11 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 12 | // MERCHANTABLITY OR NON-INFRINGEMENT. 13 | // See the Apache 2 License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #include "ResponseJsonWriter.h" 17 | 18 | namespace apiai { 19 | 20 | const std::string ResponseJsonWriter::MIME_APPLICATION_JAVA = "application/json"; 21 | 22 | void ResponseJsonWriter::SendJson(std::string json, bool final) { 23 | *out_ << json << std::endl; 24 | out_->flush(); 25 | } 26 | 27 | void ResponseJsonWriter::Write(std::ostringstream &outss, RecognitionResult &data) { 28 | outss << "{" 29 | << "\"confidence\":" << data.confidence << "," 30 | << "\"text\":\"" << data.text << "\"" 31 | << "}"; 32 | } 33 | 34 | void ResponseJsonWriter::SetResult(std::vector &data, int timeMarkMs) { 35 | SetResult(data, NOT_INTERRUPTED, timeMarkMs); 36 | } 37 | 38 | void ResponseJsonWriter::SetResult(std::vector &data, const std::string &interrupted, int timeMarkMs) { 39 | 40 | std::ostringstream msg; 41 | msg << "{"; 42 | msg << "\"status\":\"ok\""; 43 | msg << ",\"data\":["; 44 | for (int i = 0; i < data.size(); i++) { 45 | if (i) { 46 | msg << ","; 47 | } 48 | Write(msg, data.at(i)); 49 | } 50 | msg << "]"; 51 | if (interrupted.size() > 0) { 52 | msg << ",\"interrupted\":\"" << interrupted << "\""; 53 | if (timeMarkMs > 0) { 54 | msg << ",\"time\":" << timeMarkMs; 55 | } 56 | } 57 | msg << "}"; 58 | SendJson(msg.str(), true); 59 | } 60 | 61 | void ResponseJsonWriter::SetIntermediateResult(RecognitionResult &decodedData, int timeMarkMs) { 62 | std::ostringstream msg; 63 | msg << "{"; 64 | msg << "\"status\":\"intermediate\""; 65 | msg << 
",\"data\":["; 66 | Write(msg, decodedData); 67 | msg << "]}"; 68 | SendJson(msg.str(), false); 69 | } 70 | 71 | void ResponseJsonWriter::SetError(const std::string &message) { 72 | std::ostringstream msg; 73 | msg << "{"; 74 | msg << "\"status\":\"error\""; 75 | msg << ",\"data\":[{\"text\":\""<< message << "\"}]"; 76 | msg << "}"; 77 | SendJson(msg.str(), true); 78 | } 79 | 80 | } /* namespace apiai */ 81 | -------------------------------------------------------------------------------- /src/ResponseJsonWriter.h: -------------------------------------------------------------------------------- 1 | // ResponseJsonWriter.h 2 | 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 10 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 11 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 12 | // MERCHANTABLITY OR NON-INFRINGEMENT. 13 | // See the Apache 2 License for the specific language governing permissions and 14 | // limitations under the License. 
15 | 16 | #ifndef RESPONSEJSONWRITER_H_ 17 | #define RESPONSEJSONWRITER_H_ 18 | 19 | #include "Response.h" 20 | #include 21 | 22 | namespace apiai { 23 | 24 | /** 25 | * Writes recognition data to output stream as JSON serialized objects 26 | */ 27 | class ResponseJsonWriter : public Response { 28 | public: 29 | ResponseJsonWriter(std::ostream *osb) : out_(osb) {} 30 | virtual ~ResponseJsonWriter() {}; 31 | 32 | virtual const std::string &GetContentType() { return MIME_APPLICATION_JAVA; } 33 | 34 | virtual void SetResult(std::vector &data, int timeMarkMs); 35 | virtual void SetResult(std::vector &data, const std::string &interrupted, int timeMarkMs); 36 | virtual void SetIntermediateResult(RecognitionResult &decodedData, int timeMarkMs); 37 | virtual void SetError(const std::string &message); 38 | protected: 39 | std::ostream *out() { return out_; } 40 | 41 | virtual void SendJson(std::string json, bool final); 42 | private: 43 | void Write(std::ostringstream &outss, RecognitionResult &data); 44 | std::ostream *out_; 45 | 46 | static const std::string MIME_APPLICATION_JAVA; 47 | }; 48 | 49 | } /* namespace apiai */ 50 | 51 | #endif /* RESPONSEJSONWRITER_H_ */ 52 | -------------------------------------------------------------------------------- /src/ResponseMultipartJsonWriter.cc: -------------------------------------------------------------------------------- 1 | // ResponseJsonWriter.cc 2 | 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 10 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 11 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 12 | // MERCHANTABLITY OR NON-INFRINGEMENT. 
13 | // See the Apache 2 License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #include "ResponseMultipartJsonWriter.h" 17 | 18 | namespace apiai { 19 | 20 | const std::string ResponseMultipartJsonWriter::MIME_MULTIPART = "multipart/x-mixed-replace"; 21 | 22 | ResponseMultipartJsonWriter::~ResponseMultipartJsonWriter() { 23 | 24 | } 25 | 26 | ResponseMultipartJsonWriter::ResponseMultipartJsonWriter(std::ostream *osb) 27 | : ResponseJsonWriter(osb) 28 | { 29 | boundary_token_ = "ResponseBoundary"; 30 | content_type_ = MIME_MULTIPART + ";boundary=" + boundary_token_; 31 | data_sent_ = false; 32 | } 33 | 34 | void ResponseMultipartJsonWriter::SendJson(std::string json, bool final) { 35 | if (! data_sent_) { 36 | *out() << "\r\n--" << boundary_token_ << "\r\n"; 37 | data_sent_ = true; 38 | } 39 | 40 | *out() << "Content-Disposition: form-data; name=\"" 41 | << (final ? "result" : "partial") 42 | << "\"\r\n" 43 | << "Content-type: " << ResponseJsonWriter::GetContentType() << "\r\n" 44 | << "\r\n"; 45 | 46 | ResponseJsonWriter::SendJson(json, final); 47 | 48 | *out() << "\r\n" 49 | << "--" << boundary_token_ 50 | << (final ? "--" : "") 51 | << "\r\n"; 52 | 53 | out()->flush(); 54 | } 55 | 56 | } /* namespace apiai */ 57 | -------------------------------------------------------------------------------- /src/ResponseMultipartJsonWriter.h: -------------------------------------------------------------------------------- 1 | // ResponseJsonWriter.cc 2 | 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 10 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 11 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 12 | // MERCHANTABLITY OR NON-INFRINGEMENT. 13 | // See the Apache 2 License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #ifndef RESPONSEMULTIPARTJSONWRITER_H_ 17 | #define RESPONSEMULTIPARTJSONWRITER_H_ 18 | 19 | #include "ResponseJsonWriter.h" 20 | 21 | namespace apiai { 22 | 23 | class ResponseMultipartJsonWriter: public ResponseJsonWriter { 24 | public: 25 | ResponseMultipartJsonWriter(std::ostream *osb); 26 | virtual ~ResponseMultipartJsonWriter(); 27 | 28 | virtual const std::string &GetContentType() { return content_type_; } 29 | protected: 30 | virtual void SendJson(std::string json, bool final); 31 | private: 32 | std::string boundary_token_; 33 | std::string content_type_; 34 | bool data_sent_; 35 | static const std::string MIME_MULTIPART; 36 | }; 37 | 38 | } /* namespace apiai */ 39 | 40 | #endif /* RESPONSEMULTIPARTJSONWRITER_H_ */ 41 | -------------------------------------------------------------------------------- /src/Timing.cc: -------------------------------------------------------------------------------- 1 | // Timing.cc 2 | 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 
5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 10 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 11 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 12 | // MERCHANTABLITY OR NON-INFRINGEMENT. 13 | // See the Apache 2 License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #include "Timing.h" 17 | 18 | namespace apiai { 19 | 20 | milliseconds_t getMilliseconds(struct timezone *tz) { 21 | timeval tv; 22 | if (!gettimeofday(&tv, tz)) { 23 | return tv.tv_sec * 1000 + tv.tv_usec / 1000; 24 | } else { 25 | return 0; 26 | } 27 | } 28 | 29 | milliseconds_t getMillisecondsSince(milliseconds_t since, struct timezone *tz) { 30 | return getMilliseconds(tz) - since; 31 | } 32 | 33 | milliseconds_t getMilliseconds() { 34 | return getMilliseconds(0); 35 | } 36 | 37 | milliseconds_t getMillisecondsSince(milliseconds_t since) { 38 | return getMillisecondsSince(since, 0); 39 | } 40 | 41 | 42 | } /* namespace apiai */ 43 | -------------------------------------------------------------------------------- /src/Timing.h: -------------------------------------------------------------------------------- 1 | // Timing.h 2 | 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 10 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 11 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 12 | // MERCHANTABLITY OR NON-INFRINGEMENT. 
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.

#ifndef TIMING_H_
#define TIMING_H_

// Restored: the include target was lost when this listing was extracted.
// <sys/time.h> declares struct timezone used below, and the timeval /
// gettimeofday() that Timing.cc relies on through this header.
#include <sys/time.h>

namespace apiai {

typedef long int milliseconds_t;

/**
 * Get current time in milliseconds; tz is forwarded to gettimeofday().
 * Returns 0 if the time could not be obtained.
 */
milliseconds_t getMilliseconds(struct timezone *tz);

/**
 * Get time difference between current time (obtained with tz) and the given time
 */
milliseconds_t getMillisecondsSince(milliseconds_t since, struct timezone *tz);

/**
 * Get current time in milliseconds
 */
milliseconds_t getMilliseconds();

/**
 * Get time difference between current time and the given time
 */
milliseconds_t getMillisecondsSince(milliseconds_t since);

} /* namespace apiai */


#endif /* TIMING_H_ */

// --------------------------------------------------------------------------------
// /src/fcgi-nnet3-decoder.cc:
// --------------------------------------------------------------------------------
// fcgi-nnet3-decoder.cc

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//  http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
// MERCHANTABLITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
15 | 16 | #include "Nnet3LatgenFasterDecoder.h" 17 | #include "FcgiDecodingApp.h" 18 | 19 | using namespace apiai; 20 | 21 | int main(int argc, char **argv) { 22 | 23 | Nnet3LatgenFasterDecoder decoder; 24 | FcgiDecodingApp decodingApp(decoder); 25 | 26 | return decodingApp.Run(argc, argv); 27 | } 28 | --------------------------------------------------------------------------------