├── LICENSE
├── Makefile
├── README.md
├── asr-html
    ├── index.html
    └── res
    │   ├── audiodisplay.js
    │   ├── main.js
    │   ├── mic128.png
    │   ├── recorder.js
    │   └── recorderWorker.js
├── configure
└── src
    ├── Decoder.h
    ├── FcgiDecodingApp.cc
    ├── FcgiDecodingApp.h
    ├── Makefile
    ├── Nnet3LatgenFasterDecoder.cc
    ├── Nnet3LatgenFasterDecoder.h
    ├── OnlineDecoder.cc
    ├── OnlineDecoder.h
    ├── QueryStringParser.cc
    ├── QueryStringParser.h
    ├── QueryStringParserTests.cc
    ├── Request.h
    ├── RequestRawReader.cc
    ├── RequestRawReader.h
    ├── Response.cc
    ├── Response.h
    ├── ResponseJsonWriter.cc
    ├── ResponseJsonWriter.h
    ├── ResponseMultipartJsonWriter.cc
    ├── ResponseMultipartJsonWriter.h
    ├── Timing.cc
    ├── Timing.h
    └── fcgi-nnet3-decoder.cc


/LICENSE:
--------------------------------------------------------------------------------
  1 | Apache License
  2 | Version 2.0, January 2004
  3 | http://www.apache.org/licenses/
  4 | 
  5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 | 1. Definitions.
  8 | 
  9 | "License" shall mean the terms and conditions for use, reproduction, and
 10 | distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 | "Licensor" shall mean the copyright owner or entity authorized by the copyright
 13 | owner that is granting the License.
 14 | 
 15 | "Legal Entity" shall mean the union of the acting entity and all other entities
 16 | that control, are controlled by, or are under common control with that entity.
 17 | For the purposes of this definition, "control" means (i) the power, direct or
 18 | indirect, to cause the direction or management of such entity, whether by
 19 | contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the
 20 | outstanding shares, or (iii) beneficial ownership of such entity.
 21 | 
 22 | "You" (or "Your") shall mean an individual or Legal Entity exercising
 23 | permissions granted by this License.
 24 | 
 25 | "Source" form shall mean the preferred form for making modifications, including
 26 | but not limited to software source code, documentation source, and configuration
 27 | files.
 28 | 
 29 | "Object" form shall mean any form resulting from mechanical transformation or
 30 | translation of a Source form, including but not limited to compiled object code,
 31 | generated documentation, and conversions to other media types.
 32 | 
 33 | "Work" shall mean the work of authorship, whether in Source or Object form, made
 34 | available under the License, as indicated by a copyright notice that is included
 35 | in or attached to the work (an example is provided in the Appendix below).
 36 | 
 37 | "Derivative Works" shall mean any work, whether in Source or Object form, that
 38 | is based on (or derived from) the Work and for which the editorial revisions,
 39 | annotations, elaborations, or other modifications represent, as a whole, an
 40 | original work of authorship. For the purposes of this License, Derivative Works
 41 | shall not include works that remain separable from, or merely link (or bind by
 42 | name) to the interfaces of, the Work and Derivative Works thereof.
 43 | 
 44 | "Contribution" shall mean any work of authorship, including the original version
 45 | of the Work and any modifications or additions to that Work or Derivative Works
 46 | thereof, that is intentionally submitted to Licensor for inclusion in the Work
 47 | by the copyright owner or by an individual or Legal Entity authorized to submit
 48 | on behalf of the copyright owner. For the purposes of this definition,
 49 | "submitted" means any form of electronic, verbal, or written communication sent
 50 | to the Licensor or its representatives, including but not limited to
 51 | communication on electronic mailing lists, source code control systems, and
 52 | issue tracking systems that are managed by, or on behalf of, the Licensor for
 53 | the purpose of discussing and improving the Work, but excluding communication
 54 | that is conspicuously marked or otherwise designated in writing by the copyright
 55 | owner as "Not a Contribution."
 56 | 
 57 | "Contributor" shall mean Licensor and any individual or Legal Entity on behalf
 58 | of whom a Contribution has been received by Licensor and subsequently
 59 | incorporated within the Work.
 60 | 
 61 | 2. Grant of Copyright License.
 62 | 
 63 | Subject to the terms and conditions of this License, each Contributor hereby
 64 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free,
 65 | irrevocable copyright license to reproduce, prepare Derivative Works of,
 66 | publicly display, publicly perform, sublicense, and distribute the Work and such
 67 | Derivative Works in Source or Object form.
 68 | 
 69 | 3. Grant of Patent License.
 70 | 
 71 | Subject to the terms and conditions of this License, each Contributor hereby
 72 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free,
 73 | irrevocable (except as stated in this section) patent license to make, have
 74 | made, use, offer to sell, sell, import, and otherwise transfer the Work, where
 75 | such license applies only to those patent claims licensable by such Contributor
 76 | that are necessarily infringed by their Contribution(s) alone or by combination
 77 | of their Contribution(s) with the Work to which such Contribution(s) was
 78 | submitted. If You institute patent litigation against any entity (including a
 79 | cross-claim or counterclaim in a lawsuit) alleging that the Work or a
 80 | Contribution incorporated within the Work constitutes direct or contributory
 81 | patent infringement, then any patent licenses granted to You under this License
 82 | for that Work shall terminate as of the date such litigation is filed.
 83 | 
 84 | 4. Redistribution.
 85 | 
 86 | You may reproduce and distribute copies of the Work or Derivative Works thereof
 87 | in any medium, with or without modifications, and in Source or Object form,
 88 | provided that You meet the following conditions:
 89 | 
 90 | You must give any other recipients of the Work or Derivative Works a copy of
 91 | this License; and
 92 | You must cause any modified files to carry prominent notices stating that You
 93 | changed the files; and
 94 | You must retain, in the Source form of any Derivative Works that You distribute,
 95 | all copyright, patent, trademark, and attribution notices from the Source form
 96 | of the Work, excluding those notices that do not pertain to any part of the
 97 | Derivative Works; and
 98 | If the Work includes a "NOTICE" text file as part of its distribution, then any
 99 | Derivative Works that You distribute must include a readable copy of the
100 | attribution notices contained within such NOTICE file, excluding those notices
101 | that do not pertain to any part of the Derivative Works, in at least one of the
102 | following places: within a NOTICE text file distributed as part of the
103 | Derivative Works; within the Source form or documentation, if provided along
104 | with the Derivative Works; or, within a display generated by the Derivative
105 | Works, if and wherever such third-party notices normally appear. The contents of
106 | the NOTICE file are for informational purposes only and do not modify the
107 | License. You may add Your own attribution notices within Derivative Works that
108 | You distribute, alongside or as an addendum to the NOTICE text from the Work,
109 | provided that such additional attribution notices cannot be construed as
110 | modifying the License.
111 | You may add Your own copyright statement to Your modifications and may provide
112 | additional or different license terms and conditions for use, reproduction, or
113 | distribution of Your modifications, or for any such Derivative Works as a whole,
114 | provided Your use, reproduction, and distribution of the Work otherwise complies
115 | with the conditions stated in this License.
116 | 
117 | 5. Submission of Contributions.
118 | 
119 | Unless You explicitly state otherwise, any Contribution intentionally submitted
120 | for inclusion in the Work by You to the Licensor shall be under the terms and
121 | conditions of this License, without any additional terms or conditions.
122 | Notwithstanding the above, nothing herein shall supersede or modify the terms of
123 | any separate license agreement you may have executed with Licensor regarding
124 | such Contributions.
125 | 
126 | 6. Trademarks.
127 | 
128 | This License does not grant permission to use the trade names, trademarks,
129 | service marks, or product names of the Licensor, except as required for
130 | reasonable and customary use in describing the origin of the Work and
131 | reproducing the content of the NOTICE file.
132 | 
133 | 7. Disclaimer of Warranty.
134 | 
135 | Unless required by applicable law or agreed to in writing, Licensor provides the
136 | Work (and each Contributor provides its Contributions) on an "AS IS" BASIS,
137 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied,
138 | including, without limitation, any warranties or conditions of TITLE,
139 | NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are
140 | solely responsible for determining the appropriateness of using or
141 | redistributing the Work and assume any risks associated with Your exercise of
142 | permissions under this License.
143 | 
144 | 8. Limitation of Liability.
145 | 
146 | In no event and under no legal theory, whether in tort (including negligence),
147 | contract, or otherwise, unless required by applicable law (such as deliberate
148 | and grossly negligent acts) or agreed to in writing, shall any Contributor be
149 | liable to You for damages, including any direct, indirect, special, incidental,
150 | or consequential damages of any character arising as a result of this License or
151 | out of the use or inability to use the Work (including but not limited to
152 | damages for loss of goodwill, work stoppage, computer failure or malfunction, or
153 | any and all other commercial damages or losses), even if such Contributor has
154 | been advised of the possibility of such damages.
155 | 
156 | 9. Accepting Warranty or Additional Liability.
157 | 
158 | While redistributing the Work or Derivative Works thereof, You may choose to
159 | offer, and charge a fee for, acceptance of support, warranty, indemnity, or
160 | other liability obligations and/or rights consistent with this License. However,
161 | in accepting such obligations, You may act only on Your own behalf and on Your
162 | sole responsibility, not on behalf of any other Contributor, and only if You
163 | agree to indemnify, defend, and hold each Contributor harmless for any liability
164 | incurred by, or claims asserted against, such Contributor by reason of your
165 | accepting any such warranty or additional liability.
166 | 
167 | END OF TERMS AND CONDITIONS
168 | 
169 | APPENDIX: How to apply the Apache License to your work
170 | 
171 | To apply the Apache License to your work, attach the following boilerplate
172 | notice, with the fields enclosed by brackets "[]" replaced with your own
173 | identifying information. (Don't include the brackets!) The text should be
174 | enclosed in the appropriate comment syntax for the file format. We also
175 | recommend that a file or class name and description of purpose be included on
176 | the same "printed page" as the copyright notice for easier identification within
177 | third-party archives.
178 | 
179 |    Copyright [yyyy] [name of copyright owner]
180 | 
181 |    Licensed under the Apache License, Version 2.0 (the "License");
182 |    you may not use this file except in compliance with the License.
183 |    You may obtain a copy of the License at
184 | 
185 |      http://www.apache.org/licenses/LICENSE-2.0
186 | 
187 |    Unless required by applicable law or agreed to in writing, software
188 |    distributed under the License is distributed on an "AS IS" BASIS,
189 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
190 |    See the License for the specific language governing permissions and
191 |    limitations under the License.
192 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | # all/clean/test name actions rather than files, so declare them phony.
2 | .PHONY: all clean test
3 | all:
4 | 	$(MAKE) -C src
5 | 	ln -fs src/fcgi-nnet3-decoder .
6 | # rm -f: cleaning must not fail when the symlink was never created.
7 | clean:
8 | 	$(MAKE) -C src clean
9 | 	rm -f fcgi-nnet3-decoder
10 | test:
11 | 	$(MAKE) -C src test


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | About
  2 | ======
  3 | FastCGI support for [Kaldi](http://kaldi-asr.org/doc/). It allows Kaldi-based speech recognition to be used through Apache, Nginx, or any other HTTP server that supports FastCGI. It also contains a simple HTML-based client that allows testing Kaldi speech recognition from a web page.
  4 | 
  5 | Licence
  6 | -------
  7 | Apache 2.0
  8 | 
  9 | Installation guide
 10 | ==================
 11 | 
 12 | Summary
 13 | -------
 14 | 
 15 | This guide will help you to download and build your own simple ASR 
 16 | web-service based on Kaldi ASR code.
 17 | 
 18 | Preparing prerequisites
 19 | -----------------------
 20 | 
 21 | ### Creating a working dir
 22 | 
 23 | Let's create a directory where all data will be downloaded and built.
 24 | 
 25 | 	mkdir ~/apiai
 26 | 	cd ~/apiai
 27 | 
 28 | You are free to choose any other name and path you wish to, but will 
 29 | have to keep in mind that your name differs from the name given in the 
 30 | guide.
 31 | 
 32 | Because the server code is based on Kaldi, almost all prerequisites are the 
 33 | same as Kaldi's. In addition, a FastCGI library is required to communicate 
 34 | with the HTTP server.
 35 | 
 36 | ### Getting Kaldi
 37 | 
 38 | As a first step you have to clone Kaldi source tree available at
 39 | <https://github.com/kaldi-asr/kaldi>:
 40 | 
 41 | 	git clone https://github.com/kaldi-asr/kaldi
 42 | 
 43 | This command will clone source tree to `kaldi` directory. 
 44 | To configure and build Kaldi please refer to `kaldi/INSTALL` file.
 45 | For detailed information please look for Kaldi official instruction:
 46 | <http://kaldi-asr.org/doc/install.html>
 47 | 
 48 | ### Installing libraries
 49 | 
 50 | There are some extra libraries required. You may install them using 
 51 | your system package manager.
 52 | 
 53 | In openSuSE you may run:
 54 | 
 55 | 	$ sudo zypper install FastCGI-devel
 56 | 
 57 | If you have Debian or Ubuntu:
 58 | 	
 59 | 	$ sudo apt-get install libfcgi-dev
 60 | 
 61 | Getting the code
 62 | --------------
 63 | 
 64 | Return to your working directory where you put Kaldi sources
 65 | 
 66 | 	$ cd ~/apiai
 67 | 
 68 | and then clone server source code
 69 | 
 70 | 	$ git clone https://github.com/api-ai/asr-server asr-server
 71 | 
 72 | It is recommended to check out the code into the same directory where 
 73 | `kaldi` is located, so that the `configure` tool can detect the Kaldi 
 74 | location automatically.
 75 | 
 76 | Building the app
 77 | --------------
 78 | 
 79 | 	$ cd asr-server
 80 | 
 81 | Before running a make process you have to configure build scripts 
 82 | by running a special utility:
 83 | 
 84 | 	$ ./configure
 85 | 
 86 | It will check that all required libraries are installed on your system and 
 87 | will also look for Kaldi libraries in the `../kaldi` folder. If you 
 88 | have Kaldi installed somewhere else, you may explicitly pass the 
 89 | path via the `--kaldi-root` option:
 90 | 
 91 | 	$ ./configure --kaldi-root=<path_to_kaldi>
 92 | 
 93 | If the configuration process has finished successfully, you may begin 
 94 | the build by running `make`:
 95 | 
 96 | 	$ make
 97 | 
 98 | Getting a recognition model
 99 | ------------------------
100 | 
101 | When the application build is complete, you need to download 
102 | language-specific data. 
103 | 
104 | Return to your working directory where you put Kaldi sources
105 | 
106 | 	$ cd ~/apiai
107 | 
108 | The built ASR application uses Kaldi nnet3 models, which you can get
109 | by training a neural network on your own data set, or you can use a 
110 | pretrained network provided by us. Currently only an English model is available,
111 | at <https://github.com/api-ai/api-ai-english-asr-model/releases/download/1.0/api.ai-kaldi-asr-model.zip>. 
112 | 
113 | 	$ wget https://github.com/api-ai/api-ai-english-asr-model/releases/download/1.0/api.ai-kaldi-asr-model.zip
114 | 
115 | Unzip the archive in the working directory, next to the `asr-server` directory:
116 | 
117 | 	$ unzip api.ai-kaldi-asr-model.zip
118 | 
119 | Running the app
120 | --------------
121 | 
122 | Set the model directory as a working dir:
123 | 
124 | 	$ cd api.ai-kaldi-asr-model
125 | 
126 | There are several ways to run the application. The first one is 
127 | to run it as a standalone app listening on the socket defined with 
128 | the `--fcgi-socket` option:
129 | 
130 | 	$ ../asr-server/fcgi-nnet3-decoder --fcgi-socket=:8000
131 | 
132 | This command runs the application listening on any IP address, port 8000. 
133 | You may also specify a path to a Unix socket or an explicit IP 
134 | address (in A.B.C.D:PORT form).
135 | 
136 | Alternatively, you may use the `spawn-fcgi` utility:
137 | 
138 | 	$ spawn-fcgi -n -p 8000 -- ../asr-server/fcgi-nnet3-decoder
139 | 
140 | Configuring HTTP service
141 | ---------------------
142 | 
143 | You may use any web server that has FastCGI support: Apache, Nginx, Lighttpd, etc. 
144 | 
145 | ### Installing Apache2
146 | 
147 | openSuSE:
148 | 
149 | 	$ sudo zypper in apache2
150 | 	
151 | Debian and Ubuntu:
152 | 
153 | 	$ sudo apt-get install apache2
154 | 	
155 | ### Configuring Apache2
156 | 
157 | Enable FastCGI proxy module with `a2enmod`:
158 | 	
159 | 	$ sudo a2enmod proxy_fcgi
160 | 	
161 | Then add the following line to the Apache2 configuration file:
162 | 
163 | 	ProxyPass "/asr" "fcgi://localhost:8000/"
164 | 	
165 | If your Apache is configured to include all .conf files from the /etc/apache2/conf.d folder, you may 
166 | create a separate asr_proxy.conf file with the following content:
167 | 
168 | 	ProxyPass "/asr" "fcgi://localhost:8000/"
169 | 	Alias /asr-html/ "/home/username/apiai/asr-server/asr-html/"
170 | 	<Directory "/home/username/apiai/asr-server/asr-html">
171 | 		Options Indexes MultiViews
172 | 		AllowOverride None
173 | 		Require all granted
174 | 	</Directory>
175 | 	
176 | Now restart Apache:
177 | 	
178 | 	$ sudo /etc/init.d/apache2 restart
179 | 
180 | ### Installing Nginx
181 | 
182 | You can download the latest sources from the official website <http://nginx.org/> and build Nginx 
183 | yourself, or use your system package manager.
184 | 
185 | openSuSE:
186 | 
187 | 	$ sudo zypper install nginx
188 | 
189 | Debian and Ubuntu:
190 | 
191 | 	$ sudo apt-get install nginx
192 | 
193 | ### Configuring Nginx
194 | 
195 | Open nginx.conf and write down the following code:
196 | 
197 | 	http {
198 | 		server {
199 | 			location /asr {
200 | 				fastcgi_pass 127.0.0.1:8000;
201 | 				# Disabling this option invokes immediate sending replies to client
202 | 				fastcgi_buffering off;
203 | 				# Disabling this option invokes immediate decoding incoming audio data
204 | 				fastcgi_request_buffering off;
205 | 				include      fastcgi_params;
206 | 			}
207 | 
208 | 			location /asr-html {
209 | 				root /home/username/apiai/asr-server/;
210 | 				index index.html;
211 | 			}
212 | 		}
213 | 	}
214 | 
215 | This will set up Nginx to pass all requests coming to the URL /asr directly 
216 | to the ASR service listening on port 8000 via the FastCGI gateway. For detailed 
217 | information please refer to the Nginx documentation 
218 | (e.g. <https://www.nginx.com/resources/wiki/start/topics/examples/fastcgiexample/>).
219 | 
220 | Speech Recognition
221 | ----------------
222 | 
223 | The server accepts raw mono 16-bit 16 kHz PCM data. You can convert your audio 
224 | using any popular encoding utility; for instance, you can use ffmpeg:
225 | 
226 | 	$ ffmpeg -i audio.wav -f s16le -ar 16000 -ac 1 audio.raw
227 | 
228 | ### Recognition using web browser
229 | 
230 | There is a simple JS implementation that allows you to recognize speech using system mic.
231 | Open in your browser:
232 | 
233 | 	http://localhost/asr-html/
234 | 
235 | and follow the instructions on the page.
236 | 
237 | ### Recognition from command line using curl
238 | 
239 | Now, let’s recognize `audio.raw` by calling web-service with `curl` 
240 | utility:
241 | 
242 | 	$ curl -H "Content-Type: application/octet-stream" --data-binary @audio.raw http://localhost/asr
243 | 
244 | On successful recognition the command will return something like this:
245 | 
246 | 	{
247 | 		"status":"ok",
248 | 		"data":[{"confidence":0.900359,"text":"HELLO WORLD"}]
249 | 	}
250 | 
251 | On error the return value will be like this:
252 | 
253 | 	{"status":"error","data":[{"text":"Failed to decode"}]}
254 | 
255 | ### Recognition request parameters
256 | 
257 | There are several parameters to tune up recognition process. All parameters are expected to be passed via query string as web-form fields enumeration (e.g. `?name1=value1&name2=value2`).
258 | 
259 | <table border="1">
260 | 	<tr>
261 | 		<th>Parameter</th>
262 | 		<th>Description</th>
263 | 		<th>Acceptable values</th>
264 | 		<th>Default value</th>
265 | 	</tr>
266 | 	<tr>
267 | 		<td>nbest</td>
268 | 		<td>Set the number of possible returned values
269 | <pre><code>{
270 | 	"status":"ok",
271 | 	"data":[
272 | 		{"confidence":0.900359,"text":"HELLO WORLD"},
273 | 		{"confidence":0.89012,"text":"HELLO WORD"}
274 | 	]
275 | }</code></pre>
276 | </td>
277 | 		<td>1-10</td>
278 | 		<td>1</td>
279 | 	</tr>
280 | 	<tr>
281 | 		<td>endofspeech</td>
282 | 		<td>Enable or disable end-of-speech detection during recognition. If an endpoint
283 | 			is detected, the current result is returned and the rest of the data is 
284 | 			skipped. In addition, when recognition is interrupted this way, two fields are added
285 | 			to the response: "interrupted" with the value "endofspeech", and "time" with
286 | 			the number of milliseconds that have been processed.
287 | 
288 | <pre><code>{
289 | 	"status":"ok",
290 | 	"data":[{"confidence":0.900359,"text":"HELLO WORLD"}],
291 | 	"interrupted":"endofspeech",
292 | 	"time":3800
293 | }</code></pre>
294 | </td>
295 | 		<td>true or false</td>
296 | 		<td>true</td>
297 | 	</tr>
298 | 	<tr>
299 | 		<td>intermediate</td>
300 | 		<td>Set the time interval in milliseconds between intermediate results while 
301 | 			recognition is in progress.
302 | 
303 | 			The result is returned as a simple sequence of JSON documents.
304 | 			Each intermediate document has its "status" field set to "intermediate";
305 | 			the last one has "status" set to "ok".
306 | <pre><code>
307 | {"status":"intermediate","data":[
308 | 	{"confidence":0.908981,"text":"HELLO"}
309 | ]}
310 | {"status":"intermediate","data":[
311 | 	{"confidence":0.903025,"text":"HELLO WORLD"}
312 | ]}
313 | {"status":"ok","data":[
314 | 	{"confidence":0.903025,"text":"HELLO WORLD"}
315 | ]}
316 | </code></pre>
317 | </td>
318 | 		<td> >500</td>
319 | 		<td>0</td>
320 | 	</tr>
321 | 	<tr>
322 | 		<td>multipart</td>
323 | 		<td>If enabled, the result is returned as an <a href="https://www.w3.org/Protocols/rfc1341/7_2_Multipart.html">
324 | 			HTTP multipart response</a> with "content-type"
325 | 			set to "multipart/x-mixed-replace", and each response part
326 | 			has a "Content-Disposition" header value equal to "form-data".
327 | 			Intermediate parts are named "partial" and the final part is named "result".
328 | <pre><code>
329 | --ResponseBoundary
330 | Content-Disposition: form-data; name="partial"
331 | Content-type: application/json
332 | 
333 | {"status":"intermediate","data":[
334 | 	{"confidence":0.908981,"text":"HELLO"}
335 | ]}
336 | 
337 | --ResponseBoundary
338 | Content-Disposition: form-data; name="partial"
339 | Content-type: application/json
340 | 
341 | {"status":"intermediate","data":[
342 | 	{"confidence":0.903025,"text":"HELLO WORLD"}
343 | ]}
344 | 
345 | --ResponseBoundary
346 | Content-Disposition: form-data; name="result"
347 | Content-type: application/json
348 | 
349 | {"status":"ok","data":[
350 | 	{"confidence":0.903025,"text":"HELLO WORLD"}
351 | ]}
352 | 
353 | --ResponseBoundary--
354 | </code></pre>
355 | </td>
356 | 		<td>true or false</td>
357 | 		<td>false</td>
358 | 	</tr>
359 | </table>
360 | 


--------------------------------------------------------------------------------
/asr-html/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8">
 3 | 	<meta name="viewport" content="width=device-width,initial-scale=1">
 4 | 	<title>ASR Test console</title>
 5 | 	<!-- Scripts: waveform drawing, the recorder, and the page logic (toggleRecording). -->
 6 | 	<script src="./res/audiodisplay.js"></script>
 7 | 	<script src="./res/recorder.js"></script>
 8 | 	<script src="./res/main.js"></script>
 9 | 	<style>
10 | 	html { overflow: hidden; }
11 | 	body { 
12 | 		font: 14pt Arial, sans-serif; 
13 | 		background: white;
14 | 		height: 100vh;
15 | 		width: 100%;
16 | 		margin: 0 0;
17 | 	}
18 | 	canvas { 
19 | 		display: inline-block; 
20 | 		background: #202020; 
21 | 		width: 100%;
22 | 		height: 100px;
23 | 	}
24 | 	#controls {
25 | 		display: -webkit-flex;
26 | 		display: flex;
27 | 		align-items: center;
28 | 		height: 100%;
29 | 		width: 100%;
30 | 	}
31 | 	#status {
32 | 		height: 100%;
33 | 		width: 100%;
34 | 	}
35 | 	#record {  }
36 | 	/* Mic highlight while recording: solid red fallback, then the prefixed
37 | 	   legacy gradients, then the standard syntax last so it wins where supported. */
38 | 	#record.recording { 
39 | 		background: red;
40 | 		background: -webkit-radial-gradient(center, ellipse cover, #ff0000 0%,white 75%,white 100%,#7db9e8 100%); 
41 | 		background: -moz-radial-gradient(center, ellipse cover, #ff0000 0%,white 75%,white 100%,#7db9e8 100%); 
42 | 		background: radial-gradient(ellipse at center, #ff0000 0%,white 75%,white 100%,#7db9e8 100%); 
43 | 	}
44 | 	#viz {
45 | 		width: 100%;
46 | 		display: flex;
47 | 		flex-direction: column;
48 | 		justify-content: space-around;
49 | 		align-items: center;
50 | 	}
51 | 
52 | 	</style>
53 | </head>
54 | <body>
55 | 	<!-- Canvases drawn into by res/main.js ("analyser" and "wavedisplay"). -->
56 | 	<div id="viz">
57 | 		<canvas id="analyser" width="1024" height="300"></canvas>
58 | 		<canvas id="wavedisplay" width="1024" height="300"></canvas>
59 | 	</div>
60 | 	<div id="controls">
61 | 	    <textarea id="status">
62 | Ensure your browser supports audio capturing. If you see moving columns on top, then capturing works fine.
63 | To start recognition press mic, then speak, then press mic again when done speaking.
64 | 
65 | For more details go to https://github.com/api-ai/asr-server
66 | For information about ASR model go to https://github.com/api-ai/api-ai-english-asr-model
67 | 
68 | Copyright © 2016 Api.ai
69 | 	    </textarea>
70 | 		<img id="record" src="./res/mic128.png" alt="Toggle recording" onclick="toggleRecording(this);" class="">
71 | 	</div>
72 | 
73 | 
74 | </body></html>


--------------------------------------------------------------------------------
/asr-html/res/audiodisplay.js:
--------------------------------------------------------------------------------
 1 | function drawBuffer( width, height, context, data ) {
 2 |     var step = Math.ceil( data.length / width );
 3 |     var amp = height / 2;
 4 |     context.fillStyle = "silver";
 5 |     context.clearRect(0,0,width,height);
 6 |     for(var i=0; i < width; i++){
 7 |         var min = 1.0;
 8 |         var max = -1.0;
 9 |         for (j=0; j<step; j++) {
10 |             var datum = data[(i*step)+j]; 
11 |             if (datum < min)
12 |                 min = datum;
13 |             if (datum > max)
14 |                 max = datum;
15 |         }
16 |         context.fillRect(i,(1+min)*amp,1,Math.max(1,(max-min)*amp));
17 |     }
18 | }
19 | 


--------------------------------------------------------------------------------
/asr-html/res/main.js:
--------------------------------------------------------------------------------
  1 | /* Copyright 2013 Chris Wilson
  2 |              2016 Api.ai (author: Ilya Platonov)
  3 | 
  4 |    Licensed under the Apache License, Version 2.0 (the "License");
  5 |    you may not use this file except in compliance with the License.
  6 |    You may obtain a copy of the License at
  7 | 
  8 |        http://www.apache.org/licenses/LICENSE-2.0
  9 | 
 10 |    Unless required by applicable law or agreed to in writing, software
 11 |    distributed under the License is distributed on an "AS IS" BASIS,
 12 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 |    See the License for the specific language governing permissions and
 14 |    limitations under the License.
 15 | */
 16 | 
 17 | window.AudioContext = window.AudioContext || window.webkitAudioContext; // fall back to the webkit-prefixed constructor
 18 | 
 19 | var URL = "/asr" // endpoint sendBlob() posts to. NOTE(review): as a global `var` in a classic script this replaces the built-in window.URL on this page - confirm nothing else needs it
 20 | 
 21 | var audioContext = new AudioContext();
 22 | var audioInput = null,        // node currently connected to inputPoint (see toggleMono)
 23 |     realAudioInput = null,    // media stream source created in gotStream
 24 |     inputPoint = null,        // gain node created in gotStream
 25 |     audioRecorder = null;     // recorder driven by toggleRecording; not assigned in the visible part of this file
 26 | var rafID = null;             // id returned by requestAnimationFrame in updateAnalysers
 27 | var analyserContext = null;   // 2D context of the "analyser" canvas, created on first updateAnalysers call
 28 | var canvasWidth, canvasHeight; // size of the "analyser" canvas, cached together with analyserContext
 29 | var recIndex = 0;             // not referenced in the visible part of this file
 30 | 
 31 | 
 32 | 
 33 | 
 34 | function sendBlob(blob) {
 35 |     var request = new XMLHttpRequest();
 36 | 
 37 |     request.open("POST", URL);
 38 | 
 39 |     updateStatus("Sending data to " + URL)
 40 |     request.onreadystatechange=function() {
 41 |        updateStatus(request.responseText);
 42 |     }
 43 |     request.send(blob);
 44 | }
 45 | 
 46 | function updateStatus(status) {
 47 |     var statusP = document.getElementById( "status" );
 48 |     statusP.innerHTML = status;
 49 | }
 50 | /* TODO:
 51 | 
 52 | - offer mono option
 53 | - "Monitor input" switch
 54 | */
 55 | 
 56 | function saveAudio() { // Asks the recorder for a WAV export, passing doneEncoding as the callback.
 57 |     audioRecorder.exportWAV( doneEncoding ); // NOTE(review): doneEncoding is not defined in the visible part of this file - confirm it exists
 58 |     // A mono export is possible instead by calling:
 59 |     // audioRecorder.exportMonoWAV( doneEncoding );
 60 | }
 61 | 
 62 | 
 63 | function gotBuffers( buffers ) {
 64 |     var canvas = document.getElementById( "wavedisplay" );
 65 | 
 66 |     drawBuffer( canvas.width, canvas.height, canvas.getContext('2d'), buffers[0] );
 67 | 
 68 |     // the ONLY time gotBuffers is called is right after a new recording is completed - 
 69 |     // so here's where we should set up the download.
 70 |     audioRecorder.exportMonoWAV( sendBlob );
 71 |     //apiaiSend();
 72 | }
 73 | 
 74 | 
 75 | function toggleRecording( e ) {
 76 |     if (e.classList.contains("recording")) {
 77 |         // stop recording
 78 |         audioRecorder.stop();
 79 |         e.classList.remove("recording");
 80 |         audioRecorder.getBuffers( gotBuffers );
 81 | 
 82 |     } else {
 83 |         // start recording
 84 |         if (!audioRecorder)
 85 |             return;
 86 |         e.classList.add("recording");
 87 |         audioRecorder.clear();
 88 |         audioRecorder.record();
 89 |     }
 90 | }
 91 | 
 92 | function convertToMono( input ) {
 93 |     var splitter = audioContext.createChannelSplitter(2);
 94 |     var merger = audioContext.createChannelMerger(2);
 95 | 
 96 |     input.connect( splitter );
 97 |     splitter.connect( merger, 0, 0 );
 98 |     splitter.connect( merger, 0, 1 );
 99 |     return merger;
100 | }
101 | 
102 | function cancelAnalyserUpdates() { // Cancels the animation frame requested by updateAnalysers.
103 |     window.cancelAnimationFrame( rafID );
104 |     rafID = null; // no frame is scheduled any more
105 | }
106 | 
/**
 * Draw one frame of the frequency-bar display on the "analyser" canvas and
 * schedule the next frame via requestAnimationFrame.
 *
 * The frequency bins are partitioned into numBars consecutive groups; each
 * bar shows the mean byte magnitude of its group.
 *
 * @param time timestamp supplied by requestAnimationFrame (unused)
 */
function updateAnalysers(time) {
    // Lazily cache the canvas geometry and 2D context on first use.
    if (!analyserContext) {
        var canvas = document.getElementById("analyser");
        canvasWidth = canvas.width;
        canvasHeight = canvas.height;
        analyserContext = canvas.getContext('2d');
    }

    var SPACING = 3;
    var BAR_WIDTH = 1;
    var numBars = Math.round(canvasWidth / SPACING);
    var binCount = analyserNode.frequencyBinCount;
    var freqByteData = new Uint8Array(binCount);

    analyserNode.getByteFrequencyData(freqByteData);

    analyserContext.clearRect(0, 0, canvasWidth, canvasHeight);
    analyserContext.fillStyle = '#F6D565';
    analyserContext.lineCap = 'round';
    var multiplier = binCount / numBars;

    for (var i = 0; i < numBars; ++i) {
        // Bins [start, end) belong to bar i. At least one bin is always used,
        // and the divisor is the number of bins actually summed, so a
        // fractional multiplier no longer skews the average.
        var start = Math.floor(i * multiplier);
        var end = Math.min(binCount, Math.max(start + 1, Math.floor((i + 1) * multiplier)));

        // Average the whole group so narrow-bandwidth spikes are not missed.
        var sum = 0;
        for (var j = start; j < end; j++)
            sum += freqByteData[j];
        var magnitude = end > start ? sum / (end - start) : 0;

        analyserContext.fillStyle = "hsl( " + Math.round((i*360)/numBars) + ", 100%, 50%)";
        // Negative height makes the bar grow upward from the canvas bottom.
        analyserContext.fillRect(i * SPACING, canvasHeight, BAR_WIDTH, -magnitude);
    }

    rafID = window.requestAnimationFrame( updateAnalysers );
}
145 | 
/**
 * Switch the recording input between the raw stream source and the node
 * produced by convertToMono, then reconnect it to inputPoint.
 */
function toggleMono() {
    var usingRawInput = (audioInput == realAudioInput);

    if (usingRawInput) {
        // Raw -> converted: detach the source and rebuild the chain.
        realAudioInput.disconnect();
        audioInput = convertToMono( realAudioInput );
    } else {
        // Converted -> raw: tear down the conversion chain first.
        audioInput.disconnect();
        realAudioInput.disconnect();
        audioInput = realAudioInput;
    }

    audioInput.connect(inputPoint);
}
158 | 
/**
 * getUserMedia success callback: builds the audio graph around the stream,
 * creates the Recorder and starts the analyser display.
 *
 * @param stream media stream passed to createMediaStreamSource
 */
function gotStream(stream) {
    // Gain node that the analyser, the recorder and the muted output all tap.
    inputPoint = audioContext.createGain();

    // Wrap the stream in an AudioNode and feed it into the graph.
    realAudioInput = audioContext.createMediaStreamSource(stream);
    audioInput = realAudioInput;
    audioInput.connect(inputPoint);

//    audioInput = convertToMono( input );

    // Analyser that updateAnalysers reads frequency data from.
    analyserNode = audioContext.createAnalyser();
    analyserNode.fftSize = 2048;
    inputPoint.connect( analyserNode );

    audioRecorder = new Recorder( inputPoint );

    // Connect to the destination through a zero-gain node.
    zeroGain = audioContext.createGain();
    zeroGain.gain.value = 0.0;
    inputPoint.connect( zeroGain );
    zeroGain.connect( audioContext.destination );

    updateAnalysers();
}
181 | 
/**
 * Page-load entry point: installs vendor-prefixed fallbacks and asks the
 * browser for microphone access. On success gotStream receives the stream;
 * on failure the user is alerted and the error is logged.
 */
function initAudio() {
    if (!navigator.getUserMedia)
        navigator.getUserMedia = navigator.webkitGetUserMedia || navigator.mozGetUserMedia;

    // requestAnimationFrame/cancelAnimationFrame are called on window elsewhere
    // in this file, so the prefixed fallbacks must be installed on window;
    // shimming them onto navigator has no effect.
    if (!window.cancelAnimationFrame)
        window.cancelAnimationFrame = window.webkitCancelAnimationFrame || window.mozCancelAnimationFrame;
    if (!window.requestAnimationFrame)
        window.requestAnimationFrame = window.webkitRequestAnimationFrame || window.mozRequestAnimationFrame;

    // Without any getUserMedia implementation the call below would raise a
    // TypeError; report it the same way as a capture failure instead.
    if (!navigator.getUserMedia) {
        alert('Error getting audio');
        console.log('navigator.getUserMedia is not available');
        return;
    }

    navigator.getUserMedia(
        {
            "audio": {
                "mandatory": {
                    "googEchoCancellation": "false",
                    "googAutoGainControl": "false",
                    "googNoiseSuppression": "false",
                    "googHighpassFilter": "false"
                },
                "optional": []
            },
        }, gotStream, function(e) {
            alert('Error getting audio');
            console.log(e);
        });
}

window.addEventListener('load', initAudio );
208 | 


--------------------------------------------------------------------------------
/asr-html/res/mic128.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dialogflow/asr-server/d551d7a471300360a846009a31852662d0ba7b23/asr-html/res/mic128.png


--------------------------------------------------------------------------------
/asr-html/res/recorder.js:
--------------------------------------------------------------------------------
  1 | /*License (MIT)
  2 | 
  3 | Copyright © 2013 Matt Diamond
  4 |             2016 Api.ai (author: Ilya Platonov)
  5 | 
  6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 
  7 | documentation files (the "Software"), to deal in the Software without restriction, including without limitation 
  8 | the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and 
  9 | to permit persons to whom the Software is furnished to do so, subject to the following conditions:
 10 | 
 11 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of 
 12 | the Software.
 13 | 
 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 
 15 | THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 
 17 | CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
 18 | DEALINGS IN THE SOFTWARE.
 19 | */
 20 | 
 21 | (function(window){
 22 | 
 23 |   var WORKER_PATH = 'res/recorderWorker.js';
 24 | 
 25 |   var Recorder = function(source, cfg){
 26 |     var config = cfg || { };
 27 |     var bufferLen = config.bufferLen || 4096;
 28 |     this.context = source.context;
 29 |     if(!this.context.createScriptProcessor){
 30 |        this.node = this.context.createJavaScriptNode(bufferLen, 2, 2);
 31 |     } else {
 32 |        this.node = this.context.createScriptProcessor(bufferLen, 2, 2);
 33 |     }
 34 |    
 35 |     var worker = new Worker(config.workerPath || WORKER_PATH);
 36 |     worker.postMessage({
 37 |       command: 'init',
 38 |       config: {
 39 |         sampleRate: this.context.sampleRate
 40 |       }
 41 |     });
 42 |     var recording = false,
 43 |       currCallback;
 44 | 
 45 |     this.node.onaudioprocess = function(e){
 46 |       if (!recording) return;
 47 |       worker.postMessage({
 48 |         command: 'record',
 49 |         buffer: [
 50 |           e.inputBuffer.getChannelData(0),
 51 |           e.inputBuffer.getChannelData(1)
 52 |         ]
 53 |       });
 54 |     }
 55 | 
 56 |     this.configure = function(cfg){
 57 |       for (var prop in cfg){
 58 |         if (cfg.hasOwnProperty(prop)){
 59 |           config[prop] = cfg[prop];
 60 |         }
 61 |       }
 62 |     }
 63 | 
 64 |     this.record = function(){
 65 |       recording = true;
 66 |     }
 67 | 
 68 |     this.stop = function(){
 69 |       recording = false;
 70 |     }
 71 | 
 72 |     this.clear = function(){
 73 |       worker.postMessage({ command: 'clear' });
 74 |     }
 75 | 
 76 |     this.getBuffers = function(cb) {
 77 |       currCallback = cb || config.callback;
 78 |       worker.postMessage({ command: 'getBuffers' })
 79 |     }
 80 | 
 81 |     this.exportWAV = function(cb, type){
 82 |       currCallback = cb || config.callback;
 83 |       type = type || config.type || 'audio/wav';
 84 |       if (!currCallback) throw new Error('Callback not set');
 85 |       worker.postMessage({
 86 |         command: 'exportWAV',
 87 |         type: type
 88 |       });
 89 |     }
 90 | 
 91 |     this.exportMonoWAV = function(cb, type){
 92 |       currCallback = cb || config.callback;
 93 |       type = type || config.type || 'audio/wav';
 94 |       if (!currCallback) throw new Error('Callback not set');
 95 |       worker.postMessage({
 96 |         command: 'exportMonoWAV',
 97 |         type: type
 98 |       });
 99 |     }
100 | 
101 |     worker.onmessage = function(e){
102 |       var blob = e.data;
103 |       currCallback(blob);
104 |     }
105 | 
106 |     source.connect(this.node);
107 |     this.node.connect(this.context.destination);   // if the script node is not connected to an output the "onaudioprocess" event is not triggered in chrome.
108 |   };
109 | 
110 |   Recorder.setupDownload = function(blob, filename){
111 |     var url = (window.URL || window.webkitURL).createObjectURL(blob);
112 |     var link = document.getElementById("save");
113 |     link.href = url;
114 |     link.download = filename || 'output.wav';
115 |   }
116 | 
117 |   window.Recorder = Recorder;
118 | 
119 | })(window);
120 | 


--------------------------------------------------------------------------------
/asr-html/res/recorderWorker.js:
--------------------------------------------------------------------------------
  1 | /*License (MIT)
  2 | 
  3 | Copyright © 2013 Matt Diamond
  4 |             2016 Api.ai (author: Ilya Platonov)
  5 | 
  6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 
  7 | documentation files (the "Software"), to deal in the Software without restriction, including without limitation 
  8 | the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and 
  9 | to permit persons to whom the Software is furnished to do so, subject to the following conditions:
 10 | 
 11 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of 
 12 | the Software.
 13 | 
 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 
 15 | THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 
 17 | CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
 18 | DEALINGS IN THE SOFTWARE.
 19 | */
 20 | 
// Worker-wide recording state, shared by the functions below.
var recLength = 0,     // running sum of inputBuffer[0].length over all record() calls
  recBuffersL = [],    // chunks pushed from inputBuffer[0] by record()
  recBuffersR = [],    // chunks pushed from inputBuffer[1] by record()
  sampleRate;          // set by init() from config.sampleRate
 25 | 
 26 | this.onmessage = function(e){
 27 |   switch(e.data.command){
 28 |     case 'init':
 29 |       init(e.data.config);
 30 |       break;
 31 |     case 'record':
 32 |       record(e.data.buffer);
 33 |       break;
 34 |     case 'exportWAV':
 35 |       exportWAV(e.data.type);
 36 |       break;
 37 |     case 'exportMonoWAV':
 38 |       exportMonoWAV(e.data.type);
 39 |       break;
 40 |     case 'getBuffers':
 41 |       getBuffers();
 42 |       break;
 43 |     case 'clear':
 44 |       clear();
 45 |       break;
 46 |   }
 47 | };
 48 | 
 49 | function init(config){
 50 |   sampleRate = config.sampleRate;
 51 | }
 52 | 
 53 | function record(inputBuffer){
 54 |   recBuffersL.push(inputBuffer[0]);
 55 |   recBuffersR.push(inputBuffer[1]);
 56 |   recLength += inputBuffer[0].length;
 57 | }
 58 | 
 59 | function exportWAV(type){
 60 |   var bufferL = mergeBuffers(recBuffersL, recLength);
 61 |   var bufferR = mergeBuffers(recBuffersR, recLength);
 62 |   var interleaved = interleave(bufferL, bufferR);
 63 |   var downsampledBuffer = downsampleBuffer(interleaved, 16000);
 64 |   var dataview = encodeWAV(downsampledBuffer);
 65 |   var audioBlob = new Blob([dataview], { type: type });
 66 | 
 67 |   this.postMessage(audioBlob);
 68 | }
 69 | 
 70 | function downsampleBuffer(buffer, rate) {
 71 |     var sampleRate = 44100;
 72 |     if (rate == sampleRate) {
 73 |         return buffer;
 74 |     }
 75 |     if (rate > sampleRate) {
 76 |         throw "downsampling rate show be smaller than original sample rate";
 77 |     }
 78 |     var sampleRateRatio = sampleRate / rate;
 79 |     var newLength = Math.round(buffer.length / sampleRateRatio);
 80 |     var result = new Float32Array(newLength);
 81 |     var offsetResult = 0;
 82 |     var offsetBuffer = 0;
 83 |     while (offsetResult < result.length) {
 84 |         var nextOffsetBuffer = Math.round((offsetResult + 1) * sampleRateRatio);
 85 |         var accum = 0, count = 0;
 86 |         for (var i = offsetBuffer; i < nextOffsetBuffer && i < buffer.length; i++) {
 87 |             accum += buffer[i];
 88 |             count++;
 89 |         }
 90 |         result[offsetResult] = accum / count;
 91 |         offsetResult++;
 92 |         offsetBuffer = nextOffsetBuffer;
 93 |     }
 94 |     return result;
 95 | }
 96 | 
 97 | function exportMonoWAV(type){
 98 |   var bufferL = mergeBuffers(recBuffersL, recLength);
 99 |   //var dataview = encodeWAV(bufferL, true);
100 |   var d = downsampleBuffer(bufferL, 16000);
101 |   var buffer = new ArrayBuffer(d.length * 2);
102 |   var view = new DataView(buffer);
103 |   floatTo16BitPCM(view, 0, d);
104 |   var audioBlob = new Blob([view], { type: type });
105 |   this.postMessage(audioBlob);
106 | }
107 | 
// Post [left, right]: each channel's recorded chunks merged into one array.
function getBuffers() {
  var left = mergeBuffers(recBuffersL, recLength);
  var right = mergeBuffers(recBuffersR, recLength);
  this.postMessage([left, right]);
}
114 | 
// Discard everything recorded so far.
function clear(){
  recBuffersL = [];
  recBuffersR = [];
  recLength = 0;
}
120 | 
// Concatenate the chunks in `recBuffers` into one Float32Array of
// `recLength` samples.
function mergeBuffers(recBuffers, recLength){
  var merged = new Float32Array(recLength);
  var writePos = 0;
  for (var k = 0; k < recBuffers.length; k++){
    var chunk = recBuffers[k];
    merged.set(chunk, writePos);
    writePos += chunk.length;
  }
  return merged;
}
130 | 
// Interleave two channels as L0, R0, L1, R1, ... into a new Float32Array
// whose length is the sum of both input lengths.
function interleave(inputL, inputR){
  var total = inputL.length + inputR.length;
  var mixed = new Float32Array(total);

  for (var out = 0, frame = 0; out < total; frame++){
    mixed[out++] = inputL[frame];
    mixed[out++] = inputR[frame];
  }
  return mixed;
}
145 | 
// Write `input` into the DataView `output` as little-endian signed 16-bit
// integers, starting at byte `offset`. Samples are clamped to [-1, 1];
// negative values scale by 0x8000, the rest by 0x7FFF.
function floatTo16BitPCM(output, offset, input){
  var bytePos = offset;
  for (var n = 0; n < input.length; n++){
    var clamped = Math.max(-1, Math.min(1, input[n]));
    var pcm = clamped < 0 ? clamped * 0x8000 : clamped * 0x7FFF;
    output.setInt16(bytePos, pcm, true);
    bytePos += 2;
  }
}
152 | 
// Write the char codes of `string` as consecutive bytes into `view`,
// starting at byte `offset`.
function writeString(view, offset, string){
  var count = string.length;
  for (var pos = 0; pos < count; pos++){
    var code = string.charCodeAt(pos);
    view.setUint8(offset + pos, code);
  }
}
158 | 
// Encode float samples as a 16-bit PCM WAV file.
//
// samples: sample values (interleaved when there are two channels)
// mono:    truthy for one channel, otherwise two
// rate:    optional sample rate for the header; defaults to the worker's
//          `sampleRate`
//
// Returns a DataView over the 44-byte header followed by the sample data.
function encodeWAV(samples, mono, rate){
  var channels = mono ? 1 : 2;
  var bytesPerSample = 2;
  var blockAlign = channels * bytesPerSample;
  var headerRate = rate || sampleRate;
  var dataBytes = samples.length * bytesPerSample;

  var buffer = new ArrayBuffer(44 + dataBytes);
  var view = new DataView(buffer);

  /* RIFF identifier */
  writeString(view, 0, 'RIFF');
  /* RIFF chunk size: everything after this field, i.e. 36 header bytes + data */
  view.setUint32(4, 36 + dataBytes, true);
  /* RIFF type */
  writeString(view, 8, 'WAVE');
  /* format chunk identifier */
  writeString(view, 12, 'fmt ');
  /* format chunk length */
  view.setUint32(16, 16, true);
  /* sample format (raw) */
  view.setUint16(20, 1, true);
  /* channel count */
  view.setUint16(22, channels, true);
  /* sample rate */
  view.setUint32(24, headerRate, true);
  /* byte rate (sample rate * block align) */
  view.setUint32(28, headerRate * blockAlign, true);
  /* block align (channel count * bytes per sample) */
  view.setUint16(32, blockAlign, true);
  /* bits per sample */
  view.setUint16(34, 16, true);
  /* data chunk identifier */
  writeString(view, 36, 'data');
  /* data chunk length */
  view.setUint32(40, dataBytes, true);

  floatTo16BitPCM(view, 44, samples);

  return view;
}
194 | 


--------------------------------------------------------------------------------
/configure:
--------------------------------------------------------------------------------
  1 | #!/bin/bash
  2 | #
  3 | # This configure script is hand-generated, not auto-generated.
  4 | # It creates the file kaldi.mk, which is %included by the Makefiles
  5 | # in the subdirectories.
  6 | # The file kaldi.mk is editable by hand-- for example, you may want to
  7 | # remove the options -g -O0 -DKALDI_PARANOID, or edit the
  8 | # -DKALDI_DOUBLE_PRECISION option (to be 1 not 0),
  9 | 
 10 | 
 11 | #  Example command lines:
 12 | # ./configure
 13 | # ./configure --shared  ## shared libraries.
 14 | # ./configure --mkl-root=/opt/intel/mkl
 15 | # ./configure --mkl-root=/opt/intel/mkl --threaded-math=yes
 16 | # ./configure --mkl-root=/opt/intel/mkl --threaded-math=yes --mkl-threading=tbb
 17 | #        This is for MKL 11.3 -- which does not seem  to provide Intel OMP libs
 18 | # ./configure  --openblas-root=../tools/OpenBLAS/install  # before doing
 19 | #        # this, cd to ../tools and type "make openblas".  Note:
 20 | #        # this is not working correctly on all platforms, do "make test"
 21 | #        # and look out for segmentation faults.
 22 | # ./configure --atlas-root=../tools/ATLAS/build
 23 | 
 24 | #This should be incremented after every significant change of the configure script
 25 | #I.e. after each change that affects the kaldi.mk or the build system as whole
 26 | CONFIGURE_VERSION=1
 27 | OUTPUT_MK=apiai.mk
 28 | INCLUDE_PATHS="/usr/include /usr/local/include"
 29 | LIBRARY_PATHS="/usr/lib /usr/local/lib /usr/local/lib64"
 30 | 
 31 | APIAI_CXX_FLAGS=
 32 | 
 33 | function rel2abs {
 34 |   if [ ! -z "$1" ]; then
 35 |     case "${1}" in
 36 |       [./]*)
 37 |       echo "$(cd ${1%/*}; pwd)/${1##*/}"
 38 |       ;;
 39 |       *)
 40 |       echo "${PWD}/${1}"
 41 |       ;;
 42 |     esac
 43 |   fi
 44 | }
 45 | 
 46 | function read_dirname {
 47 |   local dir_name=`expr "X$1" : '[^=]*=\(.*\)'`;
 48 |   local retval=`rel2abs $dir_name`
 49 |   [ -z $retval ] && echo "Bad option '$1': no such directory: $dir_name" >&2 && exit 1;
 50 |   echo $retval
 51 | }
 52 | 
 53 | function is_set {
 54 |   local myvar=${1:-notset}
 55 |   if [ "$myvar" == "notset" ]; then
 56 |     return 1
 57 |   else
 58 |     return 0
 59 |   fi
 60 | }
 61 | 
 62 | ##   First do some checks.  These verify that all the things are
 63 | ##   here that should be here.
 64 | if ! [ -x "$PWD/configure" ]; then
 65 |   echo 'You must run "configure" from the src/ directory.'
 66 |   exit 1
 67 | fi
 68 | 
 69 | ## Default locations for Kaldi sources.
 70 | KALDIROOT=$(rel2abs ../kaldi)
 71 | 
 72 | function usage {
 73 |   echo 'Usage: ./configure [--kaldi-root=KALDIROOT]';
 74 | }
 75 | 
 76 | cmd_line="$0 $@"  # Save the command line to include in kaldi.mk
 77 | 
 78 | while [ $# -gt 0 ];
 79 | do
 80 |   case "$1" in
 81 |   --help)
 82 |     usage; exit 0 ;;
 83 |   --version)
 84 |     echo $CONFIGURE_VERSION; exit 0 ;;
 85 |   --kaldi-root=*)
 86 |     KALDIROOT=$(read_dirname $1);
 87 |     shift ;;
 88 |   *)  echo "Unknown argument: $1, exiting"; usage; exit 1 ;;
 89 |   esac
 90 | done
 91 | 
 92 | function failure {
 93 |   echo "***configure failed: $* ***" >&2
 94 |   if [ -f kaldi.mk ]; then rm kaldi.mk; fi
 95 |   exit 1;
 96 | }
 97 | 
 98 | function check_exists {
 99 |   if [ ! -f $1 ]; then failure "$1 not found."; fi
100 | }
101 | 
# Print "SUCCESS" and end the script with status 0.
function exit_success {
  printf '%s\n' "SUCCESS"
  exit 0
}
106 | 
# check_sys_library NAME
# Look for the system library libNAME; the strategy depends on `uname -s`.
function check_sys_library {
  case $(uname -s) in
    Darwin)
      # Probe each directory in $LIBRARY_PATHS for lib<name>.a. Progress goes
      # to stderr; the first match is printed on stdout and ends the search.
      # This branch does not call failure when nothing is found.
      for file in $1; do
        for path in $LIBRARY_PATHS; do
          local result="${path}/lib${file}.a"
          echo -n "Checking ${result}..." >&2
          if [ -f "$result" ]; then
            echo "OK" >&2
            echo $result
            break 2
          else
            echo "Not found" >&2
          fi
        done
      done
    ;;
    *)
      # Every other system: ask whereis and keep the text after the last ":"
      # in its output. An empty remainder is treated as "not found" and
      # aborts via failure.
      echo -n "Looking for $1 library: "
      local response=$(whereis lib$1)
      local libpath=${response##*:}
      if [ -z "$libpath" ]; then
        echo "Not found"
        failure "Library $1 not found"
      else
        echo $libpath
      fi
    ;;
  esac
}
137 | 
# check_header_file "NAME..."
# Search each directory in $INCLUDE_PATHS for the given header name(s).
# Progress is reported on stderr; the first match is printed on stdout and
# ends the search. Nothing is printed on stdout when no header is found.
function check_header_file {
	for file in $1; do
		for path in $INCLUDE_PATHS; do
			local result="${path}/${file}"
			echo -n "Checking ${result}..." >&2
			if [ ! -f "$result" ]; then
				echo "Not found" >&2
				continue
			fi
			echo "OK" >&2
			echo $result
			break 2
		done
	done
}
153 | 
echo "Configuring ..."

echo "Looking for Kaldi sources in \"$KALDIROOT\"..."
check_exists "$KALDIROOT/src/kaldi.mk"

check_sys_library fcgi
check_sys_library fcgi++

# fcgio.h is looked up directly under the include paths first and under
# fastcgi/ second; in the latter case the containing directory is added to
# the compiler flags.
FCGIO_H=$(check_header_file "fcgio.h")
if [ -z "$FCGIO_H" ]; then
	FCGIO_H=$(check_header_file "fastcgi/fcgio.h")
	if [ -z "$FCGIO_H" ]; then
		failure "fcgio.h not found"
	else
		APIAI_CXX_FLAGS="$APIAI_CXX_FLAGS -I$(dirname "$FCGIO_H")"
	fi
fi

# back up the old one in case we modified it
if [ -f "$OUTPUT_MK" ]; then
  echo "Backing up $OUTPUT_MK to $OUTPUT_MK.bak"
  cp "$OUTPUT_MK" "${OUTPUT_MK}.bak"
fi

# Values are passed as printf arguments, never as the format string, so "%"
# or backslashes in paths and arguments are written out literally.
{
  printf '# This file was generated using the following command:\n# %s\n\n' "$cmd_line"
  printf 'KALDI_PATH = %s\n' "$KALDIROOT/src"
  printf 'APIAI_CXX_FLAGS = %s\n' "$APIAI_CXX_FLAGS"
} > "$OUTPUT_MK"

exit_success
184 | 


--------------------------------------------------------------------------------
/src/Decoder.h:
--------------------------------------------------------------------------------
 1 | // Decoder.h
 2 | 
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //  http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
10 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
11 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
12 | // MERCHANTABLITY OR NON-INFRINGEMENT.
13 | // See the Apache 2 License for the specific language governing permissions and
14 | // limitations under the License.
15 | 
16 | #ifndef APIAI_DECODER_DECODER_H_
17 | #define APIAI_DECODER_DECODER_H_
18 | 
19 | #include "Request.h"
20 | #include "Response.h"
21 | #include "util/parse-options.h"
22 | 
23 | namespace apiai {
24 | 
25 | /**
26 |  * ASR decoder basic interface
27 |  */
28 | class Decoder {
29 | public:
30 | 	virtual ~Decoder() {};
31 | 
32 | 	/** Create decoder clone */
33 | 	virtual Decoder *Clone() const = 0;
34 | 	/** Register options which can be defined via command line arguments */
35 | 	virtual void RegisterOptions(kaldi::OptionsItf &po) = 0;
36 | 	/** Initialize decoder */
37 | 	virtual bool Initialize(kaldi::OptionsItf &po) = 0;
38 | 	/** Perform decoding routine */
39 | 	virtual void Decode(Request &request, Response &response) = 0;
40 | };
41 | 
42 | } /* namespace apiai */
43 | 
44 | #endif /* APIAI_DECODER_DECODER_H_ */
45 | 


--------------------------------------------------------------------------------
/src/FcgiDecodingApp.cc:
--------------------------------------------------------------------------------
  1 | // FcgiDecodingApp.cc
  2 | 
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //  http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 10 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
 11 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
 12 | // MERCHANTABLITY OR NON-INFRINGEMENT.
 13 | // See the Apache 2 License for the specific language governing permissions and
 14 | // limitations under the License.
 15 | 
 16 | #include "RequestRawReader.h"
 17 | #include "ResponseJsonWriter.h"
 18 | #include "ResponseMultipartJsonWriter.h"
 19 | #include "FcgiDecodingApp.h"
 20 | #include "QueryStringParser.h"
 21 | #include <fcgio.h>
 22 | #include <list>
 23 | #include <string>
 24 | #include <stdlib.h>
 25 | #include <pthread.h>
 26 | #include <signal.h>
 27 | #include <sstream>
 28 | #include <iomanip>
 29 | #include <algorithm>
 30 | #include <cctype>
 31 | #include <memory>
 32 | 
 33 | namespace apiai {
 34 | 
// Query-string parameter names recognised by apply_request_parameters().
// n-best count passed to RequestRawReader::BestCount
const std::string PARAMETER_NAME_NBEST = "nbest";
// interval passed to RequestRawReader::IntermediateIntervalMillisec
const std::string PARAMETER_NAME_INTERMEDIATE = "intermediate";
// boolean passed to RequestRawReader::DoEndpointing
const std::string PARAMETER_NAME_END_OF_SPEECH = "endofspeech";
// boolean stored in ResponseParams::multipart
const std::string PARAMETER_MULTIPART = "multipart";
 39 | 
 40 | class ResponseParams {
 41 | public:
 42 | 	bool multipart;
 43 | 
 44 | 	static bool default_multipart;
 45 | 	static bool default_endofspeech;
 46 | 
 47 | 	ResponseParams() : multipart(default_multipart) {};
 48 | };
 49 | 
 50 | // Multipart option default value
 51 | bool ResponseParams::default_multipart = false;
 52 | 
 53 | // End-of-speech detection option default value
 54 | bool ResponseParams::default_endofspeech = true;
 55 | 
 56 | bool to_bool(std::string &str) {
 57 |     std::transform(str.begin(), str.end(), str.begin(), ::tolower);
 58 |     std::istringstream is(str);
 59 |     bool b;
 60 |     is >> std::boolalpha >> b;
 61 |     return b;
 62 | }
 63 | 
 64 | bool to_bool(const char *chars) {
 65 | 	std::string str(chars);
 66 |     return to_bool(str);
 67 | }
 68 | 
 69 | void apply_request_parameters(FCGX_Request &request, RequestRawReader &reader, ResponseParams &params) {
 70 | 	char *queryString = FCGX_GetParam("QUERY_STRING", request.envp);
 71 | 	if (queryString) {
 72 | 		QueryStringParser queryStringParser(queryString);
 73 | 		std::string name, value;
 74 | 		while (queryStringParser.Next(&name, &value)) {
 75 | 			if (PARAMETER_NAME_NBEST == name) {
 76 | 				reader.BestCount(atoi(value.data()));
 77 | 				KALDI_VLOG(1) << "Setting n-best: " << reader.BestCount();
 78 | 			} else if (PARAMETER_NAME_INTERMEDIATE == name) {
 79 | 				reader.IntermediateIntervalMillisec(atoi(value.data()));
 80 | 				KALDI_VLOG(1) << "Setting intermediate interval: " << reader.IntermediateIntervalMillisec() << " ms";
 81 | 			} else if (PARAMETER_NAME_END_OF_SPEECH == name) {
 82 | 				reader.DoEndpointing(to_bool(value.data()));
 83 | 				KALDI_VLOG(1) << "Setting end-of-speech: " << (reader.DoEndpointing() ? "enabled" : "disabled");
 84 | 			} else if (PARAMETER_MULTIPART == name) {
 85 | 				params.multipart = to_bool(value.data());
 86 | 				KALDI_VLOG(1) << "Setting multipart: " << (params.multipart ? "enabled" : "disabled");
 87 | 			} else {
 88 | 				KALDI_VLOG(1) << "Skipping unknown parameter \"" << name << "\"";
 89 | 			}
 90 | 		}
 91 | 	}
 92 | }
 93 | 
 94 | void FcgiDecodingApp::RegisterOptions(kaldi::OptionsItf &po) {
 95 |     po.Register("fcgi-socket", &fcgi_socket_path_, "FastCGI connection string, if undefined then stdin and stdout will be used");
 96 |     po.Register("fcgi-socket.backlog", &fcgi_socket_backlog_, "FastCGI socket backlog size.");
 97 |     po.Register("fcgi-threads-number", &fcgi_threads_number_, "Number of FastCGI working threads");
 98 |     po.Register("fcgi-multipart", &ResponseParams::default_multipart, "Enable or disable multipart responses by default");
 99 |     po.Register("fcgi-endofspeech", &ResponseParams::default_endofspeech, "Enable or disable end-of-speech detection by default");
100 | }
101 | 
102 | void *FcgiDecodingApp::RunChildThread(void *arg) {
103 | 	FcgiDecodingApp *app = (FcgiDecodingApp*)arg;
104 | 	Decoder *decoder = app->decoder_.Clone();
105 | 	app->ProcessingRoutine(*decoder);
106 | 	delete decoder;
107 | 	return NULL;
108 | }
109 | 
110 | void FcgiDecodingApp::ProcessingRoutine(Decoder &decoder) {
111 |     if (socket_id_ < 0) {
112 | 	KALDI_WARN << "Socket not opened";
113 | 	return;
114 |     }
115 | 
116 |     FCGX_Request request;
117 |     FCGX_InitRequest(&request, socket_id_, 0);
118 | 
119 |     while (FCGX_Accept_r(&request) == 0) {
120 | 	fcgi_streambuf cin_fcgi_streambuf(request.in);
121 | 	fcgi_streambuf cout_fcgi_streambuf(request.out);
122 | 	fcgi_streambuf cerr_fcgi_streambuf(request.err);
123 | 
124 | 	std::istream fcgiin(&cin_fcgi_streambuf);
125 | 	std::ostream fcgiout(&cout_fcgi_streambuf);
126 | 	std::ostream fcgierr(&cerr_fcgi_streambuf);
127 | 
128 | 	try {
129 | 		RequestRawReader reader(&fcgiin);
130 | 
131 | 		reader.DoEndpointing(ResponseParams::default_endofspeech);
132 | 
133 | 		ResponseParams params;
134 | 		apply_request_parameters(request, reader, params);
135 | 
136 | 		std::auto_ptr<ResponseJsonWriter> writer_ptr;
137 | 		if (params.multipart) {
138 | 			writer_ptr.reset(new ResponseMultipartJsonWriter(&fcgiout));
139 | 		} else {
140 | 			writer_ptr.reset(new ResponseJsonWriter(&fcgiout));
141 | 		}
142 | 
143 | 		fcgiout << "Content-type: "<< writer_ptr.get()->GetContentType() <<"\r\n\r\n";
144 | 
145 | 		decoder.Decode(reader, *(writer_ptr.get()));
146 | 	} catch (std::exception &e) {
147 | 		KALDI_LOG << "Fatal exception: " << e.what();
148 | 	}
149 | 
150 | 	FCGX_Finish_r(&request);
151 |     }
152 | }
153 | 
154 | int FcgiDecodingApp::Run(int argc, char **argv) {
155 | 
156 | 	if (running_) {
157 | 		KALDI_WARN << "Application already running";
158 | 		return 1;
159 | 	}
160 | 	running_ = true;
161 | 
162 | 	// Predefined configuration args
163 | 	const char *extra_args[] = {
164 | 		"--feature-type=mfcc",
165 | 		"--mfcc-config=mfcc.conf",
166 | 		"--frame-subsampling-factor=3",
167 | 		"--max-active=2000",
168 | 		"--beam=15.0",
169 | 		"--lattice-beam=6.0",
170 | 		"--acoustic-scale=1.0",
171 | 		"--endpoint.silence-phones=1",
172 | 		"--endpoint.rule1.min-trailing-silence=0.5",
173 | 		"--endpoint.rule2.min-trailing-silence=0.15",
174 | 		"--endpoint.rule3.min-trailing-silence=0.1",
175 | 	};
176 | 
177 |     FCGX_Init();
178 | 
179 |     kaldi::ParseOptions po(usage_.data());
180 |     RegisterOptions(po);
181 |     decoder_.RegisterOptions(po);
182 | 
183 |     std::vector<const char*> args;
184 |     args.push_back(argv[0]);
185 |     args.insert(args.end(), extra_args, extra_args + sizeof(extra_args) / sizeof(extra_args[0]));
186 |     args.insert(args.end(), argv + 1, argv + argc);
187 |     po.Read(args.size(), args.data());
188 | 
189 |     if (fcgi_threads_number_ < 1) {
190 |     	KALDI_ERR << "Number of threads should be at least 1, but " << fcgi_threads_number_ << " given";
191 |     }
192 | 
193 |     if (fcgi_socket_path_.size() > 0) {
194 | 		socket_id_ = FCGX_OpenSocket(fcgi_socket_path_.data(), fcgi_socket_backlog_);
195 | 		if (socket_id_ < 0) {
196 | 			KALDI_WARN << "Error opening socket" << fcgi_socket_path_ << "(backlog: " << fcgi_socket_backlog_ << ")";
197 | 			return 1;
198 | 		} else {
199 | 			KALDI_LOG << "Listening FastCGI data at \"" << fcgi_socket_path_ << "\"";
200 | 		}
201 |     } else {
202 |     	KALDI_LOG << "Listening FastCGI data at stdin";
203 |     }
204 | 
205 | 	if (!decoder_.Initialize(po)) {
206 | 		po.PrintUsage();
207 | 		running_ = false;
208 | 	    return 1;
209 | 	}
210 | 
211 | 	if (fcgi_threads_number_ == 1) {
212 | 		KALDI_VLOG(1) << "Single thread running";
213 | 		ProcessingRoutine(decoder_);
214 | 	} else {
215 | 		std::list<pthread_t> thread_list;
216 | 		int errnumber;
217 | 
218 | 		for (int i = 0; i < fcgi_threads_number_; i++) {
219 | 			pthread_t thread;
220 | 			if ((errnumber = pthread_create(&thread, NULL, RunChildThread, this)) != 0) {
221 | 				KALDI_WARN << "Failed to start thread: " << strerror(errnumber);
222 | 				break;
223 | 			} else {
224 | 				thread_list.push_back(thread);
225 | 			}
226 | 		}
227 | 
228 | 		KALDI_VLOG(1) << "Threads ready: " << thread_list.size();
229 | 
230 | 		for (std::list<pthread_t>::iterator i = thread_list.begin(); i != thread_list.end(); ++i) {
231 | 			if ((errnumber = pthread_join(*i, NULL)) != 0) {
232 | 				KALDI_WARN << "Failed to join thread: " << strerror(errnumber);
233 | 			}
234 | 		}
235 | 		KALDI_VLOG(1) << "Thread finished, threads left: " << thread_list.size();
236 | 	}
237 | 
238 | 	running_ = false;
239 | 	return 0;
240 | }
241 | } /* namespace apiai */
242 | 


--------------------------------------------------------------------------------
/src/FcgiDecodingApp.h:
--------------------------------------------------------------------------------
 1 | // FcgiDecodingApp.h
 2 | 
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //  http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
10 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
11 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
12 | // MERCHANTABLITY OR NON-INFRINGEMENT.
13 | // See the Apache 2 License for the specific language governing permissions and
14 | // limitations under the License.
15 | 
16 | #ifndef APIAI_DECODER_FCGIDECODINGAPP_H_
17 | #define APIAI_DECODER_FCGIDECODINGAPP_H_
18 | 
19 | #include "Decoder.h"
20 | 
21 | namespace apiai {
22 | 
23 | /**
24 |  * Decoding class with main routine defined.
25 |  * Data IO implemented via FastCGI gate.
26 |  * Input data expected as raw audio stream
27 |  * Output data is JSON encoded objects
28 |  */
29 | class FcgiDecodingApp {
30 | public:
31 | 	/** Initialize with given decoder */
32 | 	FcgiDecodingApp(Decoder &decoder) : decoder_(decoder),
33 | 		fcgi_threads_number_(1), fcgi_socket_backlog_(0), socket_id_(0),
34 | 		running_(false) {};
35 | 
36 | 	/** Get run specifications and allowed arguments list */
37 | 	std::string &Usage() { return usage_; }
38 | 	/** Set run specifications and allowed arguments list */
39 | 	void Usage(std::string &usage) { usage_ = usage; }
40 | 
41 | 	/** Run main routine and pass all given arguments */
42 | 	int Run(int argn, char **argv);
43 | private:
44 | 	void RegisterOptions(kaldi::OptionsItf &po);
45 | 	void ProcessingRoutine(Decoder &decoder);
46 | 	static void *RunChildThread(void *app);
47 | 
48 | 	Decoder &decoder_;
49 | 	std::string usage_;
50 | 
51 | 	int fcgi_threads_number_;
52 | 	std::string fcgi_socket_path_;
53 | 	int fcgi_socket_backlog_;
54 | 	int socket_id_;
55 | 	bool running_;
56 | };
57 | 
58 | } /* namespace apiai */
59 | 
60 | #endif /* APIAI_DECODER_FCGIDECODINGAPP_H_ */
61 | 


--------------------------------------------------------------------------------
/src/Makefile:
--------------------------------------------------------------------------------
 1 | 
 2 | all:
 3 | 
 4 | include ../apiai.mk
 5 | include $(KALDI_PATH)/kaldi.mk
 6 | 
 7 | LDFLAGS += $(CUDA_LDFLAGS)
 8 | LDLIBS += -lfcgi -lfcgi++ $(CUDA_LDLIBS)
 9 | EXTRA_CXXFLAGS += -I$(KALDI_PATH) -L$(KALDI_PATH) $(APIAI_CXX_FLAGS)
10 | 
11 | OBJFILES = Timing.o Response.o RequestRawReader.o ResponseJsonWriter.o ResponseMultipartJsonWriter.o OnlineDecoder.o Nnet3LatgenFasterDecoder.o QueryStringParser.o FcgiDecodingApp.o 
12 | 
13 | LIBNAME = libstidecoder
14 | 
15 | BINFILES = fcgi-nnet3-decoder
16 | 
17 | TESTFILES = QueryStringParserTests
18 | 
19 | ADDLIBS = $(KALDI_PATH)/online2/kaldi-online2.a $(KALDI_PATH)/ivector/kaldi-ivector.a \
20 |           $(KALDI_PATH)/nnet2/kaldi-nnet2.a $(KALDI_PATH)/nnet3/kaldi-nnet3.a $(KALDI_PATH)/lat/kaldi-lat.a \
21 |           $(KALDI_PATH)/decoder/kaldi-decoder.a  $(KALDI_PATH)/cudamatrix/kaldi-cudamatrix.a \
22 |           $(KALDI_PATH)/feat/kaldi-feat.a $(KALDI_PATH)/transform/kaldi-transform.a $(KALDI_PATH)/gmm/kaldi-gmm.a \
23 |           $(KALDI_PATH)/hmm/kaldi-hmm.a $(KALDI_PATH)/tree/kaldi-tree.a \
24 |           $(KALDI_PATH)/matrix/kaldi-matrix.a $(KALDI_PATH)/fstext/kaldi-fstext.a \
25 |           $(KALDI_PATH)/util/kaldi-util.a $(KALDI_PATH)/base/kaldi-base.a 
26 |           
27 | include $(KALDI_PATH)/makefiles/default_rules.mk
28 | 


--------------------------------------------------------------------------------
/src/Nnet3LatgenFasterDecoder.cc:
--------------------------------------------------------------------------------
  1 | // Nnet3LatgenFasterDecoder.cc
  2 | 
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //  http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 10 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
 11 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
 12 | // MERCHANTABLITY OR NON-INFRINGEMENT.
 13 | // See the Apache 2 License for the specific language governing permissions and
 14 | // limitations under the License.
 15 | 
 16 | #include "Nnet3LatgenFasterDecoder.h"
 17 | 
 18 | namespace apiai {
 19 | 
 20 | Nnet3LatgenFasterDecoder::Nnet3LatgenFasterDecoder() {
 21 | 	online_ = true;
 22 | 	decode_fst_ = NULL;
 23 | 	trans_model_ = NULL;
 24 | 	nnet_ = NULL;
 25 |     decodable_info_ = NULL;    
 26 | 	feature_info_ = NULL;
 27 | 	nnet3_rxfilename_ = "final.mdl";
 28 | }
 29 | 
 30 | Nnet3LatgenFasterDecoder::~Nnet3LatgenFasterDecoder() {
 31 | 	delete decode_fst_;
 32 | 	delete trans_model_;
 33 | 	delete nnet_;
 34 |     delete decodable_info_;   
 35 | 	delete feature_info_;
 36 | }
 37 | 
 38 | Nnet3LatgenFasterDecoder *Nnet3LatgenFasterDecoder::Clone() const {
 39 | 	return new Nnet3LatgenFasterDecoder(*this);
 40 | }
 41 | 
 42 | void Nnet3LatgenFasterDecoder::RegisterOptions(kaldi::OptionsItf &po) {
 43 | 	OnlineDecoder::RegisterOptions(po);
 44 | 
 45 | 	po.Register("nnet-in", &nnet3_rxfilename_,
 46 | 	                "Path to nnet");
 47 |     po.Register("online", &online_,
 48 |                 "You can set this to false to disable online iVector estimation "
 49 |                 "and have all the data for each utterance used, even at "
 50 |                 "utterance start.  This is useful where you just want the best "
 51 |                 "results and don't care about online operation.  Setting this to "
 52 |                 "false has the same effect as setting "
 53 |                 "--use-most-recent-ivector=true and --greedy-ivector-extractor=true "
 54 |                 "in the file given to --ivector-extraction-config, and "
 55 |                 "--chunk-length=-1.");
 56 | 
 57 |     feature_config_.Register(&po);
 58 |     decoder_opts_.Register(&po);
 59 |     decodable_opts_.Register(&po);
 60 |     endpoint_config_.Register(&po);
 61 | }
 62 | 
 63 | bool Nnet3LatgenFasterDecoder::Initialize(kaldi::OptionsItf &po) {
 64 | 	if (!OnlineDecoder::Initialize(po)) {
 65 | 		return false;
 66 | 	}
 67 | 
 68 | 	if (fst_rxfilename_ == "") {
 69 | 		return false;
 70 | 	}
 71 | 
 72 | 	if (nnet3_rxfilename_ == "") {
 73 | 		return false;
 74 | 	}
 75 | 
 76 |     feature_info_ = new kaldi::OnlineNnet2FeaturePipelineInfo(feature_config_);
 77 | 
 78 |     if (!online_) {
 79 |       feature_info_->ivector_extractor_info.use_most_recent_ivector = true;
 80 |       feature_info_->ivector_extractor_info.greedy_ivector_extractor = true;
 81 |       chunk_length_secs_ = -1.0;
 82 |     }
 83 | 
 84 |     trans_model_ = new kaldi::TransitionModel();
 85 |     nnet_ = new kaldi::nnet3::AmNnetSimple();      
 86 |     {
 87 |       bool binary;
 88 |       kaldi::Input ki(nnet3_rxfilename_, &binary);
 89 |       trans_model_->Read(ki.Stream(), binary);
 90 |       nnet_->Read(ki.Stream(), binary);
 91 |     }
 92 | 
 93 |     // this object contains precomputed stuff that is used by all decodable
 94 |     // objects.  It takes a pointer to nnet_ because if it has iVectors it has
 95 |     // to modify the nnet to accept iVectors at intervals.
 96 |     decodable_info_ = new kaldi::nnet3::DecodableNnetSimpleLoopedInfo(     
 97 |                             decodable_opts_, nnet_);
 98 | 
 99 |     decode_fst_ = fst::ReadFstKaldiGeneric(fst_rxfilename_);
100 | 
101 |     fst::SymbolTable *word_syms = NULL;
102 |     if (word_syms_rxfilename_ != "")
103 |       if (!(word_syms = fst::SymbolTable::ReadText(word_syms_rxfilename_)))
104 |         KALDI_ERR << "Could not read symbol table from file "
105 |                   << word_syms_rxfilename_;
106 | 
107 |     acoustic_scale_ = decodable_opts_.acoustic_scale;                          
108 | 
109 |     return true;
110 | }
111 | 
112 | void Nnet3LatgenFasterDecoder::InputStarted()
113 | {
114 | 	adaptation_state_ = new kaldi::OnlineIvectorExtractorAdaptationState(feature_info_->ivector_extractor_info);
115 | 
116 | 	feature_pipeline_ = new kaldi::OnlineNnet2FeaturePipeline (*feature_info_);
117 | 	feature_pipeline_->SetAdaptationState(*adaptation_state_);
118 | 
119 | 	decoder_ = new kaldi::SingleUtteranceNnet3Decoder(decoder_opts_,
120 | 										*trans_model_,
121 | 										*decodable_info_,
122 | 										*decode_fst_,
123 | 										feature_pipeline_);
124 | }
125 | 
126 | 
127 | void Nnet3LatgenFasterDecoder::CleanUp()
128 | {
129 | 	delete decoder_;
130 | 	delete adaptation_state_;
131 | 	delete feature_pipeline_;
132 | 
133 | 	decoder_ = NULL;
134 | 	adaptation_state_ = NULL;
135 | 	feature_pipeline_ = NULL;
136 | }
137 | 
138 | bool Nnet3LatgenFasterDecoder::AcceptWaveform(kaldi::BaseFloat sampling_rate,
139 | 		const kaldi::VectorBase<kaldi::BaseFloat> &waveform,
140 | 		const bool do_endpointing)
141 | {
142 | 	feature_pipeline_->AcceptWaveform(sampling_rate, waveform);
143 | 
144 | 	if (do_endpointing && decoder_->EndpointDetected(endpoint_config_)) {
145 | 		return false;
146 | 	}
147 | 
148 | 	decoder_->AdvanceDecoding();
149 | 
150 | 	return true;
151 | }
152 | 
153 | void Nnet3LatgenFasterDecoder::InputFinished()
154 | {
155 | 	feature_pipeline_->InputFinished();
156 | 	decoder_->AdvanceDecoding();
157 | 	decoder_->FinalizeDecoding();
158 | }
159 | 
160 | void Nnet3LatgenFasterDecoder::GetLattice(kaldi::CompactLattice *clat, bool end_of_utterance)
161 | {
162 | 	decoder_->GetLattice(end_of_utterance, clat);
163 | 
164 | 	// In an application you might avoid updating the adaptation state if
165 | 	// you felt the utterance had low confidence.  See lat/confidence.h
166 | 	feature_pipeline_->GetAdaptationState(adaptation_state_);
167 | 
168 | 	if (acoustic_scale_ != 0) {
169 | 		ScaleLattice(fst::AcousticLatticeScale(1.0 / acoustic_scale_), clat);
170 | 	}
171 | }
172 | 
173 | } /* namespace apiai */
174 | 


--------------------------------------------------------------------------------
/src/Nnet3LatgenFasterDecoder.h:
--------------------------------------------------------------------------------
 1 | // Nnet3LatgenFasterDecoder.h
 2 | 
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //  http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
10 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
11 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
12 | // MERCHANTABLITY OR NON-INFRINGEMENT.
13 | // See the Apache 2 License for the specific language governing permissions and
14 | // limitations under the License.
15 | 
16 | #ifndef APIAI_DECODER_NNET3LATGENFASTERDECODER_H_
17 | #define APIAI_DECODER_NNET3LATGENFASTERDECODER_H_
18 | 
19 | #include "OnlineDecoder.h"
20 | #include "online2/online-nnet3-decoding.h"          
21 | #include "online2/online-nnet2-feature-pipeline.h"
22 | 
23 | namespace apiai {
24 | 
25 | class Nnet3LatgenFasterDecoder: public OnlineDecoder {
26 | public:
27 | 	Nnet3LatgenFasterDecoder();
28 | 	virtual ~Nnet3LatgenFasterDecoder();
29 | 
30 | 	virtual Nnet3LatgenFasterDecoder *Clone() const;
31 | 	virtual void RegisterOptions(kaldi::OptionsItf &po);
32 | 	virtual bool Initialize(kaldi::OptionsItf &po);
33 | protected:
34 | 	virtual bool AcceptWaveform(kaldi::BaseFloat sampling_rate,
35 | 			const kaldi::VectorBase<kaldi::BaseFloat> &waveform,
36 | 			const bool do_endpointing);
37 | 	virtual void InputStarted();
38 | 	virtual void InputFinished();
39 | 	virtual void GetLattice(kaldi::CompactLattice *clat, bool end_of_utterance);
40 | 	virtual void CleanUp();
41 | private:
42 | 	std::string nnet3_rxfilename_;
43 | 
44 |     bool online_;
45 |     kaldi::OnlineEndpointConfig endpoint_config_;
46 | 
47 |     // feature_config includes configuration for the iVector adaptation,
48 |     // as well as the basic features.
49 |     kaldi::OnlineNnet2FeaturePipelineConfig feature_config_;
50 |     kaldi::nnet3::NnetSimpleLoopedComputationOptions decodable_opts_;  
51 |     kaldi::LatticeFasterDecoderConfig decoder_opts_;                   
52 | 
53 |     kaldi::OnlineNnet2FeaturePipelineInfo *feature_info_;
54 |     fst::Fst<fst::StdArc> *decode_fst_;
55 |     kaldi::TransitionModel *trans_model_;
56 |     kaldi::nnet3::AmNnetSimple *nnet_;
57 |     kaldi::nnet3::DecodableNnetSimpleLoopedInfo *decodable_info_;     
58 | 
59 |     kaldi::OnlineIvectorExtractorAdaptationState *adaptation_state_;
60 |     kaldi::OnlineNnet2FeaturePipeline *feature_pipeline_;
61 |     kaldi::SingleUtteranceNnet3Decoder *decoder_;
62 | };
63 | 
64 | } /* namespace apiai */
65 | 
66 | #endif /* APIAI_DECODER_NNET3LATGENFASTERDECODER_H_ */
67 | 


--------------------------------------------------------------------------------
/src/OnlineDecoder.cc:
--------------------------------------------------------------------------------
  1 | // OnlineDecoder.cc
  2 | 
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //  http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 10 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
 11 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
 12 | // MERCHANTABLITY OR NON-INFRINGEMENT.
 13 | // See the Apache 2 License for the specific language governing permissions and
 14 | // limitations under the License.
 15 | 
 16 | #include "OnlineDecoder.h"
 17 | #include "Timing.h"
 18 | 
 19 | namespace apiai {
 20 | 
 21 | #define PAD_SIZE 400
 22 | #define AUDIO_DATA_FREQUENCY 16000
 23 | kaldi::BaseFloat padVector[PAD_SIZE];
 24 | 
 25 | struct OnlineDecoder::DecodedData {
 26 | 	kaldi::LatticeWeight weight;
 27 | 	std::vector<int32> words;
 28 | 	std::vector<int32> alignment;
 29 | 	std::vector<kaldi::LatticeWeight> weights;
 30 | };
 31 | 
 32 | bool wordsEquals(std::vector<int32> &a, std::vector<int32> &b) {
 33 | 	return (a.size() == b.size()) && (std::equal(a.begin(), a.end(), b.begin()));
 34 | }
 35 | 
 36 | bool getWeightMeasures(const kaldi::Lattice &fst,
 37 |                             std::vector<kaldi::LatticeArc::Weight> *weights_out) {
 38 |   typedef kaldi::LatticeArc::Label Label;
 39 |   typedef kaldi::LatticeArc::StateId StateId;
 40 |   typedef kaldi::LatticeArc::Weight Weight;
 41 | 
 42 |   std::vector<Weight> weights;
 43 | 
 44 |   StateId cur_state = fst.Start();
 45 |   if (cur_state == fst::kNoStateId) {  // empty sequence.
 46 |     if (weights_out != NULL) weights_out->clear();
 47 |     return true;
 48 |   }
 49 |   while (1) {
 50 |     Weight w = fst.Final(cur_state);
 51 |     if (w != Weight::Zero()) {  // is final..
 52 | 
 53 |       if (w.Value1() != 0 || w.Value2() != 0) {
 54 |     	  weights.push_back(w);
 55 |       }
 56 |       if (fst.NumArcs(cur_state) != 0) return false;
 57 |       if (weights_out != NULL) *weights_out = weights;
 58 |       return true;
 59 |     } else {
 60 |       if (fst.NumArcs(cur_state) != 1) return false;
 61 | 
 62 |       fst::ArcIterator<fst::Fst<kaldi::LatticeArc> > iter(fst, cur_state);  // get the only arc.
 63 |       const kaldi::LatticeArc &arc = iter.Value();
 64 |       if (arc.weight.Value1() != 0 || arc.weight.Value2() != 0) {
 65 |     	  weights.push_back(arc.weight);
 66 |       }
 67 |       cur_state = arc.nextstate;
 68 |     }
 69 |   }
 70 | }
 71 | 
 72 | 
 73 | OnlineDecoder::OnlineDecoder() {
 74 | 	lm_scale_ = 10;
 75 | 	chunk_length_secs_ = 0.18;
 76 | 	max_record_size_seconds_ = 0;
 77 | 	max_lattice_unchanged_interval_seconds_ = 0;
 78 | 	decoding_timeout_seconds_ = 0;
 79 | 
 80 | 	word_syms_rxfilename_ = "words.txt";
 81 | 	fst_rxfilename_ = "HCLG.fst";
 82 | }
 83 | 
 84 | OnlineDecoder::~OnlineDecoder() {
 85 | }
 86 | 
 87 | 
 88 | void OnlineDecoder::GetRecognitionResult(DecodedData &input, RecognitionResult *output) {
 89 | 	  // TODO move parameters to external file
 90 | 	  output->confidence = std::max(0.0, std::min(1.0, -0.0001466488 * (2.388449*float(input.weight.Value1()) + float(input.weight.Value2())) / (input.words.size() + 1) + 0.956));
 91 | 
 92 | 	  std::ostringstream outss;
 93 | 
 94 | 	  for (size_t i = 0; i < input.words.size(); i++) {
 95 | 		if (i) {
 96 | 		  outss << " ";
 97 | 		}
 98 | 		std::string s = word_syms_->Find(input.words[i]);
 99 | 		if (s == "") {
100 | 		  KALDI_WARN << "Word-id " << input.words[i] <<" not in symbol table.";
101 | 		} else {
102 | 			outss << s;
103 | 		}
104 | 	  }
105 | 	  output->text = outss.str();
106 | }
107 | 
108 | void OnlineDecoder::GetRecognitionResult(std::vector<DecodedData> &input, std::vector<RecognitionResult> *output) {
109 | 	for (int i = 0; i < input.size(); i++) {
110 | 		RecognitionResult result;
111 | 		GetRecognitionResult(input.at(i), &result);
112 | 		output->push_back(result);
113 | 	}
114 | }
115 | 
116 | void OnlineDecoder::RegisterOptions(kaldi::OptionsItf &po) {
117 |     po.Register("chunk-length", &chunk_length_secs_,
118 |                 "Length of chunk size in seconds, that we process.");
119 |     po.Register("word-symbol-table", &word_syms_rxfilename_,
120 |                 "Symbol table for words [for debug output]");
121 |     po.Register("fst-in", &fst_rxfilename_, "Path to FST model file");
122 |     po.Register("lm-scale", &lm_scale_, "Scaling factor for LM probabilities. "
123 | 				"Note: the ratio acoustic-scale/lm-scale is all that matters.");
124 | 
125 |     po.Register("max-record-length", &max_record_size_seconds_,
126 |     		"Max length of record in seconds to be recognised. "
127 | 			"All records longer than given value will be truncated. Note: Non-positive value to deactivate.");
128 | 
129 |     po.Register("max-lattice-unchanged-interval", &max_lattice_unchanged_interval_seconds_,
130 | 			"Max interval length in seconds of lattice recognised unchanged. Note: Non-positive value to deactivate.");
131 | 
132 |     po.Register("decoding-timeout", &decoding_timeout_seconds_,
133 |     		"Decoding process timeout given in seconds. Timeout disabled if value is non-positive.");
134 | }
135 | 
136 | bool OnlineDecoder::Initialize(kaldi::OptionsItf &po) {
137 | 	word_syms_ = NULL;
138 | 	if (word_syms_rxfilename_ == "") {
139 | 		return false;
140 | 	}
141 | 	if (!(word_syms_ = fst::SymbolTable::ReadText(word_syms_rxfilename_))) {
142 | 		KALDI_ERR << "Could not read symbol table from file "
143 | 			  << word_syms_rxfilename_;
144 | 	}
145 | 	return true;
146 | }
147 | 
148 | void OnlineDecoder::Decode(Request &request, Response &response) {
149 | 	try {
150 | 		KALDI_ASSERT(request.Frequency() == AUDIO_DATA_FREQUENCY);
151 | 		milliseconds_t start_time = getMilliseconds();
152 | 		milliseconds_t progress_time = 0;
153 | 
154 | 		KALDI_VLOG(1) << "Started @ " << start_time << " ms";
155 | 		InputStarted();
156 | 
157 | 		int intermediate_counter = 1;
158 | 		int intermediate_samples_interval = request.IntermediateIntervalMillisec() > 0 ? request.IntermediateIntervalMillisec() * (request.Frequency() / 1000) : 0;
159 | 		int max_samples_limit = max_record_size_seconds_ > 0 ? max_record_size_seconds_ * request.Frequency() : 0;
160 | 
161 | 		std::vector<int32> prev_words;
162 | 		int samples_per_chunk = int(chunk_length_secs_ * request.Frequency());
163 | 
164 | 		int samp_counter = 0;
165 | 
166 | 		kaldi::SubVector<kaldi::BaseFloat> *wave_part;
167 | 
168 | 		bool do_endpointing = request.DoEndpointing();
169 | 		std::string requestInterrupted = Response::NOT_INTERRUPTED;
170 | 		int samples_left = (max_samples_limit > 0) ? std::min(max_samples_limit, samples_per_chunk) : samples_per_chunk;
171 | 		const bool decoding_timeout_enabled = decoding_timeout_seconds_ > 0;
172 | 		const int decoding_timeout_ms = decoding_timeout_enabled ? decoding_timeout_seconds_ * 1000 : 0;
173 | 
174 | 		int time_left_ms = decoding_timeout_ms;
175 | 		while ((wave_part = request.NextChunk(samples_left, time_left_ms)) != NULL) {
176 | 
177 | 			samp_counter += wave_part->Dim();
178 | 
179 | 			if (AcceptWaveform(request.Frequency(), *wave_part, do_endpointing) == false && do_endpointing) {
180 | 				requestInterrupted = Response::INTERRUPTED_END_OF_SPEECH;
181 | 				KALDI_VLOG(1) << "End Point Detected @ " << (getMillisecondsSince(start_time)) << " ms";
182 | 				break;
183 | 			}
184 | 			progress_time = getMillisecondsSince(start_time);
185 | 
186 | 			if (max_samples_limit > 0) {
187 | 				if (samp_counter > max_samples_limit) {
188 | 					requestInterrupted = Response::INTERRUPTED_DATA_SIZE_LIMIT;
189 | 					KALDI_VLOG(1) << "Interrupted by record length @ " << progress_time << " ms";
190 | 					break;
191 | 				}
192 | 				samples_left = std::min(max_samples_limit - samp_counter, samples_per_chunk);
193 | 			}
194 | 
195 | 			if ((intermediate_samples_interval > 0) && (samp_counter > (intermediate_samples_interval * intermediate_counter))) {
196 | 				intermediate_counter++;
197 | 				std::vector<DecodedData> decodeData;
198 | 				if (DecodeIntermediate(1, &decodeData) > 0) {
199 | 					DecodedData &data = decodeData.at(0);
200 | 					if (!wordsEquals(prev_words, data.words)) {
201 | 						RecognitionResult recognitionResult;
202 | 						GetRecognitionResult(data, &recognitionResult);
203 | 						response.SetIntermediateResult(recognitionResult, (samp_counter / (request.Frequency() / 1000)));
204 | 						prev_words = data.words;
205 | 					}
206 | 				} else {
207 | 					prev_words.clear();
208 | 				}
209 | 			}
210 | 			if (decoding_timeout_enabled) {
211 | 				time_left_ms = decoding_timeout_ms - getMillisecondsSince(start_time);
212 | 				if (time_left_ms <= 0) {
213 | 					break;
214 | 				}
215 | 			}
216 | 		}
217 | 		if (wave_part != NULL && requestInterrupted.size() == 0) {
218 | 			if (decoding_timeout_enabled && (decoding_timeout_ms - getMillisecondsSince(start_time) <= 0)) {
219 | 				KALDI_VLOG(1) << "Timeout reached @ " << (getMillisecondsSince(start_time)) << " ms";
220 | 				requestInterrupted = Response::INTERRUPTED_TIMEOUT;
221 | 			} else {
222 | 				requestInterrupted = Response::INTERRUPTED_UNEXPECTED;
223 | 			}
224 | 		}
225 | 
226 | 		if (samp_counter == 0) {
227 | 			throw std::runtime_error("Got no data");
228 | 		}
229 | 
230 | 		if (samp_counter < PAD_SIZE) {
231 | 			KALDI_VLOG(1) << "Input too short, padding with " << (PAD_SIZE - samp_counter) << " zero samples";
232 | 			kaldi::SubVector<kaldi::BaseFloat> padding(padVector, PAD_SIZE - samp_counter);
233 | 			AcceptWaveform(request.Frequency(), padding, false);
234 | 		}
235 | 
236 | 		KALDI_VLOG(1) << "Input finished @ " << getMillisecondsSince(start_time) << " ms (audio length: " << (samp_counter / (request.Frequency() / 1000)) << " ms)";
237 | 		InputFinished();
238 | 
239 | 		std::vector<DecodedData> result;
240 | 
241 | 		int32 decoded = Decode(true, request.BestCount(), &result);
242 | 
243 | 		if (decoded == 0) {
244 | 			response.SetError("Best-path failed");
245 | 			KALDI_WARN << "Best-path failed";
246 | 		} else {
247 | 			std::vector<RecognitionResult> recognitionResults;
248 | 			GetRecognitionResult(result, &recognitionResults);
249 | 			response.SetResult(recognitionResults, requestInterrupted, (samp_counter / (request.Frequency() / 1000)));
250 | 			KALDI_VLOG(1) << "Recognized @ " << getMillisecondsSince(start_time) << " ms";
251 | 		}
252 | 
253 | 		CleanUp();
254 | 
255 | 		KALDI_VLOG(1) << "Decode subroutine done";
256 | 	} catch (std::runtime_error &e) {
257 | 		response.SetError(e.what());
258 | 	}
259 | };
260 | 
261 | int32 OnlineDecoder::DecodeIntermediate(int bestCount, std::vector<DecodedData> *result) {
262 | 	return Decode(false, bestCount, result);
263 | }
264 | 
265 | int32 OnlineDecoder::Decode(bool end_of_utterance, int bestCount, std::vector<DecodedData> *result) {
266 | 	kaldi::CompactLattice clat;
267 | 	GetLattice(&clat, end_of_utterance);
268 | 
269 | 	if (clat.NumStates() == 0) {
270 | 		return 0;
271 | 	}
272 | 
273 | 	if (lm_scale_ != 0) {
274 | 		fst::ScaleLattice(fst::LatticeScale(lm_scale_, 1.0), &clat);
275 | 	}
276 | 
277 | 	int32 resultsNumber = 0;
278 | 
279 | 	if (bestCount > 1) {
280 | 		kaldi::Lattice _lat;
281 | 		fst::ConvertLattice(clat, &_lat);
282 | 		kaldi::Lattice nbest_lat;
283 | 		fst::ShortestPath(_lat, &nbest_lat, bestCount);
284 | 		std::vector<kaldi::Lattice> nbest_lats;
285 | 		fst::ConvertNbestToVector(nbest_lat, &nbest_lats);
286 | 		if (!nbest_lats.empty()) {
287 | 		  resultsNumber = static_cast<int32>(nbest_lats.size());
288 | 		  for (int32 k = 0; k < resultsNumber; k++) {
289 | 			kaldi::Lattice &nbest_lat = nbest_lats[k];
290 | 
291 | 			DecodedData decodeData;
292 | 			GetLinearSymbolSequence(nbest_lat, &(decodeData.alignment), &(decodeData.words), &(decodeData.weight));
293 | 			getWeightMeasures(nbest_lat, &(decodeData.weights));
294 | 			result->push_back(decodeData);
295 | 		  }
296 | 		}
297 | 	} else {
298 | 		kaldi::CompactLattice best_path_clat;
299 | 		kaldi::CompactLatticeShortestPath(clat, &best_path_clat);
300 | 
301 | 		kaldi::Lattice best_path_lat;
302 | 		fst::ConvertLattice(best_path_clat, &best_path_lat);
303 | 		DecodedData decodeData;
304 | 		GetLinearSymbolSequence(best_path_lat, &(decodeData.alignment), &(decodeData.words), &(decodeData.weight));
305 | 		getWeightMeasures(best_path_lat, &(decodeData.weights));
306 | 		result->push_back(decodeData);
307 | 		resultsNumber = 1;
308 | 	}
309 | 
310 | 	return resultsNumber;
311 | }
312 | 
313 | } /* namespace apiai */
314 | 


--------------------------------------------------------------------------------
/src/OnlineDecoder.h:
--------------------------------------------------------------------------------
  1 | // OnlineDecoder.h
  2 | 
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //  http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 10 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
 11 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
 12 | // MERCHANTABLITY OR NON-INFRINGEMENT.
 13 | // See the Apache 2 License for the specific language governing permissions and
 14 | // limitations under the License.
 15 | 
 16 | #ifndef APIAI_DECODER_ONLINEDECODER_H_
 17 | #define APIAI_DECODER_ONLINEDECODER_H_
 18 | 
 19 | #include "Decoder.h"
 20 | #include "online2/online-feature-pipeline.h"
 21 | #include "online2/onlinebin-util.h"
 22 | #include "online2/online-timing.h"
 23 | #include "online2/online-endpoint.h"
 24 | #include "fstext/fstext-lib.h"
 25 | #include "lat/lattice-functions.h"
 26 | #include <list>
 27 | 
 28 | namespace apiai {
 29 | 
 30 | /**
 31 |  * Basic implementation of common code for all Kaldi online decoders
 32 |  */
 33 | class OnlineDecoder : public Decoder {
 34 | public:
 35 | 	OnlineDecoder();
 36 | 	virtual ~OnlineDecoder();
 37 | 
 38 | 	virtual void RegisterOptions(kaldi::OptionsItf &po);
 39 | 	virtual bool Initialize(kaldi::OptionsItf &po);
 40 | 	virtual void Decode(Request &request, Response &response);
 41 | protected:
 42 | 	struct DecodedData;
 43 | 
 44 | 	/**
 45 | 	 * Process next data chunk
 46 | 	 */
 47 | 	virtual bool AcceptWaveform(kaldi::BaseFloat sampling_rate,
 48 |                     const kaldi::VectorBase<kaldi::BaseFloat> &waveform,
 49 | 					const bool do_endpointing) = 0;
 50 | 
 51 | 	/**
 52 | 	 * Preparare to decoding
 53 | 	 */
 54 | 	virtual void InputStarted() = 0;
 55 | 	/**
 56 | 	 * Decoding finished, gets ready to get results
 57 | 	 */
 58 | 	virtual void InputFinished() = 0;
 59 | 	/**
 60 | 	 * Put result lattice
 61 | 	 */
 62 | 	virtual void GetLattice(kaldi::CompactLattice *clat, bool end_of_utterance) = 0;
 63 | 	/**
 64 | 	 * Clean all data
 65 | 	 */
 66 | 	virtual void CleanUp() = 0;
 67 | 	/**
 68 | 	 * Calculate intermediate results
 69 | 	 */
 70 | 	virtual kaldi::int32 DecodeIntermediate(int bestCount, std::vector<DecodedData> *result);
 71 | 
 72 | 	std::string word_syms_rxfilename_;
 73 | 	kaldi::BaseFloat chunk_length_secs_;
 74 | 	kaldi::BaseFloat acoustic_scale_;
 75 | 	kaldi::BaseFloat lm_scale_;
 76 | 
 77 | 
 78 | 	/**
 79 | 	 * Max length of record in seconds to be recognised.
 80 | 	 * All records longer than given value will be truncated. Note: Non-positive value to deactivate.
 81 | 	 */
 82 | 	kaldi::BaseFloat max_record_size_seconds_;
 83 | 	/**
 84 | 	 * Max interval length in seconds of lattice recognised unchanged. Non-positive value to deactivate
 85 | 	 */
 86 | 	kaldi::BaseFloat max_lattice_unchanged_interval_seconds_;
 87 | 
 88 | 	/** Decoding process timeout given in seconds.
 89 | 	 * Timeout disabled if value is non-positive
 90 | 	 */
 91 | 	kaldi::BaseFloat decoding_timeout_seconds_;
 92 | 
 93 | 	bool do_endpointing_;
 94 | 
 95 | 	std::string fst_rxfilename_;
 96 | private:
 97 | 	fst::SymbolTable *word_syms_;
 98 | 
 99 | 	kaldi::int32 Decode(bool end_of_utterance, int bestCount, std::vector<DecodedData> *result);
100 | 
101 | 	void GetRecognitionResult(DecodedData &input, RecognitionResult *output);
102 | 	void GetRecognitionResult(std::vector<DecodedData> &input, std::vector<RecognitionResult> *output);
103 | };
104 | 
105 | } /* namespace apiai */
106 | 
107 | #endif /* APIAI_DECODER_ONLINEDECODER_H_ */
108 | 


--------------------------------------------------------------------------------
/src/QueryStringParser.cc:
--------------------------------------------------------------------------------
  1 | // QueryStringParser.cc
  2 | 
  3 | // Licensed under the Apache License, Version 2.0 (the "License");
  4 | // you may not use this file except in compliance with the License.
  5 | // You may obtain a copy of the License at
  6 | //
  7 | //  http://www.apache.org/licenses/LICENSE-2.0
  8 | //
  9 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 10 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
 11 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
 12 | // MERCHANTABLITY OR NON-INFRINGEMENT.
 13 | // See the Apache 2 License for the specific language governing permissions and
 14 | // limitations under the License.
 15 | 
 16 | #include "QueryStringParser.h"
 17 | 
 18 | namespace apiai {
 19 | 
 20 | enum ParseState {
 21 | 	NameRead, ValueRead, Done
 22 | };
 23 | 
 24 | QueryStringParser::QueryStringParser(const char *query) {
 25 | 	query_.assign(query);
 26 | 	Init();
 27 | }
 28 | 
 29 | QueryStringParser::QueryStringParser(const std::string &query) {
 30 | 	query_ = query;
 31 | 	Init();
 32 | }
 33 | 
 34 | QueryStringParser::~QueryStringParser() {
 35 | 
 36 | }
 37 | 
 38 | void QueryStringParser::Init() {
 39 | 
 40 | 
 41 | 	if (query_.size() == 0 || query_ == "?") {
 42 | 		has_next_ = false;
 43 | 		return;
 44 | 	}
 45 | 
 46 | 	std::string::iterator index = query_.begin();
 47 | 
 48 | 	if ((*index) == '?') {
 49 | 		++index;
 50 | 	}
 51 | 
 52 | 	has_next_ = SeekNext(index);
 53 | }
 54 | 
 55 | bool QueryStringParser::SeekNext(std::string::iterator &from) {
 56 | 	std::string::iterator index = from;
 57 | 
 58 | 	ParseState state = NameRead;
 59 | 	name_begin_ = index;
 60 | 	value_begin_ = value_end_ = query_.end();
 61 | 	for (;(state != Done) && (index < query_.end()); index++) {
 62 | 		switch (*index) {
 63 | 		case '=':
 64 | 			switch (state) {
 65 | 			case NameRead:
 66 | 				state = ValueRead;
 67 | 				name_end_ = index;
 68 | 				value_begin_ = value_end_ = index + 1;
 69 | 				break;
 70 | 			case ValueRead:
 71 | 				// Do nothing
 72 | 				break;
 73 | 			case Done:
 74 | 				// Do nothing
 75 | 				break;
 76 | 			}
 77 | 
 78 | 			break;
 79 | 		case '&':
 80 | 			switch (state) {
 81 | 			case NameRead:
 82 | 				name_end_ = index;
 83 | 				break;
 84 | 			case ValueRead:
 85 | 				value_end_ = index;
 86 | 				break;
 87 | 			case Done:
 88 | 				// Do nothing
 89 | 				break;
 90 | 			}
 91 | 			state = Done;
 92 | 			break;
 93 | 		default:
 94 | 			break;
 95 | 		};
 96 | 	}
 97 | 
 98 | 	switch (state) {
 99 | 	case NameRead:
100 | 		name_end_ = index;;
101 | 		break;
102 | 	case ValueRead:
103 | 		value_end_ = index;
104 | 		break;
105 | 	case Done:
106 | 		// Do nothing
107 | 		break;
108 | 	}
109 | 
110 | 	bool result = index != from;
111 | 
112 | 	return result;
113 | }
114 | 
115 | bool QueryStringParser::Next(std::string *name, std::string *value) {
116 | 	if (!has_next_) {
117 | 		return false;
118 | 	}
119 | 
120 | 	name->assign(name_begin_, name_end_);
121 | 	value->assign(value_begin_, value_end_);
122 | 
123 | 	std::string::iterator index = value_end_;
124 | 	++index;
125 | 	has_next_ = SeekNext(index);
126 | 	return true;
127 | }
128 | 
129 | } /* namespace apiai */
130 | 


--------------------------------------------------------------------------------
/src/QueryStringParser.h:
--------------------------------------------------------------------------------
 1 | // QueryStringParser.h
 2 | 
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //  http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
10 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
11 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
12 | // MERCHANTABLITY OR NON-INFRINGEMENT.
13 | // See the Apache 2 License for the specific language governing permissions and
14 | // limitations under the License.
15 | 
16 | #ifndef SRC_QUERYSTRINGPARSER_H_
17 | #define SRC_QUERYSTRINGPARSER_H_
18 | 
19 | #include <string>
20 | 
21 | namespace apiai {
22 | 
23 | /**
24 |  * Query string parser.
25 |  * Iterates through name-value pairs of standard URI query string
26 |  */
class QueryStringParser {
public:
	/** Initialize parse with given query string (the string is copied) */
	QueryStringParser(const char *query);
	/** Initialize parse with given query string (the string is copied) */
	QueryStringParser(const std::string &query);
	virtual ~QueryStringParser();

	/** Returns true if there is more unhandled name-value pairs */
	bool HasNext() const { return has_next_; }
	/**
	 * Get next name-value pair.
	 * Returns false if there is no more pairs
	 */
	bool Next(std::string *name, std::string *value);
private:
	/** Positions the parser on the first pair; called by both constructors */
	void Init();
	/** Scans one pair starting at `from` and records its boundaries in the iterators below */
	bool SeekNext(std::string::iterator &from);

	// Private copy of the query string being parsed.
	std::string query_;
	// True while a scanned but not yet returned pair is available.
	bool has_next_;

	// Boundaries of the current pair; all four iterators point into query_.
	// NOTE(review): an implicitly generated copy of the parser would presumably
	// keep iterators into the source object's string - confirm that no caller
	// copies a parser.
	std::string::iterator name_begin_;
	std::string::iterator name_end_;
	std::string::iterator value_begin_;
	std::string::iterator value_end_;
};
54 | 
55 | } /* namespace apiai */
56 | 
57 | #endif /* SRC_QUERYSTRINGPARSER_H_ */
58 | 


--------------------------------------------------------------------------------
/src/QueryStringParserTests.cc:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * QueryStringParserTests.cc
  3 |  *
  4 |  *  Created on: Apr 6, 2016
  5 |  *      Author: folomeev
  6 |  */
  7 | 
  8 | #include "QueryStringParser.h"
  9 | #include "base/kaldi-error.h"
 10 | 
 11 | namespace apiai {
 12 | 
 13 | 	void TestEmptyString() {
 14 | 		std::string name, value;
 15 | 		QueryStringParser parser("");
 16 | 
 17 | 		KALDI_ASSERT(!parser.HasNext());
 18 | 		KALDI_ASSERT(!parser.Next(&name, &value));
 19 | 	}
 20 | 
 21 | 	void TestQuestionMarkString() {
 22 | 		std::string name, value;
 23 | 		QueryStringParser parser("?");
 24 | 
 25 | 		KALDI_ASSERT(!parser.HasNext());
 26 | 		KALDI_ASSERT(!parser.Next(&name, &value));
 27 | 	}
 28 | 
 29 | 	void TestSingleNameValue() {
 30 | 		std::string name, value;
 31 | 		QueryStringParser parser("?name=value");
 32 | 
 33 | 		KALDI_ASSERT(parser.HasNext());
 34 | 		KALDI_ASSERT(parser.Next(&name, &value));
 35 | 		KALDI_ASSERT(name == "name");
 36 | 		KALDI_ASSERT(value == "value");
 37 | 		KALDI_ASSERT(!parser.HasNext());
 38 | 		KALDI_ASSERT(!parser.Next(&name, &value));
 39 | 	}
 40 | 
 41 | 	void TestSingleNameNoValue() {
 42 | 		std::string name, value;
 43 | 		QueryStringParser parser("?name=");
 44 | 
 45 | 		KALDI_ASSERT(parser.HasNext());
 46 | 		KALDI_ASSERT(parser.Next(&name, &value));
 47 | 		KALDI_ASSERT(name == "name");
 48 | 		KALDI_ASSERT(value == "");
 49 | 		KALDI_ASSERT(!parser.HasNext());
 50 | 		KALDI_ASSERT(!parser.Next(&name, &value));
 51 | 	}
 52 | 
 53 | 	void TestSingleNoNameValue() {
 54 | 		std::string name, value;
 55 | 		QueryStringParser parser("?=value");
 56 | 
 57 | 		KALDI_ASSERT(parser.HasNext());
 58 | 		KALDI_ASSERT(parser.Next(&name, &value));
 59 | 		KALDI_ASSERT(name == "");
 60 | 		KALDI_ASSERT(value == "value");
 61 | 		KALDI_ASSERT(!parser.HasNext());
 62 | 		KALDI_ASSERT(!parser.Next(&name, &value));
 63 | 	}
 64 | 
 65 | 	void TestEndsWithAmpersand() {
 66 | 		std::string name, value;
 67 | 		QueryStringParser parser("?&");
 68 | 
 69 | 		KALDI_ASSERT(parser.HasNext());
 70 | 		KALDI_ASSERT(parser.Next(&name, &value));
 71 | 		KALDI_ASSERT(name == "");
 72 | 		KALDI_ASSERT(value == "");
 73 | 		KALDI_ASSERT(!parser.HasNext());
 74 | 		KALDI_ASSERT(!parser.Next(&name, &value));
 75 | 	}
 76 | 
 77 | 	void TestEquatationInValue() {
 78 | 		std::string name, value;
 79 | 		QueryStringParser parser("?name=v=u");
 80 | 
 81 | 		KALDI_ASSERT(parser.HasNext());
 82 | 		KALDI_ASSERT(parser.Next(&name, &value));
 83 | 		KALDI_ASSERT(name == "name");
 84 | 		KALDI_ASSERT(value == "v=u");
 85 | 		KALDI_ASSERT(!parser.HasNext());
 86 | 		KALDI_ASSERT(!parser.Next(&name, &value));
 87 | 	}
 88 | 
 89 | 
 90 | 	void TestSingleNoNameNoValue() {
 91 | 		std::string name, value;
 92 | 		QueryStringParser parser("?=");
 93 | 
 94 | 		KALDI_ASSERT(parser.HasNext());
 95 | 		KALDI_ASSERT(parser.Next(&name, &value));
 96 | 		KALDI_ASSERT(name == "");
 97 | 		KALDI_ASSERT(value == "");
 98 | 		KALDI_ASSERT(!parser.HasNext());
 99 | 		KALDI_ASSERT(!parser.Next(&name, &value));
100 | 	}
101 | 
102 | 	void TestTwoNameValuePairs() {
103 | 		std::string name, value;
104 | 		QueryStringParser parser("?name1=value1&name2=value2");
105 | 
106 | 		KALDI_ASSERT(parser.HasNext());
107 | 		KALDI_ASSERT(parser.Next(&name, &value));
108 | 		KALDI_ASSERT(name == "name1");
109 | 		KALDI_ASSERT(value == "value1");
110 | 		KALDI_ASSERT(parser.HasNext());
111 | 		KALDI_ASSERT(parser.Next(&name, &value));
112 | 		KALDI_ASSERT(name == "name2");
113 | 		KALDI_ASSERT(value == "value2");
114 | 		KALDI_ASSERT(!parser.HasNext());
115 | 		KALDI_ASSERT(!parser.Next(&name, &value));
116 | 	}
117 | 
118 | 	void TestEmptyNameAfterEquality() {
119 | 		std::string name, value;
120 | 		QueryStringParser parser("?name1=value1&=value2");
121 | 
122 | 		KALDI_ASSERT(parser.HasNext());
123 | 		KALDI_ASSERT(parser.Next(&name, &value));
124 | 		KALDI_ASSERT(name == "name1");
125 | 		KALDI_ASSERT(value == "value1");
126 | 		KALDI_ASSERT(parser.HasNext());
127 | 		KALDI_ASSERT(parser.Next(&name, &value));
128 | 		KALDI_ASSERT(name == "");
129 | 		KALDI_ASSERT(value == "value2");
130 | 		KALDI_ASSERT(!parser.HasNext());
131 | 		KALDI_ASSERT(!parser.Next(&name, &value));
132 | 	}
133 | 
134 | 
135 | } /* namespace apiai */
136 | 
137 | 
138 | 
139 | int main(int argn, char *argv[]) {
140 | 	using namespace apiai;
141 | 
142 | 	TestEmptyString();
143 | 	TestQuestionMarkString();
144 | 	TestEndsWithAmpersand();
145 | 	TestEquatationInValue();
146 | 	TestSingleNameValue();
147 | 	TestSingleNameNoValue();
148 | 	TestSingleNoNameValue();
149 | 	TestSingleNoNameNoValue();
150 | 	TestTwoNameValuePairs();
151 | 	TestEmptyNameAfterEquality();
152 | 	return 0;
153 | }
154 | 
155 | 


--------------------------------------------------------------------------------
/src/Request.h:
--------------------------------------------------------------------------------
 1 | // Request.h
 2 | 
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //  http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
10 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
11 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
12 | // MERCHANTABLITY OR NON-INFRINGEMENT.
13 | // See the Apache 2 License for the specific language governing permissions and
14 | // limitations under the License.
15 | 
16 | #ifndef SRC_REQUEST_H_
17 | #define SRC_REQUEST_H_
18 | 
19 | #include "base/kaldi-types.h"
20 | #include "matrix/kaldi-vector.h"
21 | 
22 | namespace apiai {
23 | 
24 | /**
25 |  * Request data holding interface
26 |  */
27 | class Request {
28 | public:
29 | 	virtual ~Request() {};
30 | 
31 | 	/** Get number of samples per second of audio data */
32 | 	virtual kaldi::int32 Frequency(void) const = 0;
33 | 
34 | 	/** Get max number of expected result variants */
35 | 	virtual kaldi::int32 BestCount(void) const = 0;
36 | 	/** Get milliseconds interval between intermediate results.
37 | 	 *  If non-positive given then no intermediate results would be calculated */
38 | 	virtual kaldi::int32 IntermediateIntervalMillisec(void) const = 0;
39 | 
40 | 	/** Get end-of-speech points detection flag. */
41 | 	virtual bool DoEndpointing(void) const = 0;
42 | 
43 | 	/**
44 | 	 * Get next chunk of audio data samples.
45 | 	 * Max number of samples specified by samples_count value
46 | 	 */
47 | 	virtual kaldi::SubVector<kaldi::BaseFloat> *NextChunk(kaldi::int32 samples_count) = 0;
48 | 	/**
49 | 	 * Get next chunk of audio data samples.
50 | 	 * Max number of samples specified by samples_count value.
51 | 	 * Read timeout specified by timeout_ms.
52 | 	 */
53 | 	virtual kaldi::SubVector<kaldi::BaseFloat> *NextChunk(kaldi::int32 samples_count, kaldi::int32 timeout_ms) = 0;
54 | };
55 | 
56 | } /* namespace apiai */
57 | 
58 | #endif /* SRC_REQUEST_H_ */
59 | 


--------------------------------------------------------------------------------
/src/RequestRawReader.cc:
--------------------------------------------------------------------------------
 1 | // RequestRawReader.cc
 2 | 
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //  http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
10 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
11 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
12 | // MERCHANTABLITY OR NON-INFRINGEMENT.
13 | // See the Apache 2 License for the specific language governing permissions and
14 | // limitations under the License.
15 | 
16 | #include "RequestRawReader.h"
17 | 
18 | #include "Timing.h"
19 | #include <algorithm>
20 | 
21 | namespace apiai {
22 | 
// Interval of 500 milliseconds.
// NOTE(review): not referenced anywhere in this file; by its name it is
// presumably meant as the wait between read attempts for the unimplemented
// read timeout - confirm before relying on or removing it.
const milliseconds_t data_wait_interval_ms = 500;
24 | 
25 | kaldi::SubVector<kaldi::BaseFloat> *RequestRawReader::NextChunk(kaldi::int32 samples_count)
26 | {
27 | 	return NextChunk(samples_count, 0);
28 | }
29 | 
30 | kaldi::SubVector<kaldi::BaseFloat> *RequestRawReader::NextChunk(kaldi::int32 samples_count, kaldi::int32 timeout_ms) {
31 | 	// TODO: timeout_ms is not supported because libfcgi do not provides "readsome" functionality
32 | 	if (samples_count <= 0) {
33 | 		return NULL;
34 | 	}
35 | 
36 | 	if (fail_) {
37 | 		return NULL;
38 | 	}
39 | 
40 | 	int frame_size = bytes_per_sample_ * channels_;
41 | 	kaldi::int32 chunk_size = samples_count * frame_size;
42 | 
43 | 	int offset = channel_index_ * bytes_per_sample_;
44 | 
45 | 	std::vector<char> audioData(chunk_size);
46 | 
47 | 	int bytes_read = 0;
48 | 
49 | 	is_->read(audioData.data(), chunk_size);
50 | 
51 | 	bytes_read = is_->gcount();
52 | 
53 | 	if (is_->gcount() == 0) {
54 | 		fail_ = true;
55 | 		last_error_message_ == "Failed to read any data";
56 | 		return NULL;
57 | 	}
58 | 
59 | 	buffer_.clear();
60 | 	for (int index = 0; index < bytes_read; index += frame_size) {
61 | 		kaldi::int16 value = *reinterpret_cast<kaldi::int16*>(audioData.data() + index + offset);
62 | 		kaldi::BaseFloat fvalue = kaldi::BaseFloat(value);
63 | 		buffer_.push_back(fvalue);
64 | 	}
65 | 
66 | 	if (current_chunk_ && (current_chunk_->Dim() != buffer_.size())) {
67 | 		delete current_chunk_;
68 | 		current_chunk_ = NULL;
69 | 	}
70 | 
71 | 	if (!current_chunk_) {
72 | 		current_chunk_ = new kaldi::SubVector<kaldi::BaseFloat>(buffer_.data(), buffer_.size());
73 | 	} else {
74 | 		KALDI_ASSERT(buffer_.size() == current_chunk_->Dim());
75 | 		std::copy(buffer_.begin(), buffer_.end(), current_chunk_->Data());
76 | 	}
77 | 
78 | 	return current_chunk_;
79 | }
80 | 
81 | } /* namespace apiai */
82 | 


--------------------------------------------------------------------------------
/src/RequestRawReader.h:
--------------------------------------------------------------------------------
 1 | // RequestRawReader.h
 2 | 
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //  http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
10 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
11 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
12 | // MERCHANTABLITY OR NON-INFRINGEMENT.
13 | // See the Apache 2 License for the specific language governing permissions and
14 | // limitations under the License.
15 | 
16 | #ifndef APIAI_DECODER_STIREQUESTREADER_H_
17 | #define APIAI_DECODER_STIREQUESTREADER_H_
18 | 
19 | #include "Request.h"
20 | #include <stdio.h>
21 | #include <istream>
22 | 
// Lower and upper bound that RequestRawReader::BestCount(value) clamps the
// requested number of result variants to.
#define NBEST_MIN 1
#define NBEST_MAX 10

// Smallest positive interval, in milliseconds, accepted by
// RequestRawReader::IntermediateIntervalMillisec(value); smaller positive
// values are raised to it.
#define INTERMEDIATE_MIN 500
27 | 
28 | namespace apiai {
29 | 
30 | /**
31 |  * Provides access to PCM data from input stream.
32 |  * Assumed that PCM is signed mono, 16 bits, 16 KHz
33 |  */
34 | class RequestRawReader : public Request {
35 | public:
36 | 	RequestRawReader(std::istream *is)
37 | 	{
38 | 		fail_ = false;
39 | 		current_chunk_ = NULL;
40 | 
41 | 		is_ = is;
42 | 		frequency_ = 16000;
43 | 		bytes_per_sample_ = 16 / 8;
44 | 		channels_ = 1;
45 | 		channel_index_ = 0;
46 | 
47 | 		bestCount_ = 1;
48 | 		intermediateMillisecondsInterval_ = 0;
49 | 		doEndpointing_ = false;
50 | 	}
51 | 
52 | 	virtual ~RequestRawReader() {
53 | 		delete current_chunk_;
54 | 	}
55 | 
56 | 	virtual kaldi::int32 Frequency(void) const { return frequency_; }
57 | 
58 | 	/** Get errors flag */
59 | 	bool HasErrors(void) { return fail_ || is_->fail(); }
60 | 	/** Get last error message */
61 | 	const std::string &LastErrorMessage(void) const { return last_error_message_; }
62 | 
63 | 	virtual kaldi::int32 BestCount(void) const { return bestCount_; }
64 | 	virtual kaldi::int32 IntermediateIntervalMillisec(void) const { return intermediateMillisecondsInterval_; }
65 | 	virtual bool DoEndpointing(void) const { return doEndpointing_; }
66 | 
67 | 	/** Set number of suggested recognition result variants */
68 | 	void BestCount(kaldi::int32 value) { bestCount_ = std::max(NBEST_MIN, std::min(NBEST_MAX, value)); }
69 | 	/** Set intermediate results interval in milliseconds */
70 | 	void IntermediateIntervalMillisec(kaldi::int32 value) {
71 | 		intermediateMillisecondsInterval_ = value > 0 ? std::max(value, INTERMEDIATE_MIN) : 0;
72 | 	}
73 | 	/** Set end-of-speech points detection flag. */
74 | 	void DoEndpointing(bool value) { doEndpointing_ = value; }
75 | 
76 | 	virtual kaldi::SubVector<kaldi::BaseFloat> *NextChunk(kaldi::int32 samples_count);
77 | 	virtual kaldi::SubVector<kaldi::BaseFloat> *NextChunk(kaldi::int32 samples_count, kaldi::int32 timeout_ms);
78 | private:
79 | 	bool fail_;
80 | 	kaldi::int32 frequency_;
81 | 	kaldi::int32 bytes_per_sample_;
82 | 	kaldi::int32 channels_;
83 | 	kaldi::int32 channel_index_;
84 | 
85 | 	kaldi::int32 bestCount_;
86 | 	kaldi::int32 intermediateMillisecondsInterval_;
87 | 	bool doEndpointing_;
88 | 
89 | 	std::istream *is_;
90 | 	std::vector<kaldi::BaseFloat> buffer_;
91 | 	std::string last_error_message_;
92 | 	kaldi::SubVector<kaldi::BaseFloat> *current_chunk_;
93 | };
94 | 
95 | } /* namespace apiai */
96 | 
97 | #endif /* APIAI_DECODER_STIREQUESTREADER_H_ */
98 | 


--------------------------------------------------------------------------------
/src/Response.cc:
--------------------------------------------------------------------------------
 1 | // Response.cc
 2 | 
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //  http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
10 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
11 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
12 | // MERCHANTABLITY OR NON-INFRINGEMENT.
13 | // See the Apache 2 License for the specific language governing permissions and
14 | // limitations under the License.
15 | 
16 | #include "Response.h"
17 | 
18 | namespace apiai {
19 | 
20 | const std::string Response::NOT_INTERRUPTED = "";
21 | const std::string Response::INTERRUPTED_UNEXPECTED="unexpected";
22 | const std::string Response::INTERRUPTED_END_OF_SPEECH="endofspeech";
23 | const std::string Response::INTERRUPTED_DATA_SIZE_LIMIT="sizelimit";
24 | const std::string Response::INTERRUPTED_TIMEOUT="timeout";
25 | 
26 | } /* namespace apiai */
27 | 
28 | 


--------------------------------------------------------------------------------
/src/Response.h:
--------------------------------------------------------------------------------
 1 | // Response.h
 2 | 
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //  http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
10 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
11 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
12 | // MERCHANTABLITY OR NON-INFRINGEMENT.
13 | // See the Apache 2 License for the specific language governing permissions and
14 | // limitations under the License.
15 | 
16 | #ifndef RESPONSE_H_
17 | #define RESPONSE_H_
18 | 
19 | #include <string>
20 | #include <vector>
21 | 
22 | namespace apiai {
23 | 
24 | /**
25 |  * Recognition results holder
26 |  */
struct RecognitionResult {
	/** Confidence of the result, in percents */
	float confidence;

	/** Recognized text */
	std::string text;
};
37 | 
38 | /**
39 |  * Interface for recognition data collector
40 |  */
41 | class Response {
42 | public:
43 | 	virtual ~Response() {};
44 | 
45 | 	/** Get content type MIME string */
46 | 	virtual const std::string &GetContentType() = 0;
47 | 
48 | 	/** Set final results */
49 | 	virtual void SetResult(std::vector<RecognitionResult> &data, int timeMarkMs) = 0;
50 | 	/** Set final result.
51 | 	 * Value of interrupted flag is set to true if recognition process was interrupted before
52 | 	 * all given data been read.
53 | 	 */
54 | 	virtual void SetResult(std::vector<RecognitionResult> &data, const std::string &interrupted, int timeMarkMs) = 0;
55 | 	/** Set intermediate result */
56 | 	virtual void SetIntermediateResult(RecognitionResult &decodedData, int timeMarkMs) = 0;
57 | 	/** Set error value */
58 | 	virtual void SetError(const std::string &message) = 0;
59 | 
60 | 	static const std::string NOT_INTERRUPTED;
61 | 	static const std::string INTERRUPTED_UNEXPECTED;
62 | 	static const std::string INTERRUPTED_END_OF_SPEECH;
63 | 	static const std::string INTERRUPTED_DATA_SIZE_LIMIT;
64 | 	static const std::string INTERRUPTED_TIMEOUT;
65 | };
66 | 
67 | } /* namespace apiai */
68 | 
69 | #endif /* RESPONSE_H_ */
70 | 


--------------------------------------------------------------------------------
/src/ResponseJsonWriter.cc:
--------------------------------------------------------------------------------
 1 | // ResponseJsonWriter.cc
 2 | 
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //  http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
10 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
11 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
12 | // MERCHANTABLITY OR NON-INFRINGEMENT.
13 | // See the Apache 2 License for the specific language governing permissions and
14 | // limitations under the License.
15 | 
16 | #include "ResponseJsonWriter.h"
17 | 
18 | namespace apiai {
19 | 
20 | const std::string ResponseJsonWriter::MIME_APPLICATION_JAVA = "application/json";
21 | 
22 | void ResponseJsonWriter::SendJson(std::string json, bool final) {
23 | 	*out_ << json << std::endl;
24 | 	out_->flush();
25 | }
26 | 
27 | void ResponseJsonWriter::Write(std::ostringstream &outss, RecognitionResult &data) {
28 |   outss << "{"
29 |   	<< "\"confidence\":" << data.confidence << ","
30 |   	<< "\"text\":\"" << data.text << "\""
31 |   	<< "}";
32 | }
33 | 
34 | void ResponseJsonWriter::SetResult(std::vector<RecognitionResult> &data, int timeMarkMs) {
35 | 	SetResult(data, NOT_INTERRUPTED, timeMarkMs);
36 | }
37 | 
38 | void ResponseJsonWriter::SetResult(std::vector<RecognitionResult> &data, const std::string &interrupted, int timeMarkMs) {
39 | 
40 | 	std::ostringstream msg;
41 | 	msg << "{";
42 | 	msg << "\"status\":\"ok\"";
43 | 	msg << ",\"data\":[";
44 | 	for (int i = 0; i < data.size(); i++) {
45 | 		if (i) {
46 | 			msg << ",";
47 | 		}
48 | 		Write(msg, data.at(i));
49 | 	}
50 | 	msg << "]";
51 | 	if (interrupted.size() > 0) {
52 | 		msg << ",\"interrupted\":\"" << interrupted << "\"";
53 | 		if (timeMarkMs > 0) {
54 | 		    msg << ",\"time\":" << timeMarkMs;
55 | 		}
56 | 	}
57 | 	msg << "}";
58 | 	SendJson(msg.str(), true);
59 | }
60 | 
61 | void ResponseJsonWriter::SetIntermediateResult(RecognitionResult &decodedData, int timeMarkMs) {
62 | 	std::ostringstream msg;
63 | 	msg << "{";
64 | 	msg << "\"status\":\"intermediate\"";
65 | 	msg << ",\"data\":[";
66 | 	Write(msg, decodedData);
67 | 	msg << "]}";
68 | 	SendJson(msg.str(), false);
69 | }
70 | 
71 | void ResponseJsonWriter::SetError(const std::string &message) {
72 | 	std::ostringstream msg;
73 |     msg << "{";
74 |     msg << "\"status\":\"error\"";
75 |     msg << ",\"data\":[{\"text\":\""<< message << "\"}]";
76 |     msg << "}";
77 |     SendJson(msg.str(), true);
78 | }
79 | 
80 | } /* namespace apiai */
81 | 


--------------------------------------------------------------------------------
/src/ResponseJsonWriter.h:
--------------------------------------------------------------------------------
 1 | // ResponseJsonWriter.h
 2 | 
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //  http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
10 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
11 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
12 | // MERCHANTABLITY OR NON-INFRINGEMENT.
13 | // See the Apache 2 License for the specific language governing permissions and
14 | // limitations under the License.
15 | 
16 | #ifndef RESPONSEJSONWRITER_H_
17 | #define RESPONSEJSONWRITER_H_
18 | 
19 | #include "Response.h"
20 | #include <sstream>
21 | 
22 | namespace apiai {
23 | 
24 | /**
25 |  * Writes recognition data to output stream as JSON serialized objects
26 |  */
27 | class ResponseJsonWriter : public Response {
28 | public:
29 | 	ResponseJsonWriter(std::ostream *osb) : out_(osb) {}
30 | 	virtual ~ResponseJsonWriter() {};
31 | 
32 | 	virtual const std::string &GetContentType() { return MIME_APPLICATION_JAVA; }
33 | 
34 | 	virtual void SetResult(std::vector<RecognitionResult> &data, int timeMarkMs);
35 | 	virtual void SetResult(std::vector<RecognitionResult> &data, const std::string &interrupted, int timeMarkMs);
36 | 	virtual void SetIntermediateResult(RecognitionResult &decodedData, int timeMarkMs);
37 | 	virtual void SetError(const std::string &message);
38 | protected:
39 | 	std::ostream *out() { return out_; }
40 | 
41 | 	virtual void SendJson(std::string json, bool final);
42 | private:
43 | 	void Write(std::ostringstream &outss, RecognitionResult &data);
44 | 	std::ostream *out_;
45 | 
46 | 	static const std::string MIME_APPLICATION_JAVA;
47 | };
48 | 
49 | } /* namespace apiai */
50 | 
51 | #endif /* RESPONSEJSONWRITER_H_ */
52 | 


--------------------------------------------------------------------------------
/src/ResponseMultipartJsonWriter.cc:
--------------------------------------------------------------------------------
 1 | // ResponseMultipartJsonWriter.cc
 2 | 
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //  http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
10 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
11 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
12 | // MERCHANTABLITY OR NON-INFRINGEMENT.
13 | // See the Apache 2 License for the specific language governing permissions and
14 | // limitations under the License.
15 | 
16 | #include "ResponseMultipartJsonWriter.h"
17 | 
18 | namespace apiai {
19 | 
20 | const std::string ResponseMultipartJsonWriter::MIME_MULTIPART = "multipart/x-mixed-replace";
21 | 
22 | ResponseMultipartJsonWriter::~ResponseMultipartJsonWriter() {
23 | 
24 | }
25 | 
26 | ResponseMultipartJsonWriter::ResponseMultipartJsonWriter(std::ostream *osb)
27 | 	: ResponseJsonWriter(osb)
28 | {
29 | 	boundary_token_ = "ResponseBoundary";
30 | 	content_type_ = MIME_MULTIPART + ";boundary=" + boundary_token_;
31 | 	data_sent_ = false;
32 | }
33 | 
34 | void ResponseMultipartJsonWriter::SendJson(std::string json, bool final) {
35 | 	if (! data_sent_) {
36 | 		*out() << "\r\n--" << boundary_token_ << "\r\n";
37 | 		data_sent_ = true;
38 | 	}
39 | 
40 | 	*out() << "Content-Disposition: form-data; name=\""
41 | 		<< (final ? "result" : "partial")
42 | 		<< "\"\r\n"
43 | 		<< "Content-type: " << ResponseJsonWriter::GetContentType() << "\r\n"
44 | 		<< "\r\n";
45 | 
46 | 	ResponseJsonWriter::SendJson(json, final);
47 | 
48 | 	*out() << "\r\n"
49 | 		<< "--" << boundary_token_
50 | 		<< (final ? "--" : "")
51 | 		<< "\r\n";
52 | 
53 | 	out()->flush();
54 | }
55 | 
56 | } /* namespace apiai */
57 | 


--------------------------------------------------------------------------------
/src/ResponseMultipartJsonWriter.h:
--------------------------------------------------------------------------------
 1 | // ResponseMultipartJsonWriter.h
 2 | 
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //  http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
10 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
11 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
12 | // MERCHANTABLITY OR NON-INFRINGEMENT.
13 | // See the Apache 2 License for the specific language governing permissions and
14 | // limitations under the License.
15 | 
16 | #ifndef RESPONSEMULTIPARTJSONWRITER_H_
17 | #define RESPONSEMULTIPARTJSONWRITER_H_
18 | 
19 | #include "ResponseJsonWriter.h"
20 | 
21 | namespace apiai {
22 | 
/**
 * Writes recognition data to output stream as parts of a multipart response,
 * each part holding one JSON serialized object
 */
class ResponseMultipartJsonWriter: public ResponseJsonWriter {
public:
	/** Creates a writer that sends its output to the given stream */
	ResponseMultipartJsonWriter(std::ostream *osb);
	virtual ~ResponseMultipartJsonWriter();

	/** Get content type MIME string, including the boundary parameter */
	virtual const std::string &GetContentType() { return content_type_; }
protected:
	/** Sends one JSON document wrapped into part headers and boundary lines */
	virtual void SendJson(std::string json, bool final);
private:
	// Token that separates the parts of the response.
	std::string boundary_token_;
	// Full content type value returned by GetContentType().
	std::string content_type_;
	// True once the opening boundary has been written.
	bool data_sent_;
	static const std::string MIME_MULTIPART;
};
37 | 
38 | } /* namespace apiai */
39 | 
40 | #endif /* RESPONSEMULTIPARTJSONWRITER_H_ */
41 | 


--------------------------------------------------------------------------------
/src/Timing.cc:
--------------------------------------------------------------------------------
 1 | // Timing.cc
 2 | 
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //  http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
10 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
11 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
12 | // MERCHANTABLITY OR NON-INFRINGEMENT.
13 | // See the Apache 2 License for the specific language governing permissions and
14 | // limitations under the License.
15 | 
16 | #include "Timing.h"
17 | 
18 | namespace apiai {
19 | 
20 | // Current time from gettimeofday() in whole milliseconds, or 0 if that call fails.
21 | milliseconds_t getMilliseconds(struct timezone *tz) {
22 | 	timeval now;
23 | 	if (gettimeofday(&now, tz) != 0) return 0;
24 | 	return now.tv_sec * 1000 + now.tv_usec / 1000;
25 | }
26 | 
27 | // Milliseconds elapsed between `since` and the reading of getMilliseconds(tz).
28 | milliseconds_t getMillisecondsSince(milliseconds_t since, struct timezone *tz) {
29 | 	const milliseconds_t current = getMilliseconds(tz);
30 | 	return current - since;
31 | }
32 | 
33 | // Convenience overload: getMilliseconds(tz) called with a null `tz`.
34 | milliseconds_t getMilliseconds() {
35 | 	return getMilliseconds(static_cast<struct timezone *>(0));
36 | }
37 | 
38 | // Convenience overload: getMillisecondsSince(since, tz) called with a null `tz`.
39 | milliseconds_t getMillisecondsSince(milliseconds_t since) {
40 | 	return getMillisecondsSince(since, static_cast<struct timezone *>(0));
41 | }
42 | } /* namespace apiai */
43 | 


--------------------------------------------------------------------------------
/src/Timing.h:
--------------------------------------------------------------------------------
 1 | // Timing.h
 2 | 
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //  http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
10 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
11 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
12 | // MERCHANTABLITY OR NON-INFRINGEMENT.
13 | // See the Apache 2 License for the specific language governing permissions and
14 | // limitations under the License.
15 | 
16 | #ifndef TIMING_H_
17 | #define TIMING_H_
18 | 
19 | #include <sys/time.h>
20 | 
21 | namespace apiai {
22 | 
23 | typedef long int milliseconds_t; // a point in time or a time difference, counted in milliseconds
24 | 
25 | /**
26 |  * Get the current time as reported by gettimeofday(), in milliseconds; returns 0 if that call fails
27 |  */
28 | milliseconds_t getMilliseconds(struct timezone *tz); // tz is forwarded to gettimeofday(); NOTE(review): per gettimeofday(2) it is obsolete and should not shift the result - confirm
29 | 
30 | /**
31 |  * Get the difference between the current time (as returned by getMilliseconds(tz)) and the given time
32 |  */
33 | milliseconds_t getMillisecondsSince(milliseconds_t since, struct timezone *tz);
34 | 
35 | /**
36 |  * Get current time in milliseconds (getMilliseconds(tz) with a null tz)
37 |  */
38 | milliseconds_t getMilliseconds();
39 | 
40 | /**
41 |  * Get the time difference between the current time and the given time (null tz)
42 |  */
43 | milliseconds_t getMillisecondsSince(milliseconds_t since);
44 | 
45 | } /* namespace apiai */
46 | 
47 | 
48 | #endif /* TIMING_H_ */
49 | 


--------------------------------------------------------------------------------
/src/fcgi-nnet3-decoder.cc:
--------------------------------------------------------------------------------
 1 | // fcgi-nnet3-decoder.cc
 2 | 
 3 | // Licensed under the Apache License, Version 2.0 (the "License");
 4 | // you may not use this file except in compliance with the License.
 5 | // You may obtain a copy of the License at
 6 | //
 7 | //  http://www.apache.org/licenses/LICENSE-2.0
 8 | //
 9 | // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
10 | // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
11 | // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
12 | // MERCHANTABLITY OR NON-INFRINGEMENT.
13 | // See the Apache 2 License for the specific language governing permissions and
14 | // limitations under the License.
15 | 
16 | #include "Nnet3LatgenFasterDecoder.h"
17 | #include "FcgiDecodingApp.h"
18 | 
19 | using namespace apiai;
20 | 
21 | // Wires a Nnet3LatgenFasterDecoder into a FcgiDecodingApp and exits with the value Run() returns.
22 | int main(int argc, char **argv) {
23 | 	Nnet3LatgenFasterDecoder nnet3Decoder;
24 | 	FcgiDecodingApp app(nnet3Decoder);
25 | 	const int exitCode = app.Run(argc, argv);
26 | 	return exitCode;
27 | }
28 | 


--------------------------------------------------------------------------------