├── .gitignore ├── LICENSE ├── Makefile ├── README.adoc ├── examples ├── eliza.py ├── espeakng_tts.py ├── live_recorder.py ├── live_vad.py ├── va_eliza.py ├── va_simple.py └── wav_decoder.py ├── images └── highlevel.png ├── nltools ├── __init__.py ├── asr.py ├── macro_engine.py ├── misc.py ├── phonetics.py ├── pulseplayer.py ├── pulserecorder.py ├── sequiturclient.py ├── threadpool.py ├── tokenizer.py ├── tts.py └── vad.py ├── run_tests.sh ├── setup.py └── tests ├── test_asr.py ├── test_macro_engine.py ├── test_misc.py ├── test_phonetics.py ├── test_pulseplayer.py ├── test_pulserecorder.py ├── test_sequitur.py ├── test_tokenizer.py └── test_tts.py /.gitignore: -------------------------------------------------------------------------------- 1 | # use glob syntax. 2 | syntax: glob 3 | *.swp 4 | *.swo 5 | *.pyc 6 | tmp 7 | old 8 | *.log 9 | TODO 10 | 11 | # Byte-compiled / optimized / DLL files 12 | __pycache__/ 13 | *.py[cod] 14 | *$py.class 15 | 16 | # C extensions 17 | *.so 18 | 19 | models 20 | foo.wav 21 | .asciidoctor 22 | README.html 23 | README.md 24 | README.xml 25 | build 26 | dist 27 | py_nltools.egg-info 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. 
For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | all: README.html README.md dist 2 | 3 | SHELL := /bin/bash 4 | 5 | %.html: %.adoc 6 | asciidoctor -r asciidoctor-diagram -a toc $< 7 | 8 | README.md: README.adoc 9 | asciidoc -b docbook README.adoc 10 | iconv -t utf-8 README.xml | pandoc -f docbook -t markdown_strict | iconv -f utf-8 > README.md 11 | 12 | tests: 13 | nosetests 14 | 15 | dist: README.md 16 | python setup.py sdist 17 | python setup.py bdist_wheel --universal 18 | 19 | upload: 20 | twine upload dist/* 21 | 22 | clean: 23 | rm -f *.html images/*.png 24 | rm -rf dist build py_nltools.egg-info README.md README.xml 25 | -------------------------------------------------------------------------------- /README.adoc: -------------------------------------------------------------------------------- 1 | py-nltools 2 | ---------- 3 | 4 | A collection of abstraction layers and support functions that form the natural 5 | language processing foundation of the Zamia AI project: 6 | 7 | * `phonetics`: 
translation functions between various phonetic alphabets (IPA, X-SAMPA, X-ARPABET, ...) 8 | * `tts`: abstraction layer towards using eSpeak NG, MaryTTS, SVOX Pico TTS or a remote TTS server and sequitur g2p 9 | * `asr`: abstraction layer towards using kaldi-asr and pocketsphinx, models can be found here: http://goofy.zamia.org/voxforge/ 10 | * `sequiturclient`: g2p using sequitur 11 | * `pulseplayer`: audio playback through pulseaudio 12 | * `pulserecorder`: audio recording through pulseaudio 13 | * `tokenizer`: english, french and german word tokenizers aimed at spoken language applications 14 | * `threadpool`: simple thread pool implementation 15 | * `vad`: Voice Activity Detection finite state machine based on webrtc VAD 16 | * `macro_engine`: Simple macro engine aimed at generating natural language expansions 17 | 18 | I plan to add modules as I need them in the Zamia AI projects. Some modules like `phonetics` and `tokenizer` 19 | have some overlap with larger projects like NLTK or spaCy - my modules tend to be more hands-on and simple minded 20 | than these and therefore are in no way meant to replace them. 21 | 22 | ifndef::imagesdir[:imagesdir: images] 23 | 24 | ifndef::env-github[] 25 | [ditaa,"highlevel"] 26 | .... 
27 | +-----------------------------------------------------------------------------------------------+ 28 | | nltools | 29 | | +-----------+ +-----------+ +------------+ +--------------+ | 30 | | | tokenizer | | phonetics | | threadpool | | macro_engine | | 31 | | +-----------+ +-----------+ +------------+ +--------------+ | 32 | | | 33 | | +-----------+ +-----------+ +-----------+ +-----------+ +-----------+ | 34 | | | tts | | asr | | vad | | g2p | | audio | | 35 | | +-----------+ +-----------+ +-----------+ +-----------+ +-----------+ | 36 | | | | | | | | 37 | +-----------------------------------------------------------------------------------------------+ 38 | | | | | | 39 | +--------+---------+ +------+----+ | | | 40 | | | | | | | | | 41 | v v v v v v v v 42 | +------+ +--------+ +------+ +-------+ +-----------+ +--------+ +----------+ +------------+ 43 | | mary | | eSpeak | | pico | | kaldi | | cmusphinx | | webrtc | | sequitur | | pulseaudio | 44 | +------+ +--------+ +------+ +-------+ +-----------+ +--------+ +----------+ +------------+ 45 | .... 46 | endif::env-github[] 47 | ifdef::env-github[] 48 | image::highlevel.png[Highlevel Diagram] 49 | endif::env-github[] 50 | 51 | Requirements 52 | ~~~~~~~~~~~~ 53 | 54 | *Note*: probably incomplete. 55 | 56 | * Python 2.7 57 | * for TTS one or more of: 58 | - MaryTTS, py-marytts 59 | - espeak-ng, py-espeak-ng 60 | - SVOX Pico TTS, py-picotts 61 | * for ASR one or more of: 62 | - kaldi-asr 5.1, py-kaldi-asr 63 | - pocketsphinx 64 | * sequitur 65 | * pulseaudio 66 | * webrtc 67 | 68 | License 69 | ~~~~~~~ 70 | 71 | My own code is Apache-2.0 licensed unless otherwise noted in the script's copyright 72 | headers. 73 | 74 | Some scripts and files are based on works of others, in those cases it is my 75 | intention to keep the original license intact. Please make sure to check the 76 | copyright headers inside for more information. 
77 | 78 | Authors 79 | ~~~~~~~ 80 | 81 | Guenter Bartsch 82 | Paul Guyot 83 | 84 | -------------------------------------------------------------------------------- /examples/eliza.py: -------------------------------------------------------------------------------- 1 | #---------------------------------------------------------------------- 2 | # eliza.py 3 | # 4 | # a cheezy little Eliza knock-off by Joe Strout 5 | # with some updates by Jeff Epler 6 | # hacked into a module and updated by Jez Higgins 7 | #---------------------------------------------------------------------- 8 | 9 | import string 10 | import re 11 | import random 12 | 13 | class eliza: 14 | def __init__(self): 15 | self.keys = list(map(lambda x:re.compile(x[0], re.IGNORECASE),gPats)) 16 | self.values = list(map(lambda x:x[1],gPats)) 17 | 18 | #---------------------------------------------------------------------- 19 | # translate: take a string, replace any words found in dict.keys() 20 | # with the corresponding dict.values() 21 | #---------------------------------------------------------------------- 22 | def translate(self,str,dict): 23 | words = str.lower().split() 24 | keys = dict.keys(); 25 | for i in range(0,len(words)): 26 | if words[i] in keys: 27 | words[i] = dict[words[i]] 28 | return ' '.join(words) 29 | 30 | #---------------------------------------------------------------------- 31 | # respond: take a string, a set of regexps, and a corresponding 32 | # set of response lists; find a match, and return a randomly 33 | # chosen response from the corresponding list. 34 | #---------------------------------------------------------------------- 35 | def respond(self,str): 36 | # find a match among keys 37 | for i in range(0, len(self.keys)): 38 | match = self.keys[i].match(str) 39 | if match: 40 | # found a match ... stuff with corresponding value 41 | # chosen randomly from among the available options 42 | resp = random.choice(self.values[i]) 43 | # we've got a response... 
stuff in reflected text where indicated 44 | pos = resp.find('%') 45 | while pos > -1: 46 | num = int(resp[pos+1:pos+2]) 47 | resp = resp[:pos] + \ 48 | self.translate(match.group(num),gReflections) + \ 49 | resp[pos+2:] 50 | pos = resp.find('%') 51 | # fix munged punctuation at the end 52 | if resp[-2:] == '?.': resp = resp[:-2] + '.' 53 | if resp[-2:] == '??': resp = resp[:-2] + '?' 54 | return resp 55 | 56 | #---------------------------------------------------------------------- 57 | # gReflections, a translation table used to convert things you say 58 | # into things the computer says back, e.g. "I am" --> "you are" 59 | #---------------------------------------------------------------------- 60 | gReflections = { 61 | "am" : "are", 62 | "was" : "were", 63 | "i" : "you", 64 | "i'd" : "you would", 65 | "i've" : "you have", 66 | "i'll" : "you will", 67 | "my" : "your", 68 | "are" : "am", 69 | "you've": "I have", 70 | "you'll": "I will", 71 | "your" : "my", 72 | "yours" : "mine", 73 | "you" : "me", 74 | "me" : "you" 75 | } 76 | 77 | #---------------------------------------------------------------------- 78 | # gPats, the main response table. Each element of the list is a 79 | # two-element list; the first is a regexp, and the second is a 80 | # list of possible responses, with group-macros labelled as 81 | # %1, %2, etc. 
82 | #---------------------------------------------------------------------- 83 | gPats = [ 84 | [r'I need (.*)', 85 | [ "Why do you need %1?", 86 | "Would it really help you to get %1?", 87 | "Are you sure you need %1?"]], 88 | 89 | [r'Why don\'?t you ([^\?]*)\??', 90 | [ "Do you really think I don't %1?", 91 | "Perhaps eventually I will %1.", 92 | "Do you really want me to %1?"]], 93 | 94 | [r'Why can\'?t I ([^\?]*)\??', 95 | [ "Do you think you should be able to %1?", 96 | "If you could %1, what would you do?", 97 | "I don't know -- why can't you %1?", 98 | "Have you really tried?"]], 99 | 100 | [r'I can\'?t (.*)', 101 | [ "How do you know you can't %1?", 102 | "Perhaps you could %1 if you tried.", 103 | "What would it take for you to %1?"]], 104 | 105 | [r'I am (.*)', 106 | [ "Did you come to me because you are %1?", 107 | "How long have you been %1?", 108 | "How do you feel about being %1?"]], 109 | 110 | [r'I\'?m (.*)', 111 | [ "How does being %1 make you feel?", 112 | "Do you enjoy being %1?", 113 | "Why do you tell me you're %1?", 114 | "Why do you think you're %1?"]], 115 | 116 | [r'Are you ([^\?]*)\??', 117 | [ "Why does it matter whether I am %1?", 118 | "Would you prefer it if I were not %1?", 119 | "Perhaps you believe I am %1.", 120 | "I may be %1 -- what do you think?"]], 121 | 122 | [r'What (.*)', 123 | [ "Why do you ask?", 124 | "How would an answer to that help you?", 125 | "What do you think?"]], 126 | 127 | [r'How (.*)', 128 | [ "How do you suppose?", 129 | "Perhaps you can answer your own question.", 130 | "What is it you're really asking?"]], 131 | 132 | [r'Because (.*)', 133 | [ "Is that the real reason?", 134 | "What other reasons come to mind?", 135 | "Does that reason apply to anything else?", 136 | "If %1, what else must be true?"]], 137 | 138 | [r'(.*) sorry (.*)', 139 | [ "There are many times when no apology is needed.", 140 | "What feelings do you have when you apologize?"]], 141 | 142 | [r'Hello(.*)', 143 | [ "Hello... 
I'm glad you could drop by today.", 144 | "Hi there... how are you today?", 145 | "Hello, how are you feeling today?"]], 146 | 147 | [r'I think (.*)', 148 | [ "Do you doubt %1?", 149 | "Do you really think so?", 150 | "But you're not sure %1?"]], 151 | 152 | [r'(.*) friend (.*)', 153 | [ "Tell me more about your friends.", 154 | "When you think of a friend, what comes to mind?", 155 | "Why don't you tell me about a childhood friend?"]], 156 | 157 | [r'Yes', 158 | [ "You seem quite sure.", 159 | "OK, but can you elaborate a bit?"]], 160 | 161 | [r'(.*) computer(.*)', 162 | [ "Are you really talking about me?", 163 | "Does it seem strange to talk to a computer?", 164 | "How do computers make you feel?", 165 | "Do you feel threatened by computers?"]], 166 | 167 | [r'Is it (.*)', 168 | [ "Do you think it is %1?", 169 | "Perhaps it's %1 -- what do you think?", 170 | "If it were %1, what would you do?", 171 | "It could well be that %1."]], 172 | 173 | [r'It is (.*)', 174 | [ "You seem very certain.", 175 | "If I told you that it probably isn't %1, what would you feel?"]], 176 | 177 | [r'Can you ([^\?]*)\??', 178 | [ "What makes you think I can't %1?", 179 | "If I could %1, then what?", 180 | "Why do you ask if I can %1?"]], 181 | 182 | [r'Can I ([^\?]*)\??', 183 | [ "Perhaps you don't want to %1.", 184 | "Do you want to be able to %1?", 185 | "If you could %1, would you?"]], 186 | 187 | [r'You are (.*)', 188 | [ "Why do you think I am %1?", 189 | "Does it please you to think that I'm %1?", 190 | "Perhaps you would like me to be %1.", 191 | "Perhaps you're really talking about yourself?"]], 192 | 193 | [r'You\'?re (.*)', 194 | [ "Why do you say I am %1?", 195 | "Why do you think I am %1?", 196 | "Are we talking about you, or me?"]], 197 | 198 | [r'I don\'?t (.*)', 199 | [ "Don't you really %1?", 200 | "Why don't you %1?", 201 | "Do you want to %1?"]], 202 | 203 | [r'I feel (.*)', 204 | [ "Good, tell me more about these feelings.", 205 | "Do you often feel %1?", 206 | 
"When do you usually feel %1?", 207 | "When you feel %1, what do you do?"]], 208 | 209 | [r'I have (.*)', 210 | [ "Why do you tell me that you've %1?", 211 | "Have you really %1?", 212 | "Now that you have %1, what will you do next?"]], 213 | 214 | [r'I would (.*)', 215 | [ "Could you explain why you would %1?", 216 | "Why would you %1?", 217 | "Who else knows that you would %1?"]], 218 | 219 | [r'Is there (.*)', 220 | [ "Do you think there is %1?", 221 | "It's likely that there is %1.", 222 | "Would you like there to be %1?"]], 223 | 224 | [r'My (.*)', 225 | [ "I see, your %1.", 226 | "Why do you say that your %1?", 227 | "When your %1, how do you feel?"]], 228 | 229 | [r'You (.*)', 230 | [ "We should be discussing you, not me.", 231 | "Why do you say that about me?", 232 | "Why do you care whether I %1?"]], 233 | 234 | [r'Why (.*)', 235 | [ "Why don't you tell me the reason why %1?", 236 | "Why do you think %1?" ]], 237 | 238 | [r'I want (.*)', 239 | [ "What would it mean to you if you got %1?", 240 | "Why do you want %1?", 241 | "What would you do if you got %1?", 242 | "If you got %1, then what would you do?"]], 243 | 244 | [r'(.*) mother(.*)', 245 | [ "Tell me more about your mother.", 246 | "What was your relationship with your mother like?", 247 | "How do you feel about your mother?", 248 | "How does this relate to your feelings today?", 249 | "Good family relations are important."]], 250 | 251 | [r'(.*) father(.*)', 252 | [ "Tell me more about your father.", 253 | "How did your father make you feel?", 254 | "How do you feel about your father?", 255 | "Does your relationship with your father relate to your feelings today?", 256 | "Do you have trouble showing affection with your family?"]], 257 | 258 | [r'(.*) child(.*)', 259 | [ "Did you have close friends as a child?", 260 | "What is your favorite childhood memory?", 261 | "Do you remember any dreams or nightmares from childhood?", 262 | "Did the other children sometimes tease you?", 263 | "How do you think 
your childhood experiences relate to your feelings today?"]], 264 | 265 | [r'(.*)\?', 266 | [ "Why do you ask that?", 267 | "Please consider whether you can answer your own question.", 268 | "Perhaps the answer lies within yourself?", 269 | "Why don't you tell me?"]], 270 | 271 | [r'quit', 272 | [ "Thank you for talking with me.", 273 | "Good-bye.", 274 | "Thank you, that will be $150. Have a good day!"]], 275 | 276 | [r'(.*)', 277 | [ "Please tell me more.", 278 | "Let's change focus a bit... Tell me about your family.", 279 | "Can you elaborate on that?", 280 | "Why do you say that %1?", 281 | "I see.", 282 | "Very interesting.", 283 | "%1.", 284 | "I see. And what does that tell you?", 285 | "How does that make you feel?", 286 | "How do you feel when you say that?"]] 287 | ] 288 | 289 | #---------------------------------------------------------------------- 290 | # command_interface 291 | #---------------------------------------------------------------------- 292 | def command_interface(): 293 | print('Therapist\n---------') 294 | print('Talk to the program by typing in plain English, using normal upper-') 295 | print('and lower-case letters and punctuation. Enter "quit" when done.') 296 | print('='*72) 297 | print('Hello. 
How are you feeling today?') 298 | 299 | s = '' 300 | therapist = eliza(); 301 | while s != 'quit': 302 | try: 303 | s = input('> ') 304 | except EOFError: 305 | s = 'quit' 306 | print(s) 307 | while s[-1] in '!.': 308 | s = s[:-1] 309 | print(therapist.respond(s)) 310 | 311 | 312 | if __name__ == "__main__": 313 | command_interface() 314 | -------------------------------------------------------------------------------- /examples/espeakng_tts.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from nltools.tts import TTS 3 | 4 | tts = TTS(engine="espeak", voice="en") 5 | tts.say("hello from your pi") 6 | -------------------------------------------------------------------------------- /examples/live_recorder.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import logging 4 | import wave 5 | import struct 6 | import os 7 | import sys 8 | 9 | from datetime import date 10 | from optparse import OptionParser 11 | 12 | from nltools.asr import ASR 13 | from nltools.pulserecorder import PulseRecorder, MIX_MODE_BOTH, MIX_MODE_LEFT, MIX_MODE_RIGHT 14 | from nltools.vad import VAD 15 | from nltools import misc 16 | 17 | DEFAULT_VOLUME = 150 18 | SAMPLE_RATE = 16000 19 | DEFAULT_MIX_MODE = 'both' 20 | 21 | # 22 | # init 23 | # 24 | 25 | misc.init_app ('live_recorder') 26 | 27 | # 28 | # commandline parsing 29 | # 30 | 31 | parser = OptionParser("usage: %prog [options]") 32 | 33 | parser.add_option ("-m", "--mix-mode", dest='mix_mode', type='str', default=DEFAULT_MIX_MODE, 34 | help="mix mode (left, right, both - default: %s)" % DEFAULT_MIX_MODE) 35 | 36 | parser.add_option ("-V", "--volume", dest='volume', type='int', default=DEFAULT_VOLUME, 37 | help="volume, default: %d%%" % DEFAULT_VOLUME) 38 | 39 | parser.add_option ("-v", "--verbose", action="store_true", dest="verbose", 40 | help="enable verbose logging") 41 | 42 | (options, args) = 
parser.parse_args() 43 | 44 | if options.verbose: 45 | logging.basicConfig(level=logging.DEBUG) 46 | else: 47 | logging.basicConfig(level=logging.INFO) 48 | 49 | 50 | if options.mix_mode == 'left': 51 | mix_mode = MIX_MODE_LEFT 52 | elif options.mix_mode == 'right': 53 | mix_mode = MIX_MODE_RIGHT 54 | elif options.mix_mode == 'both': 55 | mix_mode = MIX_MODE_BOTH 56 | else: 57 | parser.print_usage() 58 | sys.exit(1) 59 | 60 | 61 | logging.info ("Initializing...") 62 | 63 | rec = PulseRecorder (volume=options.volume) 64 | vad = VAD() 65 | 66 | rec.start_recording(mix_mode=mix_mode) 67 | logging.info ("Please speak. (CTRL-C to exit)") 68 | 69 | cnt = 0 70 | wfs = None 71 | 72 | while True: 73 | 74 | samples = rec.get_samples() 75 | 76 | audio, finalize = vad.process_audio(samples) 77 | 78 | if not audio: 79 | continue 80 | 81 | logging.debug ("%8d got audio. finalize: %s" % (cnt, repr(finalize))) 82 | cnt =+ 1 83 | 84 | if not wfs: 85 | 86 | ds = date.strftime(date.today(), '%Y%m%d') 87 | audiofn = 'rec-%s.wav' % ds 88 | logging.debug('audiofn: %s' % audiofn) 89 | 90 | audiocnt = 0 91 | while True: 92 | audiocnt += 1 93 | audiofn = 'rec-%s-%03d.wav' % (ds, audiocnt) 94 | if not os.path.isfile(audiofn): 95 | break 96 | 97 | 98 | # create wav file 99 | 100 | wfs = wave.open(audiofn, 'wb') 101 | wfs.setnchannels(1) 102 | wfs.setsampwidth(2) 103 | wfs.setframerate(SAMPLE_RATE) 104 | 105 | logging.info('voice activity detected, recording to: %s' % audiofn) 106 | 107 | packed_audio = struct.pack('%sh' % len(audio), *audio) 108 | wfs.writeframes(packed_audio) 109 | 110 | if finalize: 111 | 112 | logging.info('recording to %s finished.' 
% audiofn) 113 | 114 | wfs.close() 115 | wfs = None 116 | 117 | 118 | -------------------------------------------------------------------------------- /examples/live_vad.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import logging 3 | logging.basicConfig(level=logging.INFO) 4 | from nltools.asr import ASR 5 | from nltools.pulserecorder import PulseRecorder 6 | from nltools.vad import VAD 7 | 8 | MODELDIR = '/opt/kaldi/model/kaldi-generic-en-tdnn_250' 9 | VOLUME = 150 10 | 11 | print ("Initializing...") 12 | 13 | rec = PulseRecorder (volume=VOLUME) 14 | asr = ASR(model_dir = MODELDIR) 15 | vad = VAD() 16 | 17 | rec.start_recording() 18 | print ("Please speak. (CTRL-C to exit)") 19 | 20 | while True: 21 | 22 | samples = rec.get_samples() 23 | 24 | audio, finalize = vad.process_audio(samples) 25 | 26 | if not audio: 27 | continue 28 | 29 | user_utt, confidence = asr.decode(audio, finalize) 30 | 31 | print ("\r%s " % user_utt, end='', flush=True) 32 | 33 | if finalize: 34 | print () 35 | 36 | -------------------------------------------------------------------------------- /examples/va_eliza.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import logging 3 | logging.basicConfig(level=logging.INFO) 4 | from enum import Enum 5 | from nltools.asr import ASR 6 | from nltools.pulserecorder import PulseRecorder 7 | from nltools.vad import VAD 8 | from nltools.tts import TTS 9 | from nltools.macro_engine import MacroEngine 10 | from nltools.misc import edit_distance 11 | from nltools.tokenizer import tokenize 12 | from eliza import eliza 13 | 14 | MODELDIR = '/opt/kaldi/model/kaldi-generic-en-tdnn_250' 15 | VOLUME = 150 16 | ED_THRESHOLD = 2 17 | 18 | class Intent(Enum): 19 | HELLO = 1 20 | LIGHT = 2 21 | RADIO = 3 22 | 23 | print ("Initializing...") 24 | 25 | radio_on = False 26 | lights_on = False 27 | asr = ASR(model_dir = MODELDIR) 28 | 
rec = PulseRecorder (volume=VOLUME) 29 | vad = VAD() 30 | tts = TTS(engine="espeak", voice="en") 31 | me = MacroEngine() 32 | eliza = eliza() 33 | 34 | utt_map = {} 35 | def add_utt (pattern, intent): 36 | for utterance, t in me.expand_macros('en', pattern): 37 | utt = ' '.join(utterance) 38 | utt_map[utt] = intent 39 | 40 | add_utt("(hi|hello|ok) computer", Intent.HELLO) 41 | add_utt("switch (on|off) the (light|lights)", Intent.LIGHT) 42 | add_utt("switch the (light|lights) (on|off)", Intent.LIGHT) 43 | add_utt("switch (on|off) the (music|radio)", Intent.RADIO) 44 | add_utt("switch the (music|radio) (on|off)", Intent.RADIO) 45 | 46 | rec.start_recording() 47 | print ("Please speak. (CTRL-C to exit)") 48 | 49 | while True: 50 | samples = rec.get_samples() 51 | audio, finalize = vad.process_audio(samples) 52 | if not audio: 53 | continue 54 | 55 | user_utt, c = asr.decode(audio, finalize) 56 | print ("\r%s " % user_utt, end='', flush=True) 57 | 58 | if finalize: 59 | print () 60 | 61 | best_dist = ED_THRESHOLD 62 | intent = None 63 | for utt in utt_map: 64 | dist = edit_distance (tokenize (utt, lang='en'), 65 | tokenize (user_utt, lang='en')) 66 | if (dist decoder 65 | 66 | if self._engine == ASR_ENGINE_NNET3: 67 | 68 | logging.debug ('loading ASR model %s from %s...' % (self._model_name, self._model_dir)) 69 | start_time = time.time() 70 | self.nnet3_model = KaldiNNet3OnlineModel ( self._model_dir, self._model_name, 71 | beam = kaldi_beam, 72 | acoustic_scale = kaldi_acoustic_scale, 73 | frame_subsampling_factor = kaldi_frame_subsampling_factor) 74 | logging.debug ('ASR model loaded. 
took %fs' % (time.time() - start_time)) 75 | 76 | elif self._engine == ASR_ENGINE_POCKETSPHINX: 77 | 78 | import pocketsphinx 79 | self.ps_config = pocketsphinx.Decoder.default_config() 80 | 81 | # determine CFG_N_TIED_STATES, CFG_WAVFILE_SRATE 82 | # cmusphinx-cont-voxforge-en-latest/etc/sphinx_train.cfg 83 | traincfg_fn = '%s/etc/sphinx_train.cfg' % model_dir 84 | n_tied_states = 6000 85 | self.ps_samplerate = 16000 86 | with open (traincfg_fn, 'r') as traincfg_f: 87 | for line in traincfg_f: 88 | if not line: 89 | break 90 | # $CFG_N_TIED_STATES = 6000; 91 | if 'CFG_N_TIED_STATES' in line: 92 | # logging.debug ('parsing train cfg line %s' % line) 93 | m = re.match (r"\$CFG_N_TIED_STATES\s+=\s+([0-9]+)\s*;", line.strip()) 94 | if m: 95 | n_tied_states = int(m.group(1)) 96 | # logging.debug ('matched, n_tied_states=%d' % n_tied_states) 97 | 98 | # $CFG_WAVFILE_SRATE = 16000.0; 99 | if 'CFG_WAVFILE_SRATE' in line: 100 | m = re.match (r"\$CFG_WAVFILE_SRATE\s+=\s+([0-9.]+)\s*;", line.strip()) 101 | if m: 102 | self.ps_samplerate = int(float(m.group(1))) 103 | 104 | self.ps_config.set_string('-hmm', '%s/model_parameters/%s.cd_cont_%d' % (model_dir, model_name, n_tied_states)) 105 | self.ps_config.set_float ('-lw', 10) 106 | self.ps_config.set_string('-feat', '1s_c_d_dd') 107 | self.ps_config.set_float ('-beam', 1e-80) 108 | self.ps_config.set_float ('-wbeam', 1e-40) 109 | self.ps_config.set_string('-dict', '%s/etc/%s.dic' % (model_dir, model_name)) 110 | self.ps_config.set_float ('-wip', 0.2) 111 | self.ps_config.set_string('-agc', 'none') 112 | self.ps_config.set_string('-varnorm', 'no') 113 | self.ps_config.set_string('-cmn', 'current') 114 | self.ps_config.set_string('-lm', '%s/etc/%s.lm.bin' % (model_dir, model_name)) 115 | 116 | self.ps_config.set_string('-logfn', "/dev/null") 117 | 118 | self.asr_in_utt = {} # stream_id -> Boolean 119 | 120 | else: 121 | raise Exception ('unknown ASR engine: %s' % self._engine) 122 | 123 | def decode (self, audio, do_finalize, 
sample_rate = DEFAULT_SAMPLE_RATE, stream_id = DEFAULT_STREAM_ID): 124 | 125 | if self._engine == ASR_ENGINE_NNET3: 126 | 127 | if not stream_id in self.asr_decoders: 128 | self.asr_decoders[stream_id] = KaldiNNet3OnlineDecoder (self.nnet3_model) 129 | 130 | decoder = self.asr_decoders[stream_id] 131 | decoder.decode(sample_rate, np.array(audio, dtype=np.float32), do_finalize) 132 | 133 | hstr, confidence = decoder.get_decoded_string() 134 | hstr = hstr.strip() 135 | 136 | elif self._engine == ASR_ENGINE_POCKETSPHINX: 137 | 138 | if sample_rate != self.ps_samplerate: 139 | raise Exception ('decode: samplerate does not match model: %d vs %d' % (sample_rate, self.ps_samplerate)) 140 | 141 | if not stream_id in self.asr_decoders: 142 | import pocketsphinx 143 | self.asr_decoders[stream_id] = pocketsphinx.Decoder(self.ps_config) 144 | self.asr_in_utt[stream_id] = False 145 | 146 | decoder = self.asr_decoders[stream_id] 147 | if not self.asr_in_utt[stream_id]: 148 | decoder.start_utt() 149 | self.asr_in_utt[stream_id] = True 150 | 151 | audios = struct.pack('<%dh' % len(audio), *audio) 152 | 153 | decoder.process_raw(audios, False, False) 154 | 155 | if not do_finalize: 156 | return None, 0.0 157 | 158 | decoder.end_utt() 159 | self.asr_in_utt[stream_id] = False 160 | 161 | hypothesis = decoder.hyp() 162 | logmath = decoder.get_logmath() 163 | hstr = hypothesis.hypstr.decode('utf8').strip() 164 | confidence = logmath.exp(hypothesis.prob) 165 | 166 | else: 167 | raise Exception ('unknown ASR engine: %s' % self._engine) 168 | 169 | return hstr, confidence 170 | 171 | 172 | @property 173 | def engine(self): 174 | return self._engine 175 | # @engine.setter 176 | # def engine(self, v): 177 | # self._engine = v 178 | 179 | @property 180 | def model_dir(self): 181 | return self._model_dir 182 | # @model_dir.setter 183 | # def model_dir(self, v): 184 | # self._model_dir = v 185 | 186 | @property 187 | def model_name(self): 188 | return self._model_name 189 | # 
@model_name.setter 190 | # def model_name(self, v): 191 | # self._model_name = v 192 | 193 | def decode_wav_file(self, wavfile): 194 | 195 | wavf = wave.open(wavfile, 'rb') 196 | 197 | # check format 198 | assert wavf.getnchannels()==1 199 | assert wavf.getsampwidth()==2 200 | assert wavf.getnframes()>0 201 | 202 | sample_rate = wavf.getframerate() 203 | 204 | # read the whole file into memory, for now 205 | num_frames = wavf.getnframes() 206 | frames = wavf.readframes(num_frames) 207 | 208 | samples = struct.unpack_from('<%dh' % num_frames, frames) 209 | 210 | wavf.close() 211 | 212 | return self.decode(samples, True, sample_rate) 213 | 214 | -------------------------------------------------------------------------------- /nltools/macro_engine.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # 5 | # Copyright 2018 Guenter Bartsch 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | # 19 | 20 | # 21 | # simple macro engine aimed at generating natural language expansions 22 | # 23 | # maintains dict of named macros for various languages 24 | # contains utility functions that expand macros to produce 25 | # training data input 26 | # 27 | 28 | import logging 29 | 30 | from copy import copy 31 | from past.builtins import basestring 32 | 33 | from nltools.tokenizer import tokenize 34 | 35 | class MacroEngine(object): 36 | 37 | def __init__(self): 38 | self.named_macros = {} 39 | 40 | def add_macro_expansion(self, name, expansion): 41 | if not name in self.named_macros: 42 | self.named_macros[name] = [] 43 | 44 | if isinstance(expansion, dict): 45 | exp = expansion 46 | else: 47 | exp = {'W': expansion} 48 | 49 | self.named_macros[name].append(exp) 50 | 51 | def expand_macros (self, lang, txt): 52 | 53 | logging.debug(u"expand macros : %s" % txt) 54 | 55 | implicit_macros = {} 56 | 57 | txt2 = '' 58 | 59 | i = 0 60 | while i0: 104 | 105 | parts1, cnt, r, mpos, macro_rs = todo.pop() 106 | 107 | if cnt >= len(parts1): 108 | done.append((r, mpos)) 109 | continue 110 | 111 | p1 = parts1[cnt] 112 | 113 | if cnt % 2 == 1: 114 | 115 | sub_parts = p1.split(':') 116 | 117 | if len(sub_parts) != 2: 118 | raise Exception ('syntax error in macro call %s' % repr(p1)) 119 | 120 | name = sub_parts[0] 121 | 122 | if name == 'empty': 123 | todo.append((parts, cnt+1, copy(r), mpos, copy(macro_rs))) 124 | else: 125 | 126 | vn = sub_parts[1] 127 | 128 | if name in macro_rs: 129 | macro = [ macro_rs[name] ] 130 | else: 131 | macro = self.named_macros.get(name, None) 132 | if not macro: 133 | macro = implicit_macros.get(name, None) 134 | if not macro: 135 | raise Exception ('unknown macro "%s" called' % name) 136 | 137 | for r3 in macro: 138 | r1 = copy(r) 139 | mpos1 = copy(mpos) 140 | macro_rs1 = copy(macro_rs) 141 | 142 | macro_rs1[name] = r3 143 | 144 | # take care of multiple invocactions of the same macro 145 | 146 | mpnn = 0 147 | while True: 148 | mpn = 
'%s_%d_start' % (name, mpnn) 149 | if not mpn in mpos1: 150 | break 151 | mpnn += 1 152 | 153 | mpos1['%s_%d_start' % (name, mpnn)] = len(r1) 154 | s3 = r3[vn] 155 | if isinstance (s3, basestring): 156 | s3 = tokenize (s3, lang=lang) 157 | r3[vn] = s3 158 | r1.extend(r3[vn]) 159 | mpos1['%s_%d_end' % (name, mpnn)] = len(r1) 160 | 161 | for vn3 in r3: 162 | mpos1['%s_%d_%s' % (name, mpnn, vn3.lower())] = r3[vn3] 163 | 164 | todo.append((parts, cnt+1, r1, mpos1, macro_rs1)) 165 | 166 | # if name == 'home_locations': 167 | # import pdb; pdb.set_trace() 168 | 169 | else: 170 | 171 | sub_parts = tokenize(p1, lang=lang, keep_punctuation=False) 172 | 173 | r = copy(r) 174 | r.extend(sub_parts) 175 | 176 | todo.append((parts, cnt+1, r, mpos, macro_rs)) 177 | 178 | return done 179 | 180 | -------------------------------------------------------------------------------- /nltools/misc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # 5 | # Copyright 2014, 2015, 2016, 2017 Guenter Bartsch 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
#
#

#
# just a collection of random utility subprograms
#

import sys
import os
import subprocess
try:
    import ConfigParser as configparser
except ImportError:
    import configparser
try:
    from imp import reload
except ImportError:
    pass
import shutil
import errno
import curses
import curses.textpad
import traceback
import logging
import code
import signal

try:
    from setproctitle import setproctitle
except ImportError:
    # setproctitle is a third-party nicety only used by init_app(); fall back
    # to a no-op so the rest of this module stays importable without it.
    def setproctitle(proc_title):
        pass

from os.path import expanduser

def load_config(configfn = '.nlprc', defaults=None):
    """Read ~/<configfn> into a ConfigParser instance and return it.

    BUGFIX: the `defaults` parameter used to be a mutable default argument
    (`defaults={}`), shared across calls.
    """

    home_path = expanduser("~")

    config = configparser.ConfigParser(defaults if defaults is not None else {})
    config.read("%s/%s" % (home_path, configfn))

    return config

def _debug(sig, frame):
    """Interrupt running process, and provide a python prompt for
    interactive debugging.

    source: http://stackoverflow.com/questions/132058/showing-the-stack-trace-from-a-running-python-application
    """
    d={'_frame':frame}         # Allow access to frame object.
    d.update(frame.f_globals)  # Unless shadowed by global
    d.update(frame.f_locals)

    i = code.InteractiveConsole(d)
    message  = "Signal received : entering python shell.\nTraceback:\n"
    message += ''.join(traceback.format_stack(frame))
    i.interact(message)

def init_app (proc_title):
    """Set the process title, fix py2 stdout encoding, install SIGUSR1 debug hook."""

    setproctitle (proc_title)

    if sys.version_info < (3, 0):
        # python 2 only: make utf-8 the default encoding, unbuffer stdout
        reload(sys)
        sys.setdefaultencoding('utf-8')
        sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)

    # install signal handler so SIGUSR1 will enter pdb

    signal.signal(signal.SIGUSR1, _debug)  # Register handler


def compress_ws (s):
    """Collapse runs of spaces in s to a single space (trailing spaces dropped)."""

    vc = True   # True while no pending space separator

    res = ''

    for c in s:

        if c == ' ':
            vc = False
        else:
            if vc:
                res = res + c
            else:
                res = res + ' ' + c
            vc = True

    return res

def run_command(command, capture_stderr=True):
    """Run command, yielding its output line by line (as bytes)."""
    p = subprocess.Popen(command,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT if capture_stderr else subprocess.PIPE)
    return iter(p.stdout.readline, b'')

# umlaut <-> TeX-style escape sequences used by the curses editor below
tex_umlaut_map = { u'ä': '"a', u'ü': '"u', u'ö': '"o', u'Ä':'"A', u'Ü':'"U', u'Ö':'"O', u'ß':'"s' }

def tex_encode (u):
    """Replace German umlauts in u by their TeX escape sequences."""

    s = ''

    for c in u:

        if c in tex_umlaut_map:
            s += tex_umlaut_map[c]
        else:
            s += str(c)

    return s

def tex_decode (s):
    """Inverse of tex_encode: turn TeX escape sequences back into umlauts."""

    u = ''

    pos = 0
    while (pos < len(s)):

        found = False

        for umlaut in tex_umlaut_map:
            v = tex_umlaut_map[umlaut]
            if s[pos:].startswith(v):
                u += umlaut
                pos += len(v)
                found = True
                break

        if not found:
            # BUGFIX: this used to read `u += unicode(s[pos])`, which raises
            # NameError on Python 3; plain indexing already yields a character.
            u += s[pos]
            pos += 1

    return u

def symlink(targetfn, linkfn):
    """Create a symlink, tolerating one that already exists."""
    try:
        os.symlink(targetfn, linkfn)
    except OSError as e:
        if e.errno == errno.EEXIST:
            logging.debug('symlink %s -> %s already exists' % (targetfn, linkfn))
        else:
            # BUGFIX: any other OSError (permissions, missing dir, ...) used
            # to be swallowed silently here.
            raise

def mkdirs(path):
    """mkdir -p: create path including parents, ok if it already exists."""
    try:
        os.makedirs(path)
    except OSError as exception:
        if exception.errno != errno.EEXIST:
            raise

def copy_file (src, dst):
    logging.debug("copying %s to %s" % (src, dst))
    shutil.copy(src, dst)


def edit_distance (s, t):
    """Levenshtein distance between sequences s and t.

    https://en.wikipedia.org/wiki/Wagner%E2%80%93Fischer_algorithm
    """

    # for all i and j, d[i,j] will hold the Levenshtein distance between
    # the first i words of s and the first j words of t;
    # note that d has (m+1)x(n+1) values

    m = len(s)
    n = len(t)

    d = [[0 for i in range(n+1)] for j in range(m+1)]

    for i in range (m+1):
        d[i][0] = i    # the distance of any first seq to an empty second seq
    for j in range (n+1):
        d[0][j] = j    # the distance of any second seq to an empty first seq

    for j in range (1, n+1):
        for i in range (1, m+1):

            if s[i-1] == t[j-1]:
                d[i][j] = d[i-1][j-1]         # no operation required
            else:
                d[i][j] = min ([
                           d[i-1][j] + 1,     # a deletion
                           d[i][j-1] + 1,     # an insertion
                           d[i-1][j-1] + 1    # a substitution
                          ])

    return d[m][n]

def limit_str(s, limit):
    """Truncate s to at most limit characters, ellipsizing with '...'."""

    l = len(s)

    if l<=limit:
        return s

    l = limit-3

    return s[:l] + '...'
208 | 209 | 210 | # 211 | # curses utils 212 | # 213 | 214 | def edit_popup (stdscr, title, s): 215 | 216 | my, mx = stdscr.getmaxyx() 217 | 218 | ww = mx * 9 / 10 219 | wh = 3 220 | 221 | wox = mx / 2 - ww/2 222 | woy = my / 2 - wh/2 223 | 224 | win = curses.newwin(wh, ww, woy, wox) 225 | win.box() 226 | win.addstr(0, 3, title) 227 | 228 | win.refresh() 229 | 230 | swin = win.derwin (1, ww-4, 1, 2) 231 | 232 | tb = curses.textpad.Textbox(swin, insert_mode=True) 233 | 234 | swin.insstr (0, 0, tex_encode(s)) 235 | 236 | swin.refresh() 237 | 238 | s = tex_decode(tb.edit()) 239 | 240 | return s.rstrip() 241 | 242 | def message_popup (stdscr, title, msg): 243 | 244 | my, mx = stdscr.getmaxyx() 245 | 246 | ww = len(title) 247 | 248 | lines = msg.split('\n') 249 | for line in lines: 250 | if len(line)>ww: 251 | ww = len(line) 252 | ww += 6 253 | wh = len(lines) + 2 254 | 255 | wox = mx / 2 - ww/2 256 | woy = my / 2 - wh/2 257 | 258 | win = curses.newwin(wh, ww, woy, wox) 259 | win.box() 260 | win.addstr(0, 3, title.encode('utf8')) 261 | 262 | win.refresh() 263 | 264 | swin = win.derwin (wh-2, ww-4, 1, 1) 265 | 266 | for i, line in enumerate(lines): 267 | swin.insstr (i, 0, line.encode('utf8')) 268 | 269 | swin.refresh() 270 | 271 | return swin 272 | 273 | def render_template(template_file, dst_file, **kwargs): 274 | """Copy template and substitute template strings 275 | 276 | File `template_file` is copied to `dst_file`. Then, each template variable 277 | is replaced by a value. Template variables are of the form 278 | 279 | {{val}} 280 | 281 | Example: 282 | 283 | Contents of template_file: 284 | 285 | VAR1={{val1}} 286 | VAR2={{val2}} 287 | VAR3={{val3}} 288 | 289 | render_template(template_file, output_file, val1="hello", val2="world") 290 | 291 | Contents of output_file: 292 | 293 | VAR1=hello 294 | VAR2=world 295 | VAR3={{val3}} 296 | 297 | :param template_file: Path to the template file. 298 | :param dst_file: Path to the destination file. 
299 | :param kwargs: Keys correspond to template variables. 300 | :return: 301 | """ 302 | with open(template_file) as f: 303 | template_text = f.read() 304 | 305 | dst_text = template_text 306 | 307 | for key, value in kwargs.iteritems(): 308 | dst_text = dst_text .replace("{{" + key + "}}", value) 309 | 310 | with open(dst_file, "wt") as f: 311 | f.write(dst_text) 312 | -------------------------------------------------------------------------------- /nltools/phonetics.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # 5 | # Copyright 2013, 2014, 2016, 2017 Guenter Bartsch 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | # 21 | # big phoneme table 22 | # 23 | # entries: 24 | # ( IPA, XSAMPA, MARY, ESPEAK ) 25 | # 26 | 27 | MAX_PHONEME_LENGTH = 2 28 | 29 | big_phoneme_table = [ 30 | 31 | # 32 | # stop 33 | # 34 | 35 | ( u'p' , 'p' , 'p', 'p' ), 36 | ( u'b' , 'b' , 'b', 'b' ), 37 | ( u't' , 't' , 't', 't' ), 38 | ( u'd' , 'd' , 'd', 'd' ), 39 | ( u'k' , 'k' , 'k', 'k' ), 40 | ( u'g' , 'g' , 'g', 'g' ), 41 | ( u'ʔ' , '?' , '?', '?' 
), 42 | 43 | # 44 | # 2 consonants 45 | # 46 | 47 | ( u'pf' , 'pf' , 'pf' , 'pf' ), 48 | ( u'ts' , 'ts' , 'ts' , 'ts' ), 49 | ( u'tʃ' , 'tS' , 'tS' , 'tS' ), 50 | ( u'dʒ' , 'dZ' , 'dZ' , 'dZ' ), 51 | 52 | # 53 | # fricative 54 | # 55 | 56 | ( u'f' , 'f' , 'f' , 'f' ), 57 | ( u'v' , 'v' , 'v' , 'v' ), 58 | ( u'θ' , 'T' , 'T' , 'T' ), 59 | ( u'ð' , 'D' , 'D' , 'D' ), 60 | ( u's' , 's' , 's' , 's' ), 61 | ( u'z' , 'z' , 'z' , 'z' ), 62 | ( u'ʃ' , 'S' , 'S' , 'S' ), 63 | ( u'ʒ' , 'Z' , 'Z' , 'Z' ), 64 | ( u'ç' , 'C' , 'C' , 'C' ), 65 | ( u'j' , 'j' , 'j' , 'j' ), 66 | ( u'x' , 'x' , 'x' , 'x' ), 67 | ( u'ʁ' , 'R' , 'R' , 'R' ), 68 | ( u'h' , 'h' , 'h' , 'h' ), 69 | ( u'ɥ' , 'H' , 'H' , 'H' ), 70 | 71 | # 72 | # nasal 73 | # 74 | 75 | ( u'm' , 'm' , 'm' , 'm' ), 76 | ( u'n' , 'n' , 'n' , 'n' ), 77 | ( u'ɳ' , 'N' , 'N' , 'N' ), 78 | ( u'ɲ' , 'J' , 'J' , 'J' ), 79 | 80 | # 81 | # liquid 82 | # 83 | 84 | ( u'l' , 'l' , 'l' , 'l' ), 85 | ( u'r' , 'r' , 'r' , 'r' ), 86 | 87 | # 88 | # glide 89 | # 90 | 91 | ( u'w' , 'w' , 'w', 'w' ), 92 | # see above ( u'j' , 'j' , 'j' ), 93 | 94 | # 95 | # vowels: monophongs 96 | # 97 | 98 | # front 99 | ( u'i' , 'i' , 'i' , 'i' ), 100 | ( u'ɪ' , 'I' , 'I' , 'I' ), 101 | ( u'y' , 'y' , 'y' , 'y' ), 102 | ( u'ʏ' , 'Y' , 'Y' , 'y' ), 103 | ( u'e' , 'e' , 'e' , 'e' ), 104 | ( u'ø' , '2' , '2' , 'W' ), 105 | ( u'œ' , '9' , '9' , 'W' ), 106 | ( u'œ̃' , '9~' , '9~' , 'W~' ), 107 | ( u'ɛ' , 'E' , 'E' , 'E' ), 108 | ( u'ɛ̃' , 'E~' , 'E~' , 'E~' ), 109 | ( u'æ' , '{' , '{' , 'a' ), 110 | ( u'a' , 'a' , 'a' , 'a' ), 111 | 112 | # central 113 | ( u'ʌ' , 'V' , 'V' , 'A' ), 114 | ( u'ə' , '@' , '@' , '@' ), 115 | ( u'ɐ' , '6' , '6' , '@' ), 116 | ( u'ɜ' , '3' , 'r=', '3' ), 117 | 118 | # back 119 | ( u'u' , 'u' , 'u' , 'u' ), 120 | ( u'ʊ' , 'U' , 'U' , 'U' ), 121 | ( u'o' , 'o' , 'o' , 'o' ), 122 | ( u'ɔ' , 'O' , 'O' , 'O' ), 123 | ( u'ɔ̃' , 'O~' , 'O~' , 'O~' ), 124 | ( u'ɑ' , 'A' , 'A' , 'A' ), 125 | ( u'ɑ̃' , 'A~' , 'A~' , 'A~' ), 126 | ( u'ɒ' , 'Q' 
, 'Q' , 'Q' ), 127 | 128 | # diphtongs 129 | 130 | ( u'aɪ' , 'aI' , 'aI' , 'aI' ), 131 | ( u'ɔɪ' , 'OI' , 'OI' , 'OI' ), 132 | ( u'aʊ' , 'aU' , 'aU' , 'aU' ), 133 | ( u'ɔʏ' , 'OY' , 'OY' , 'OY' ), 134 | 135 | # 136 | # misc 137 | # 138 | ( u'ː' , ':' , ':' , ':' ), 139 | ( u'-' , '-' , '-' , '-' ), 140 | ( u'\'' , '\'' , '\'' , '\'' ), 141 | 142 | # 143 | # noise 144 | # 145 | 146 | ( u'#' , '#' , '#' ), 147 | ] 148 | 149 | IPA_normalization = { 150 | u':' : u'ː', 151 | u'?' : u'ʔ', 152 | u'ɾ' : u'ʁ', 153 | u'ɡ' : u'g', 154 | u'ŋ' : u'ɳ', 155 | u' ' : None, 156 | u'(' : None, 157 | u')' : None, 158 | u'\u02c8' : u'\'', 159 | u'\u032f' : None, 160 | u'\u0329' : None, 161 | u'\u02cc' : None, 162 | u'\u200d' : None, 163 | u'\u0279' : None, 164 | u'\u0361' : None, 165 | } 166 | 167 | IPA_vowels = set([ 168 | u'i' , 169 | u'ɪ' , 170 | u'y' , 171 | u'ʏ' , 172 | u'e' , 173 | u'ø' , 174 | u'œ' , 175 | u'ɛ' , 176 | u'æ' , 177 | u'a' , 178 | 179 | # central 180 | u'ʌ' , 181 | u'ə' , 182 | u'ɐ' , 183 | u'ɜ' , 184 | 185 | # back 186 | u'u' , 187 | u'ʊ' , 188 | u'o' , 189 | u'ɔ' , 190 | u'ɑ' , 191 | u'ɒ' , 192 | 193 | # diphtongs 194 | 195 | u'aɪ' , 196 | u'ɔɪ' , 197 | u'aʊ' , 198 | u'ɔʏ' ]) 199 | 200 | XSAMPA_normalization = { 201 | ' ': None, 202 | '0': 'O', 203 | ',': None, 204 | } 205 | 206 | def _normalize (s, norm_table): 207 | 208 | buf = "" 209 | 210 | for c in s: 211 | 212 | if c in norm_table: 213 | 214 | x = norm_table[c] 215 | if x: 216 | buf += x 217 | else: 218 | buf += c 219 | 220 | return buf 221 | 222 | def _translate (graph, s, f_idx, t_idx, spaces=False): 223 | 224 | buf = "" 225 | i = 0 226 | l = len(s) 227 | 228 | while i < l: 229 | 230 | found = False 231 | 232 | for pl in range(MAX_PHONEME_LENGTH, 0, -1): 233 | 234 | if i + pl > l: 235 | continue 236 | 237 | substr = s[i : i+pl ] 238 | 239 | #print u"i: %s, pl: %d, substr: '%s'" % (i, pl, substr) 240 | 241 | for pe in big_phoneme_table: 242 | p_f = pe[f_idx] 243 | p_t = pe[t_idx] 244 | 245 | if substr == 
p_f: 246 | buf += p_t 247 | i += pl 248 | if i l: 490 | continue 491 | 492 | substr = s[i : i+pl ] 493 | 494 | #print u"i: %s, pl: %d, substr: '%s'" % (i, pl, substr) 495 | 496 | for pe in xs2xa_table: 497 | p_f = pe[0] 498 | p_t = pe[1] 499 | 500 | if substr == p_f: 501 | if len(buf)>0: 502 | buf += ' ' 503 | buf += p_t 504 | i += pl 505 | found = True 506 | break 507 | 508 | if found: 509 | break 510 | 511 | if not found: 512 | 513 | p = s[i] 514 | 515 | msg = u"xsampa2xarpabet: graph:'%s' - s:'%s' Phoneme not found: '%s' (%d) '%s'" % (graph, s, p, ord(p), s[i:]) 516 | 517 | raise Exception (msg.encode('UTF8')) 518 | 519 | return buf 520 | 521 | 522 | -------------------------------------------------------------------------------- /nltools/pulseplayer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # 5 | # Copyright 2013, 2014, 2016, 2017, 2018 Guenter Bartsch 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
#
#
# simple pulseaudio playback client
#

from io import BytesIO
import wave
import copy
import ctypes
import sys
import logging

from builtins import str as text

from threading import Thread, Lock, Condition

pa = ctypes.cdll.LoadLibrary('libpulse-simple.so.0')

PA_STREAM_PLAYBACK = 1
PA_SAMPLE_S16LE    = 3
BUFFSIZE           = 1024


class pa_sample_spec(ctypes.Structure):
    # mirrors struct pa_sample_spec, /usr/include/pulse/sample.h
    _fields_ = [
        ('format',   ctypes.c_int),
        ('rate',     ctypes.c_uint32),
        ('channels', ctypes.c_uint8),
    ]

pa_simple_new = pa.pa_simple_new
pa_simple_new.restype = ctypes.c_void_p # pointer(pa_simple)
pa_simple_new.argtypes = [
    ctypes.c_char_p,                  # server
    ctypes.c_char_p,                  # name,
    ctypes.c_int,                     # dir,
    ctypes.c_char_p,                  # dev,
    ctypes.c_char_p,                  # stream_name,
    ctypes.POINTER( pa_sample_spec ), # ss,
    ctypes.c_void_p,                  # pointer( pa_channel_map ), # map,
    ctypes.c_void_p,                  # pointer( pa_buffer_attr ), # attr,
    ctypes.POINTER(ctypes.c_int),     # error
]

pa_simple_write = pa.pa_simple_write
pa_simple_write.restype = ctypes.c_int
pa_simple_write.argtypes = [
    ctypes.c_void_p,                  # s
    ctypes.c_void_p,                  # data,
    ctypes.c_size_t,                  # bytes,
    ctypes.POINTER(ctypes.c_int),     # error
]

pa_simple_drain = pa.pa_simple_drain
pa_simple_drain.restype = ctypes.c_int
pa_simple_drain.argtypes = [
    ctypes.c_void_p,                  # s
    ctypes.POINTER(ctypes.c_int),     # error
]

pa_simple_free = pa.pa_simple_free
pa_simple_free.restype = None
pa_simple_free.argtypes = [ ctypes.c_void_p ]

class PulsePlayer:
    """Minimal pulseaudio playback client (WAV byte strings, libpulse-simple)."""

    def __init__(self, name):
        self.name      = text(name)
        self.playing   = False           # protected by self.lock / self.cond
        self.terminate = False           # asks _play_loop to stop early
        self.thread    = None
        self.lock      = Lock()
        self.cond      = Condition(self.lock)

    def _play_loop(self):
        # runs on self.thread: stream self.wf to pulseaudio until EOF or
        # self.terminate is set, then drain, free the stream and signal
        # completion via self.cond.

        logging.debug("_play_loop starts, a_sound: %d bytes" % len(self.a_sound))

        while not self.terminate:

            # Reading frames and writing to the stream.
            buf = self.wf.readframes(BUFFSIZE)
            if not buf:
                break

            if pa_simple_write(self.s, buf, len(buf), ctypes.byref(self.error)):
                raise Exception('Could not play file, error: %d!' % self.error.value)

        self.wf.close()

        if pa_simple_drain(self.s, ctypes.byref(self.error)):
            raise Exception('Could not simple drain!')

        # Freeing resources and closing connection.
        logging.debug ('pa.pa_simple_free %s...' % repr(self.s))
        pa_simple_free(self.s)

        self.lock.acquire()
        try:
            self.playing = False
            self.cond.notifyAll()
        finally:
            self.lock.release()

    def play(self, a_sound, async_=True):
        """Play a WAV byte string; interrupts any playback in progress.

        a_sound : complete WAV file contents as a byte string
        async_  : when False, block until playback has finished.

        BUGFIX: this parameter used to be called `async`, which is a
        reserved keyword since Python 3.7 and made the module fail to
        compile there (the old name could not be called anyway).
        """

        logging.debug("play starts, async: %s" % repr(async_))

        self.lock.acquire()
        try:
            # stop any playback in progress, wait for its thread to drain
            self.terminate = True
            while self.playing:
                self.cond.wait()

            if self.thread:
                self.thread.join()
                self.thread = None

            self.terminate = False
            self.playing   = True
            self.a_sound   = copy.copy(a_sound)

            self.wf = wave.open(BytesIO(self.a_sound), 'rb')

            self.ss = pa_sample_spec()

            self.ss.rate     = self.wf.getframerate()
            self.ss.channels = self.wf.getnchannels()
            self.ss.format   = PA_SAMPLE_S16LE

            self.error = ctypes.c_int(0)

            self.s = pa_simple_new(
                None,                     # Default server.
                self.name.encode('utf8'), # Application's name.
                PA_STREAM_PLAYBACK,       # Stream for playback.
                None,                     # Default device.
                b'playback',              # Stream's description.
                ctypes.byref(self.ss),    # Sample format.
                None,                     # Default channel map.
                None,                     # Default buffering attributes.
                ctypes.byref(self.error)  # Ignore error code.
            )
            if not self.s:
                raise Exception('Could not create pulse audio stream: {0}!'.format(
                                pa.strerror(ctypes.byref(self.error))))

            logging.debug ('pa_simple_new done, self.s: %s' % repr(self.s))

        finally:
            self.lock.release()

        self.thread = Thread(target=self._play_loop)
        self.thread.start()

        if not async_:
            # wait for player to finish
            self.lock.acquire()
            try:
                while self.playing:
                    self.cond.wait()

                self.thread.join()
                self.thread = None
            finally:
                self.lock.release()

# -------------------------------------------------------------------------------- /nltools/pulserecorder.py: --------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
# Copyright 2013, 2014, 2016, 2017, 2018 Guenter Bartsch
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
# simple pulseaudio recording client
#
# based on: http://freshfoo.com/blog/pulseaudio_monitoring

import ctypes
import threading
import logging
import time

import numpy as np
from builtins import str as text, range
from nltools.vad import BUFFER_DURATION

SOURCE_TIMEOUT = 30 # 3 seconds

PA_INVALID_INDEX = 4294967295 # ((uint32_t) -1)

pa = ctypes.cdll.LoadLibrary('libpulse.so.0')

#
# ctypes mirrors of the libpulse introspection structures and callbacks
#

class pa_proplist(ctypes.Structure):
    pass

pa_encoding   = ctypes.c_int # enum
pa_encoding_t = pa_encoding

class pa_format_info(ctypes.Structure):
    _fields_ = [
        ('encoding', pa_encoding_t),
        ('plist',    ctypes.POINTER(pa_proplist)),
    ]

class pa_context(ctypes.Structure):
    _fields_ = [ ]

pa_context_notify_cb_t  = ctypes.CFUNCTYPE(None, ctypes.POINTER(pa_context), ctypes.c_void_p)
pa_context_success_cb_t = ctypes.CFUNCTYPE(None, ctypes.POINTER(pa_context), ctypes.c_int, ctypes.c_void_p)

pa_sample_format   = ctypes.c_int # enum
pa_sample_format_t = pa_sample_format

pa_format_info_set_sample_format          = pa.pa_format_info_set_sample_format
pa_format_info_set_sample_format.restype  = None
pa_format_info_set_sample_format.argtypes = [ctypes.POINTER(pa_format_info), pa_sample_format_t]

class pa_sink_port_info(ctypes.Structure):
    _fields_ = [
        ('name',        ctypes.c_char_p),
        ('description', ctypes.c_char_p),
        ('priority',    ctypes.c_uint32),
        ('available',   ctypes.c_int),
    ]

class pa_sink_info(ctypes.Structure):
    pass

class pa_sample_spec(ctypes.Structure):
    _fields_ = [
        ('format',   pa_sample_format_t),
        ('rate',     ctypes.c_uint32),
        ('channels', ctypes.c_uint8),
    ]

# _fields_ for pa_source_info are assigned further below, after the
# dependent port/format structures have been declared
class pa_source_info(ctypes.Structure):
    pass

pa_channel_position = ctypes.c_int # enum
80 | pa_channel_position_t = pa_channel_position 81 | class pa_channel_map(ctypes.Structure): 82 | pass 83 | pa_channel_map._fields_ = [ 84 | ('channels', ctypes.c_uint8), 85 | ('map', pa_channel_position_t * 32), 86 | ] 87 | class pa_cvolume(ctypes.Structure): 88 | pass 89 | pa_volume_t = ctypes.c_uint32 90 | pa_cvolume._fields_ = [ 91 | ('channels', ctypes.c_uint8), 92 | ('values', pa_volume_t * 32), 93 | ] 94 | pa_source_flags = ctypes.c_int # enum 95 | pa_source_flags_t = pa_source_flags 96 | pa_source_state = ctypes.c_int # enum 97 | pa_source_state_t = pa_source_state 98 | class pa_source_port_info(ctypes.Structure): 99 | pass 100 | pa_source_port_info._fields_ = [ 101 | ('name', ctypes.c_char_p), 102 | ('description', ctypes.c_char_p), 103 | ('priority', ctypes.c_uint32), 104 | ('available', ctypes.c_int), 105 | ] 106 | pa_source_info._fields_ = [ 107 | ('name', ctypes.c_char_p), 108 | ('index', ctypes.c_uint32), 109 | ('description', ctypes.c_char_p), 110 | ('sample_spec', pa_sample_spec), 111 | ('channel_map', pa_channel_map), 112 | ('owner_module', ctypes.c_uint32), 113 | ('volume', pa_cvolume), 114 | ('mute', ctypes.c_int), 115 | ('monitor_of_sink', ctypes.c_uint32), 116 | ('monitor_of_sink_name', ctypes.c_char_p), 117 | ('latency', ctypes.c_uint64), 118 | ('driver', ctypes.c_char_p), 119 | ('flags', pa_source_flags_t), 120 | ('proplist', ctypes.POINTER(pa_proplist)), 121 | ('configured_latency', ctypes.c_uint64), 122 | ('base_volume', pa_volume_t), 123 | ('state', pa_source_state_t), 124 | ('n_volume_steps', ctypes.c_uint32), 125 | ('card', ctypes.c_uint32), 126 | ('n_ports', ctypes.c_uint32), 127 | ('ports', ctypes.POINTER(ctypes.POINTER(pa_source_port_info))), 128 | ('active_port', ctypes.POINTER(pa_source_port_info)), 129 | ('n_formats', ctypes.c_uint8), 130 | ('formats', ctypes.POINTER(ctypes.POINTER(pa_format_info))), 131 | ] 132 | pa_source_info_cb_t = ctypes.CFUNCTYPE(None, ctypes.POINTER(pa_context), ctypes.POINTER(pa_source_info), ctypes.c_int, 
ctypes.c_void_p) 133 | class pa_stream(ctypes.Structure): 134 | pass 135 | pa_stream._fields_ = [ 136 | ] 137 | pa_stream_request_cb_t = ctypes.CFUNCTYPE(None, ctypes.POINTER(pa_stream), ctypes.c_size_t, ctypes.c_void_p) 138 | 139 | class pa_threaded_mainloop(ctypes.Structure): 140 | pass 141 | pa_threaded_mainloop._fields_ = [ 142 | ] 143 | pa_threaded_mainloop_new = pa.pa_threaded_mainloop_new 144 | pa_threaded_mainloop_new.restype = ctypes.POINTER(pa_threaded_mainloop) 145 | pa_threaded_mainloop_new.argtypes = [] 146 | 147 | class pa_mainloop_api(ctypes.Structure): 148 | pass 149 | pa_threaded_mainloop_get_api = pa.pa_threaded_mainloop_get_api 150 | pa_threaded_mainloop_get_api.restype = ctypes.POINTER(pa_mainloop_api) 151 | pa_threaded_mainloop_get_api.argtypes = [ctypes.POINTER(pa_threaded_mainloop)] 152 | 153 | pa_context_new = pa.pa_context_new 154 | pa_context_new.restype = ctypes.POINTER(pa_context) 155 | pa_context_new.argtypes = [ctypes.POINTER(pa_mainloop_api), ctypes.c_char_p] 156 | 157 | pa_context_set_state_callback = pa.pa_context_set_state_callback 158 | pa_context_set_state_callback.restype = None 159 | pa_context_set_state_callback.argtypes = [ctypes.POINTER(pa_context), pa_context_notify_cb_t, ctypes.c_void_p] 160 | 161 | pa_context_flags = ctypes.c_int # enum 162 | pa_context_flags_t = pa_context_flags 163 | 164 | class pa_spawn_api(ctypes.Structure): 165 | pass 166 | 167 | pa_context_connect = pa.pa_context_connect 168 | pa_context_connect.restype = ctypes.c_int 169 | pa_context_connect.argtypes = [ctypes.POINTER(pa_context), ctypes.c_char_p, pa_context_flags_t, ctypes.POINTER(pa_spawn_api)] 170 | 171 | pa_threaded_mainloop_start = pa.pa_threaded_mainloop_start 172 | pa_threaded_mainloop_start.restype = ctypes.c_int 173 | pa_threaded_mainloop_start.argtypes = [ctypes.POINTER(pa_threaded_mainloop)] 174 | 175 | pa_threaded_mainloop_lock = pa.pa_threaded_mainloop_lock 176 | pa_threaded_mainloop_lock.restype = None 177 | 
pa_threaded_mainloop_lock.argtypes = [ctypes.POINTER(pa_threaded_mainloop)] 178 | 179 | pa_context_disconnect = pa.pa_context_disconnect 180 | pa_context_disconnect.restype = None 181 | pa_context_disconnect.argtypes = [ctypes.POINTER(pa_context)] 182 | 183 | pa_context_unref = pa.pa_context_unref 184 | pa_context_unref.restype = None 185 | pa_context_unref.argtypes = [ctypes.POINTER(pa_context)] 186 | 187 | pa_threaded_mainloop_unlock = pa.pa_threaded_mainloop_unlock 188 | pa_threaded_mainloop_unlock.restype = None 189 | pa_threaded_mainloop_unlock.argtypes = [ctypes.POINTER(pa_threaded_mainloop)] 190 | 191 | pa_threaded_mainloop_stop = pa.pa_threaded_mainloop_stop 192 | pa_threaded_mainloop_stop.restype = None 193 | pa_threaded_mainloop_stop.argtypes = [ctypes.POINTER(pa_threaded_mainloop)] 194 | 195 | pa_threaded_mainloop_free = pa.pa_threaded_mainloop_free 196 | pa_threaded_mainloop_free.restype = None 197 | pa_threaded_mainloop_free.argtypes = [ctypes.POINTER(pa_threaded_mainloop)] 198 | 199 | pa_context_get_state = pa.pa_context_get_state 200 | pa_context_get_state.restype = ctypes.c_int 201 | pa_context_get_state.argtypes = [ctypes.POINTER(pa_context)] 202 | 203 | PA_CONTEXT_NOFLAGS = 0 204 | PA_CONTEXT_NOFAIL = 2 205 | PA_CONTEXT_NOAUTOSPAWN = 1 206 | 207 | PA_CONTEXT_UNCONNECTED = 0 208 | PA_CONTEXT_CONNECTING = 1 209 | PA_CONTEXT_AUTHORIZING = 2 210 | PA_CONTEXT_READY = 4 211 | PA_CONTEXT_FAILED = 5 212 | PA_CONTEXT_TERMINATED = 6 213 | 214 | class pa_operation(ctypes.Structure): 215 | pass 216 | pa_context_get_source_info_list = pa.pa_context_get_source_info_list 217 | pa_context_get_source_info_list.restype = ctypes.POINTER(pa_operation) 218 | pa_context_get_source_info_list.argtypes = [ctypes.POINTER(pa_context), pa_source_info_cb_t, ctypes.c_void_p] 219 | 220 | PA_VOLUME_NORM = 65536 221 | 222 | pa_context_set_source_volume_by_index = pa.pa_context_set_source_volume_by_index 223 | pa_context_set_source_volume_by_index.restype = 
ctypes.POINTER(pa_operation) 224 | pa_context_set_source_volume_by_index.argtypes = [ctypes.POINTER(pa_context), ctypes.c_uint32, ctypes.POINTER(pa_cvolume), pa_context_success_cb_t, ctypes.c_void_p] 225 | 226 | pa_operation_unref = pa.pa_operation_unref 227 | pa_operation_unref.restype = None 228 | pa_operation_unref.argtypes = [ctypes.POINTER(pa_operation)] 229 | 230 | PA_SAMPLE_INVALID = -1 231 | PA_SAMPLE_U8 = 0 232 | PA_SAMPLE_ALAW = 1 233 | PA_SAMPLE_ULAW = 2 234 | PA_SAMPLE_S16LE = 3 235 | PA_SAMPLE_S16BE = 4 236 | PA_SAMPLE_FLOAT32LE = 5 237 | PA_SAMPLE_FLOAT32BE = 6 238 | PA_SAMPLE_S32LE = 7 239 | PA_SAMPLE_S32BE = 8 240 | PA_SAMPLE_S24LE = 9 241 | PA_SAMPLE_S24BE = 10 242 | PA_SAMPLE_S24_32LE = 11 243 | PA_SAMPLE_S24_32BE = 12 244 | PA_SAMPLE_MAX = 13 245 | 246 | pa_stream_new = pa.pa_stream_new 247 | pa_stream_new.restype = ctypes.POINTER(pa_stream) 248 | pa_stream_new.argtypes = [ctypes.POINTER(pa_context), ctypes.c_char_p, ctypes.POINTER(pa_sample_spec), ctypes.POINTER(pa_channel_map)] 249 | 250 | pa_stream_set_read_callback = pa.pa_stream_set_read_callback 251 | pa_stream_set_read_callback.restype = None 252 | pa_stream_set_read_callback.argtypes = [ctypes.POINTER(pa_stream), pa_stream_request_cb_t, ctypes.c_void_p] 253 | 254 | PA_STREAM_ADJUST_LATENCY = 8192 255 | 256 | pa_stream_flags = ctypes.c_int # enum 257 | pa_stream_flags_t = pa_stream_flags 258 | class pa_buffer_attr(ctypes.Structure): 259 | pass 260 | pa_buffer_attr._fields_ = [ 261 | ('maxlength', ctypes.c_uint32), 262 | ('tlength', ctypes.c_uint32), 263 | ('prebuf', ctypes.c_uint32), 264 | ('minreq', ctypes.c_uint32), 265 | ('fragsize', ctypes.c_uint32), 266 | ] 267 | 268 | pa_stream_connect_record = pa.pa_stream_connect_record 269 | pa_stream_connect_record.restype = ctypes.c_int 270 | pa_stream_connect_record.argtypes = [ctypes.POINTER(pa_stream), ctypes.c_char_p, ctypes.POINTER(pa_buffer_attr), pa_stream_flags_t] 271 | 272 | pa_stream_peek = pa.pa_stream_peek 273 | 
pa_stream_peek.restype = ctypes.c_int
pa_stream_peek.argtypes = [ctypes.POINTER(pa_stream), ctypes.POINTER(ctypes.c_void_p), ctypes.POINTER(ctypes.c_size_t)]

pa_stream_drop = pa.pa_stream_drop
pa_stream_drop.restype = ctypes.c_int
pa_stream_drop.argtypes = [ctypes.POINTER(pa_stream)]

def null_cb(a=None, b=None, c=None, d=None):
    # no-op success callback handed to libpulse where we do not care about the result
    return

# which stereo channel(s) to record from
MIX_MODE_BOTH  = 0
MIX_MODE_LEFT  = 1
MIX_MODE_RIGHT = 2

DEFAULT_VOLUME            = 100
DEFAULT_RATE              = 16000
DEFAULT_NAME              = b'Python PulseRecorder'
DEFAULT_FRAMES_PER_BUFFER = int(DEFAULT_RATE * BUFFER_DURATION / 1000)
DEFAULT_MIX_MODE          = MIX_MODE_BOTH

class PulseRecorder(object):
    """Record 16 kHz / 16 bit audio from a pulseaudio source.

    The recorder auto-detects a suitable microphone source (scoring sources
    reported by pulseaudio and preferring ones whose port name contains
    'mic'), sets its volume, connects a record stream and hands out complete
    int16 numpy buffers via get_samples().
    """

    def __init__(self, volume=DEFAULT_VOLUME, rate=DEFAULT_RATE, source_name=None):
        # volume      : 0-100, applied to the detected source before recording
        # rate        : sample rate in Hz requested from pulseaudio
        # source_name : optional substring matched against source descriptions
        #               to boost a specific source during auto-detection
        self.match_source_name  = source_name
        self.rate               = rate
        self.volume             = volume
        self.source_idx         = -1
        self.source_score       = 0
        self.source_log         = False
        self.source_name        = ''
        self.source_description = ''

        # Wrap callback methods in appropriate ctypefunc instances so
        # that the Pulseaudio C API can call them (and keep references
        # alive so they are not garbage-collected)
        self._context_notify_cb = pa_context_notify_cb_t(self.context_notify_cb)
        self._source_info_cb    = pa_source_info_cb_t(self.source_info_cb)
        self._stream_read_cb    = pa_stream_request_cb_t(self.stream_read_cb)
        self._null_cb           = pa_context_success_cb_t(null_cb)

        # lock/cond protecting self._buffers, shared between the
        # pulseaudio callback thread and get_samples() callers

        self._lock = threading.Lock()
        self._cond = threading.Condition(self._lock)

    def start_recording(self, frames_per_buffer=DEFAULT_FRAMES_PER_BUFFER, mix_mode=DEFAULT_MIX_MODE):
        """Connect to pulseaudio and start recording.

        Blocks for up to SOURCE_TIMEOUT * 0.1 seconds while the source is
        auto-detected; raises Exception if no suitable input source is found.
        """
        logging.debug("start_recording...")

        self._frames_per_buffer = frames_per_buffer
        self._mix_mode          = mix_mode
        # LEFT/RIGHT need a stereo stream so one channel can be picked out
        self._record_stereo     = mix_mode != MIX_MODE_BOTH
        self._buffers           = []
        self._cur_buf_cnt       = 0
        self.source_idx         = -1
        self.source_score       = 0
        self.source_log         = False
        self.source_name        = ''
        self.source_description = ''

        self._buffers.append(np.empty(self._frames_per_buffer, dtype=np.int16))

        self._mainloop = pa_threaded_mainloop_new()
        _mainloop_api  = pa_threaded_mainloop_get_api(self._mainloop)
        self._context  = pa_context_new(_mainloop_api, DEFAULT_NAME)

        pa_context_set_state_callback(self._context, self._context_notify_cb, None)
        pa_context_connect(self._context, None, 0, None)

        pa_threaded_mainloop_start(self._mainloop)

        # wait for audio source detection (performed asynchronously in
        # context_notify_cb / source_info_cb on the mainloop thread)
        cnt = 0
        while (self.source_idx < 0) and (cnt < SOURCE_TIMEOUT):
            cnt += 1
            time.sleep(0.1)
        if self.source_idx < 0:
            raise Exception("Pulserecorder: no suitable input source found.")

    def stop_recording(self):
        """Disconnect from pulseaudio and free the threaded mainloop."""
        logging.debug("stop_recording...")

        pa_threaded_mainloop_lock(self._mainloop)
        pa_context_disconnect(self._context)
        pa_context_unref(self._context)
        pa_threaded_mainloop_unlock(self._mainloop)

        pa_threaded_mainloop_stop(self._mainloop)
        pa_threaded_mainloop_free(self._mainloop)

        self.source_idx = -1

    def context_notify_cb(self, context, _):
        """pulseaudio context state callback: kick off source enumeration once ready."""
        state = pa_context_get_state(context)

        if state == PA_CONTEXT_READY:
            logging.debug("Pulseaudio connection ready...")
            o = pa_context_get_source_info_list(context, self._source_info_cb, None)
            pa_operation_unref(o)

        elif state == PA_CONTEXT_FAILED:
            logging.error("Connection failed")

        elif state == PA_CONTEXT_TERMINATED:
            logging.debug("Connection terminated")

    def source_info_cb(self, context, source_info_p, eol, __):
        """Called once per source; on eol, set volume and connect the record stream.

        Scores each non-monitor source (bonus for a matching description and
        for a port whose name contains 'mic') and remembers the best one.
        """
        logging.debug("source_info_cb... eol: %d" % eol)

        if eol:
            if not self.source_log:
                logging.info(u'audio source: %s' % self.source_description.decode('utf8', 'ignore'))
                logging.debug(u'name: %s' % text(self.source_name))
                self.source_log = True

            if self.source_idx < 0:
                logging.error("Pulserecorder: no suitable input source found.")

            #
            # set volume first
            #

            cvol = pa_cvolume()
            cvol.channels = 1
            cvol.values[0] = int((self.volume * PA_VOLUME_NORM) / 100)

            operation = pa_context_set_source_volume_by_index(self._context, self.source_idx, cvol, self._null_cb, None)
            pa_operation_unref(operation)

            logging.debug('recording from %s' % self.source_name)

            samplespec = pa_sample_spec()
            samplespec.channels = 2 if self._record_stereo else 1
            samplespec.format   = PA_SAMPLE_S16LE
            samplespec.rate     = self.rate

            pa_stream = pa_stream_new(context, b"pulserecorder", samplespec, None)
            pa_stream_set_read_callback(pa_stream,
                                        self._stream_read_cb,
                                        self.source_idx)

            # flags = PA_STREAM_NOFLAGS
            flags = PA_STREAM_ADJUST_LATENCY

            # request fragments sized to one buffer (2 bytes/sample, x2 if stereo)
            fragsize = self._frames_per_buffer * 2
            if self._record_stereo:
                fragsize *= 2
            buffer_attr = pa_buffer_attr(-1, -1, -1, -1, fragsize=fragsize)

            pa_stream_connect_record(pa_stream,
                                     self.source_name,
                                     buffer_attr,
                                     flags)

        if not source_info_p:
            return

        source_info = source_info_p.contents

        logging.debug('index       : %d' % source_info.index)
        logging.debug('name        : %s' % source_info.name)
        logging.debug('description : %s' % source_info.description)
        logging.debug('monitor of  : %d' % source_info.monitor_of_sink)

        # skip monitor sources (they record sink output, not a microphone)
        if source_info.monitor_of_sink != PA_INVALID_INDEX:
            logging.debug("ignoring source: monitor")
            return

        score = 1

        if self.match_source_name and (text(self.match_source_name) in text(source_info.description)):
            score += 100

        # microphone source auto-detection magic

        if source_info.ports:
            score += 1

            for pi in range(source_info.n_ports):
                if text('mic') in text(source_info.ports[pi].contents.name):
                    logging.debug("mic port found")
                    score += 1
                    break

        logging.debug('source score: %d, highest score so far: %d' % (score, self.source_score))

        if score > self.source_score:
            self.source_idx         = source_info.index
            self.source_score       = score
            self.source_name        = source_info.name
            self.source_description = source_info.description

    def stream_read_cb(self, stream, length, index_incr):
        """pulseaudio read callback: decode S16LE samples into int16 buffers.

        Runs on the mainloop thread; notifies get_samples() waiters whenever
        a buffer of _frames_per_buffer samples is complete.
        """
        data = ctypes.c_void_p()
        # NOTE(review): third arg is declared POINTER(c_size_t); passing a
        # c_ulong instance works because c_size_t aliases c_ulong on the
        # targeted 64-bit linux platforms -- confirm if porting
        pa_stream_peek(stream, data, ctypes.c_ulong(length))
        data = ctypes.cast(data, ctypes.POINTER(ctypes.c_ubyte))

        bytes_per_sample = 4 if self._record_stereo else 2
        num_samples      = int(length / bytes_per_sample)

        with self._lock:
            for i in range(num_samples):

                # byte offsets of the sample to keep within one frame
                if self._mix_mode == MIX_MODE_BOTH:
                    off_low  = 0
                    off_high = 1
                elif self._mix_mode == MIX_MODE_LEFT:
                    off_low  = 0
                    off_high = 1
                elif self._mix_mode == MIX_MODE_RIGHT:
                    off_low  = 2
                    off_high = 3

                sample = data[i * bytes_per_sample + off_low] + 256 * data[i * bytes_per_sample + off_high]
                # stream is PA_SAMPLE_S16LE: sign-extend explicitly instead of
                # relying on numpy wrapping out-of-range ints on assignment
                # (which raises OverflowError on NumPy >= 2.0)
                if sample >= 32768:
                    sample -= 65536

                self._buffers[-1][self._cur_buf_cnt] = sample
                self._cur_buf_cnt += 1

                # buffer full? start a fresh one and wake up get_samples()
                if self._cur_buf_cnt >= self._frames_per_buffer:
                    self._buffers.append(np.empty(self._frames_per_buffer, dtype=np.int16))
                    self._cur_buf_cnt = 0
                    self._cond.notify_all()

        pa_stream_drop(stream)

    def get_samples(self):
        """Block until a complete recording buffer is available and return it.

        Returns a numpy int16 array of _frames_per_buffer samples.
        """
        with self._lock:
            # the last buffer in the list is still being filled, so wait
            # until at least one earlier, complete buffer exists
            while len(self._buffers) < 2:
                self._cond.wait()
            buf = self._buffers.pop(0)
        return buf
#
#
# crude sequitur g2p interface
#

import logging
import tempfile
import traceback

# absolute imports, consistent with the other nltools modules
# (py2-style implicit relative 'import misc' fails under Python 3)
from nltools import misc
from nltools.phonetics import xsampa2ipa

def _to_text(l):
    # run_command yields bytes; normalize to text once so the string
    # operations below work on both Python 2 and 3
    return l.decode('utf8', errors='ignore') if isinstance(l, bytes) else l

def sequitur_gen_ipa(modelfn, word):
    """Run the external sequitur g2p.py tool on a single word.

    modelfn : path to a trained sequitur model
    word    : unicode word to transcribe
    returns the IPA transcription (u'' if g2p produced no usable output).
    """
    ipa = u''

    with tempfile.NamedTemporaryFile() as f:

        f.write((u'%s\n' % word).encode('utf8'))
        f.flush()

        cmd = ['g2p.py', '--encoding=UTF8', '--model', modelfn, '--apply', f.name]

        res = misc.run_command(cmd)

        logging.debug('%s' % ' '.join(cmd))

        for l in res:

            line = _to_text(l).strip()

            logging.debug('%s' % line)

            # g2p.py chatter, not a transcription line
            if 'stack usage:' in line:
                continue

            if word in line:
                parts = line.split('\t')

                if len(parts) < 2:
                    continue

                xs = parts[1]

                ipa = xsampa2ipa(word, xs)

    return ipa

def sequitur_gen_ipa_multi(modelfn, words):
    """Run the external sequitur g2p.py tool on many words in one call.

    modelfn : path to a trained sequitur model
    words   : iterable of unicode words
    returns a dict mapping each successfully transcribed word to its IPA.
    """
    ipa_map = {}

    with tempfile.NamedTemporaryFile() as f:

        for word in words:
            f.write((u'%s\n' % word).encode('utf8'))
        f.flush()

        cmd = ['g2p.py', '--encoding=UTF8', '--model', modelfn, '--apply', f.name]

        res = misc.run_command(cmd, capture_stderr=False)

        logging.debug('%s' % ' '.join(cmd))

        for l in res:

            line = _to_text(l).strip()

            logging.debug('%s' % line)

            if 'stack usage:' in line:
                continue

            parts = line.split('\t')

            if len(parts) < 2:
                continue

            try:
                word = parts[0]
                if word in words:

                    xs = parts[1]

                    ipa = xsampa2ipa(word, xs)
                    ipa_map[word] = ipa
            except:
                # best-effort: log and keep processing the remaining lines
                logging.error("Error processing line %s:" % line)
                logging.error(traceback.format_exc())

    return ipa_map
#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
# Copyright 2017 Guenter Bartsch
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
# A simple thread pool implementation
#
from __future__ import print_function

import traceback
import logging

try:
    # Python 3
    from queue import Queue, Empty
except ImportError:
    # Python 2
    from Queue import Queue, Empty
from threading import Thread, Lock

class Worker(Thread):
    """Thread executing tasks from a given tasks queue"""

    def __init__(self, tasks, idx):
        # tasks : shared Queue of (func, args, kargs) tuples
        # idx   : worker number, for debugging
        Thread.__init__(self)
        self.tasks  = tasks
        self.idx    = idx
        # set to True (e.g. by ThreadPool.shutdown) to make run() exit
        self.finish = False
        self.start()

    def run(self):
        """Poll the queue (0.1s timeout so self.finish is re-checked) and run tasks."""
        while not self.finish:
            try:
                func, args, kargs = self.tasks.get(True, 0.1)
                try:
                    func(*args, **kargs)
                except Exception:
                    # a failing task must not kill the worker thread
                    logging.error('ThreadPool Worker caught exception: %s' % traceback.format_exc())
                    traceback.print_exc()
                finally:
                    self.tasks.task_done()

            except Empty:
                # no task within the timeout -- loop and re-check self.finish
                pass


class ThreadPool:
    """Pool of threads consuming tasks from a queue"""

    def __init__(self, num_threads):
        # num_threads : number of worker threads to spawn immediately
        self.tasks         = Queue()
        self.terminal_lock = Lock()
        self.workers       = []
        for idx in range(num_threads):
            self.workers.append(Worker(self.tasks, idx))

    def add_task(self, func, *args, **kargs):
        """Add a task to the queue"""
        self.tasks.put((func, args, kargs))

    def print_synced(self, s):
        """Print s while holding the terminal lock (safe from multiple workers)."""
        self.terminal_lock.acquire()
        print(s)
        self.terminal_lock.release()

    def shutdown(self):
        """Wait for all queued tasks to complete, then stop and join all workers."""
        print("shutdown: tasks.join...")
        self.tasks.join()
        print("shutdown: tasks.join...done. finishing workers...")
        # fix: actually terminate the (non-daemon) worker threads -- without
        # this, shutdown() left them polling forever and the process never exited
        for worker in self.workers:
            worker.finish = True
        for worker in self.workers:
            worker.join()

        print("shutdown complete.")
#

#
# Abstraction layer for multiple TTS engines (Mary TTS, SVOX Pico TTS and eSpeak NG at the moment)
# can run those locally or act as a client for our HTTP TTS server
#

import traceback
import json
import logging
import requests
import urllib

try:
    # Python 3
    from urllib.parse import urlencode
except ImportError:
    # Python 2 (urllib.urlencode moved to urllib.parse in py3)
    from urllib import urlencode

from base64 import b64encode
from nltools.pulseplayer import PulsePlayer
from nltools.phonetics import ipa2mary, mary2ipa, ipa2xsampa, xsampa2ipa
from espeakng import ESpeakNG
from marytts import MaryTTS

MARY_VOICES = {

    'en_US': { 'male':   [ "cmu-rms-hsmm", "dfki-spike", "dfki-obadiah", "dfki-obadiah-hsmm", "cmu-bdl-hsmm"],
               'female': [ "cmu-slt-hsmm", "dfki-poppy", "dfki-poppy-hsmm", "dfki-prudence", "dfki-prudence-hsmm" ]
             },

    'de_DE': { 'male':   ["bits3", "bits3-hsmm", "dfki-pavoque-neutral", "dfki-pavoque-neutral-hsmm", "dfki-pavoque-styles"],
               'female': ["bits1-hsmm"]
             }
}
DEFAULT_MARY_VOICE  = 'cmu-rms-hsmm'
DEFAULT_MARY_LOCALE = 'en_US'

ESPEAK_VOICES = ['english-us', 'de']

class TTS(object):
    """Unified TTS front-end.

    With host_tts == 'local' synthesis runs in-process via MaryTTS,
    eSpeak NG or Pico TTS; otherwise requests are sent to a remote
    HTTP TTS server at host_tts:port_tts.
    """

    def __init__(self,
                 host_tts = 'local',
                 port_tts = 8300,
                 locale   = 'en_US',
                 engine   = 'mary',
                 voice    = 'cmu-rms-hsmm',
                 pitch    = 50,    # 0-99
                 speed    = 175):  # approx. words per minute

        self._host_tts = host_tts
        self._port_tts = port_tts
        self._locale   = locale
        self._engine   = engine
        self._voice    = voice
        self._pitch    = pitch
        self._speed    = speed

        if host_tts == 'local':
            self.player  = PulsePlayer('Local TTS Client')
            self.espeak  = ESpeakNG()
            self.marytts = MaryTTS()
            self.picotts = None  # lazy-loading to reduce package dependencies

    @property
    def locale(self):
        return self._locale
    @locale.setter
    def locale(self, v):
        self._locale = v

    @property
    def engine(self):
        return self._engine
    @engine.setter
    def engine(self, v):
        self._engine = v

    @property
    def voice(self):
        return self._voice
    @voice.setter
    def voice(self, v):
        self._voice = v

    @property
    def pitch(self):
        return self._pitch
    @pitch.setter
    def pitch(self, v):
        self._pitch = v

    @property
    def speed(self):
        return self._speed
    @speed.setter
    def speed(self, v):
        self._speed = v

    def synthesize(self, txt, mode='txt'):
        """Synthesize txt to WAV bytes.

        mode : 'txt' for plain text, 'ipa' for an IPA transcription
        returns WAV data as bytes, or None on a remote-server error.
        """
        if self._host_tts == 'local':

            wav = None

            if self.engine == 'mary':

                self.marytts.voice  = self._voice
                self.marytts.locale = self._locale

                if mode == 'txt':
                    wav = self.marytts.synth_wav (txt)
                elif mode == 'ipa':
                    xs = ipa2mary ('ipa', txt)
                    wav = self.marytts.synth_wav (xs, fmt='xs')
                else:
                    raise Exception ("unknown mary mode '%s'" % mode)

            elif self.engine == 'espeak':

                if mode == 'txt':

                    self.espeak.voice = self._voice
                    self.espeak.speed = self._speed
                    self.espeak.pitch = self._pitch
                    wav = self.espeak.synth_wav (txt)

                elif mode == 'ipa':
                    xs = ipa2xsampa ('ipa', txt)
                    logging.debug ('synthesize: %s %s -> %s' % (txt, mode, repr(xs)))
                    wav = self.espeak.synth_wav (xs, fmt='xs')

            elif self.engine == 'pico':

                if mode == 'txt':

                    if not self.picotts:
                        # lazy import so picotts is only required when used
                        from picotts import PicoTTS
                        self.picotts = PicoTTS()

                    self.picotts.voice = self._voice
                    wav = self.picotts.synth_wav (txt)

                else:
                    raise Exception ("unknown pico mode '%s'" % mode)
            else:

                raise Exception ("unknown engine '%s'" % self.engine)

        else:

            args = {'l': self._locale,
                    'v': self._voice,
                    'e': self._engine,
                    'm': mode,
                    't': txt.encode('utf8')}
            url = 'http://%s:%s/tts/synth?%s' % (self._host_tts, self._port_tts, urlencode(args))

            response = requests.get(url)

            if response.status_code != 200:
                return None

            wav = response.content

        if wav:
            logging.debug ('synthesize: %s %s -> WAV' % (txt, mode))
        else:
            logging.error ('synthesize: %s %s -> NO WAV' % (txt, mode))

        return wav

    def play_wav (self, wav, async_play=False):
        """Play WAV bytes locally or on the remote server.

        parameter renamed from 'async': that spelling is a reserved word
        since Python 3.7 and made this module a SyntaxError.
        """
        if self._host_tts == 'local':

            if wav:
                self.player.play(wav, async_play)
            else:
                raise Exception ('no wav given')

        else:

            url = 'http://%s:%s/tts/play' % (self._host_tts, self._port_tts)

            if async_play:
                # server-side protocol flag is still spelled 'async'
                url += '?async=t'

            response = requests.post(url, data=wav)

    def say (self, utterance, async_play=False):
        """Synthesize and play a text utterance."""
        wav = self.synthesize(utterance)
        self.play_wav(wav, async_play=async_play)

    def say_ipa (self, ipa, async_play=False):
        """Synthesize and play an IPA transcription."""
        wav = self.synthesize(ipa, mode='ipa')
        self.play_wav(wav, async_play=async_play)

    def gen_ipa (self, word):
        """Generate an IPA transcription (g2p) for a single word."""
        if self._host_tts == 'local':

            if self.engine == 'mary':

                self.marytts.voice  = self._voice
                self.marytts.locale = self._locale

                mp = self.marytts.g2p (word)
                return mary2ipa(word, mp)

            elif self.engine == 'espeak':

                self.espeak.voice = self._voice
                e_ipa = self.espeak.g2p (word, ipa='2')
                # round-trip through X-SAMPA to normalize espeak's IPA output
                xs  = ipa2xsampa(word, e_ipa)
                ipa = xsampa2ipa(word, xs)

                logging.debug (u'espeak g2p: %s -> %s -> %s -> %s' % (word, e_ipa, xs, ipa))

                return ipa

            elif self.engine == 'sequitur':

                # NOTE(review): SEQUITUR_MODELS and sequitur_gen_ipa are not
                # defined or imported in this module -- this branch raises
                # NameError; presumably they should come from
                # nltools.sequiturclient / a config module. Confirm.
                if not self.voice in SEQUITUR_MODELS:
                    raise Exception ("no sequitur model for voice '%s'" % self.voice)

                return sequitur_gen_ipa (SEQUITUR_MODELS[self.voice], word)

            else:
                raise Exception ("unknown engine '%s'" % self.engine)

        else:
            args = {'l': self._locale,
                    'v': self._voice,
                    'e': self._engine,
                    't': word.encode('utf8')}
            url = 'http://%s:%s/tts/g2p?%s' % (self._host_tts, self._port_tts, urlencode(args))

            response = requests.get(url)

            if response.status_code != 200:
                return None

            return response.json()['ipa']
# See the License for the specific language governing permissions and
# limitations under the License.
#

#
# Voice Activity Detection (VAD) state machine
#

import logging
import webrtcvad

SAMPLE_RATE      = 16000
BUFFER_DURATION  = 30 # ms
# integer division: this is a count of ring-buffer entries
RING_BUF_ENTRIES = 5 * 60 * 1000 // BUFFER_DURATION # 5 minutes max

MIN_UTT_LENGTH   = 0.4  # seconds
MAX_UTT_LENGTH   = 12   # seconds
MAX_UTT_GAP      = 0.7  # seconds

# state machine states
STATE_IDLE       = 0

STATE_PRE_SPEECH = 1   # speech detected, utterance not yet long enough
STATE_PRE_GAP    = 2   # silence before the utterance was confirmed

STATE_SPEECH     = 3   # inside a confirmed utterance
STATE_GAP        = 4   # silence inside a confirmed utterance

STATE_IGNORE     = 5   # utterance exceeded max length, discard until silence
STATE_IGNORE_GAP = 6

FRAME_STAT_CNT     = 300
LOW_VOLUME_THRESH  = 100
HIGH_VOLUME_THRESH = 25000

class VAD(object):
    """Voice activity detection state machine built on webrtcvad.

    Feed fixed-size audio frames (BUFFER_DURATION ms of int16 samples) to
    process_audio(); it returns (samples, finalize) chunks of the detected
    utterance, with finalize=True on the last chunk.
    """

    def __init__(self, aggressiveness=2, sample_rate=SAMPLE_RATE,
                 min_utt_length = MIN_UTT_LENGTH,
                 max_utt_length = MAX_UTT_LENGTH,
                 max_utt_gap    = MAX_UTT_GAP):
        # aggressiveness : webrtcvad mode 0-3 (3 = most aggressive filtering)
        # min_utt_length : seconds of speech before an utterance is confirmed
        # max_utt_length : seconds after which the utterance is force-finalized
        # max_utt_gap    : seconds of silence that terminate an utterance

        self.sample_rate = sample_rate

        self.vad = webrtcvad.Vad()
        self.vad.set_mode(aggressiveness)

        self.state    = STATE_IDLE
        self.buf      = []
        self.buf_sent = 0

        # frame-count thresholds; int(...) over the whole expression so the
        # values stay integers under Python 3 true division (int(x)/y used to
        # yield floats on py3, subtly shifting the tuned py2 thresholds)
        self.min_buf_entries = int(min_utt_length * 1000 / BUFFER_DURATION)
        self.max_buf_entries = int(max_utt_length * 1000 / BUFFER_DURATION)
        self.max_gap         = int(max_utt_gap * 1000 / BUFFER_DURATION)

        # running volume statistics over the first FRAME_STAT_CNT frames
        self.frame_cnt   = 0
        self.avg_vol_sum = 0.0
        self.avg_vol_cnt = 0

    def _return_audio (self, finalize):
        """Return all not-yet-delivered buffered samples plus the finalize flag."""
        res = []

        buf_max = len(self.buf) - 1

        while self.buf_sent <= buf_max:
            res.extend(self.buf[self.buf_sent].tolist())
            self.buf_sent += 1

        return res, finalize

    def process_audio (self, audio):
        """Process one frame; returns (samples, finalize) or (None, False).

        audio : one BUFFER_DURATION ms frame of int16 samples (numpy array)
        """
        cur_frame = audio

        # give feedback if volume too low / too high
        if self.frame_cnt <= FRAME_STAT_CNT:

            for sample in audio:
                self.avg_vol_sum += abs(sample)
                self.avg_vol_cnt += 1

            self.frame_cnt += 1
            if self.frame_cnt == FRAME_STAT_CNT:

                self.avg_vol_sum /= float(self.avg_vol_cnt)

                if self.avg_vol_sum < LOW_VOLUME_THRESH:
                    logging.error ('VAD: audio volume too low or wrong source?')
                elif self.avg_vol_sum > HIGH_VOLUME_THRESH:
                    logging.error ('VAD: audio volume too high or wrong source?')

        vad_res = self.vad.is_speech(audio.tobytes(), self.sample_rate)

        if self.state == STATE_IDLE:
            if vad_res:
                self.state    = STATE_PRE_SPEECH
                self.buf      = [ cur_frame ]
                self.buf_sent = 0

        elif self.state == STATE_PRE_SPEECH:
            self.buf.append(cur_frame)
            if vad_res:
                # enough consecutive speech frames -> confirm the utterance
                if len (self.buf) > self.min_buf_entries:
                    logging.debug ("*** SPEECH DETECTED at frame %3d ***" % len(self.buf))
                    self.state = STATE_SPEECH

            else:
                self.state     = STATE_PRE_GAP
                self.gap_start = len(self.buf)

        elif self.state == STATE_PRE_GAP:
            self.buf.append(cur_frame)

            if vad_res:
                self.state = STATE_PRE_SPEECH

            else:
                gap_len = len(self.buf) - self.gap_start
                if gap_len > self.max_gap:
                    logging.debug ("*** PRE GAP (%d) TOO LONG at frame %3d ***" % (gap_len, len(self.buf)))
                    self.state = STATE_IDLE

        elif self.state == STATE_SPEECH:
            self.buf.append(cur_frame)

            # check if attention span is over
            if len (self.buf) > self.max_buf_entries:
                logging.debug ("*** START OF IGNORE at frame %3d ***" % len(self.buf))
                self.state = STATE_IGNORE
                return self._return_audio(True)

            else:
                if not vad_res:
                    logging.debug ("*** START OF GAP at frame %3d ***" % len(self.buf))
                    self.state     = STATE_GAP
                    self.gap_start = len(self.buf)
                    return self._return_audio(False)

        elif self.state == STATE_GAP:
            self.buf.append(cur_frame)

            gap_len = len(self.buf) - self.gap_start
            if vad_res:
                self.state = STATE_SPEECH
                logging.debug ("*** END OF GAP (%d < %d) at frame %3d ***" % (gap_len, self.max_gap, len(self.buf)))
                return self._return_audio(False)

            else:
                if gap_len > self.max_gap:
                    logging.debug ("*** GAP (%d > %d) TOO LONG at frame %3d ***" % (gap_len, self.max_gap, len(self.buf)))
                    self.state = STATE_IDLE
                    return self._return_audio(True)
                else:
                    return self._return_audio(False)

        elif self.state == STATE_IGNORE:
            self.buf.append(cur_frame)
            if not vad_res:
                self.state     = STATE_IGNORE_GAP
                self.gap_start = len(self.buf)

        elif self.state == STATE_IGNORE_GAP:
            self.buf.append(cur_frame)
            if vad_res:
                self.state = STATE_IGNORE
            else:
                gap_len = len(self.buf) - self.gap_start
                if gap_len > self.max_gap:
                    logging.debug ("*** end of ignore at frame %3d ***" % len(self.buf))
                    self.state = STATE_IDLE

        return None, False
'webrtcvad', 'setproctitle' 16 | ], 17 | classifiers = [ 18 | 'Operating System :: POSIX :: Linux', 19 | 'License :: OSI Approved :: Apache Software License', 20 | 'Programming Language :: Python :: 2', 21 | 'Programming Language :: Python :: 2.7', 22 | 'Programming Language :: Python :: 3', 23 | 'Programming Language :: Python :: 3.5', 24 | 'Intended Audience :: Developers', 25 | 'Topic :: Multimedia :: Sound/Audio :: Speech', 26 | 'Topic :: Scientific/Engineering :: Artificial Intelligence' 27 | ], 28 | license = 'Apache', 29 | keywords = 'natural language processing tokenizer nlp tts asr speech synthesis recognition', 30 | ) 31 | 32 | -------------------------------------------------------------------------------- /tests/test_asr.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # 5 | # Copyright 2014, 2016, 2017, 2018 Guenter Bartsch 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | # 19 | 20 | import unittest 21 | import logging 22 | import wave 23 | import struct 24 | 25 | from nltools.asr import ASR, ASR_ENGINE_NNET3, ASR_ENGINE_POCKETSPHINX 26 | from nltools import misc 27 | 28 | TEST_WAVE_EN = 'tests/foo.wav' 29 | TEST_WAVE_EN_TS = 'ah indeed' 30 | TEST_WAVE_EN_TS_PS = 'aha in dayton' 31 | 32 | POCKETSPHINX_MODELDIR = 'models/cmusphinx-cont-generic-en-latest' 33 | POCKETSPHINX_MODELNAME = 'voxforge' 34 | 35 | class TestASR (unittest.TestCase): 36 | 37 | def test_asr_kaldi(self): 38 | 39 | asr = ASR(engine = ASR_ENGINE_NNET3) 40 | 41 | wavf = wave.open(TEST_WAVE_EN, 'rb') 42 | 43 | # check format 44 | self.assertEqual(wavf.getnchannels(), 1) 45 | self.assertEqual(wavf.getsampwidth(), 2) 46 | 47 | # process file in 250ms chunks 48 | 49 | chunk_frames = 250 * wavf.getframerate() / 1000 50 | tot_frames = wavf.getnframes() 51 | 52 | num_frames = 0 53 | while num_frames < tot_frames: 54 | 55 | finalize = False 56 | if (num_frames + chunk_frames) < tot_frames: 57 | nframes = chunk_frames 58 | else: 59 | nframes = tot_frames - num_frames 60 | finalize = True 61 | 62 | frames = wavf.readframes(nframes) 63 | num_frames += nframes 64 | samples = struct.unpack_from('<%dh' % nframes, frames) 65 | 66 | s, l = asr.decode(samples, finalize, wavf.getframerate()) 67 | 68 | wavf.close() 69 | 70 | self.assertEqual(s.strip(), TEST_WAVE_EN_TS) 71 | 72 | def test_asr_kaldi_wavefile(self): 73 | asr = ASR(engine = ASR_ENGINE_NNET3) 74 | s, l = asr.decode_wav_file(TEST_WAVE_EN) 75 | self.assertEqual(s.strip(), TEST_WAVE_EN_TS) 76 | 77 | def test_asr_pocketsphinx(self): 78 | 79 | asr = ASR(engine = ASR_ENGINE_POCKETSPHINX, model_dir = POCKETSPHINX_MODELDIR, model_name = POCKETSPHINX_MODELNAME) 80 | 81 | wavf = wave.open(TEST_WAVE_EN, 'rb') 82 | 83 | # check format 84 | self.assertEqual(wavf.getnchannels(), 1) 85 | self.assertEqual(wavf.getsampwidth(), 2) 86 | 87 | # process file in 250ms chunks 88 | 89 | chunk_frames = 250 * wavf.getframerate() / 1000 90 | 
tot_frames = wavf.getnframes() 91 | 92 | num_frames = 0 93 | while num_frames < tot_frames: 94 | 95 | finalize = False 96 | if (num_frames + chunk_frames) < tot_frames: 97 | nframes = chunk_frames 98 | else: 99 | nframes = tot_frames - num_frames 100 | finalize = True 101 | 102 | frames = wavf.readframes(nframes) 103 | num_frames += nframes 104 | samples = struct.unpack_from('<%dh' % nframes, frames) 105 | 106 | s, l = asr.decode(samples, finalize, wavf.getframerate()) 107 | 108 | if not finalize: 109 | self.assertEqual(s, None) 110 | 111 | wavf.close() 112 | 113 | self.assertEqual(s.strip(), TEST_WAVE_EN_TS_PS) 114 | 115 | def test_asr_pocketsphinx_wavefile(self): 116 | asr = ASR(engine = ASR_ENGINE_POCKETSPHINX, model_dir = POCKETSPHINX_MODELDIR, model_name = POCKETSPHINX_MODELNAME) 117 | s, l = asr.decode_wav_file(TEST_WAVE_EN) 118 | self.assertEqual(s.strip(), TEST_WAVE_EN_TS_PS) 119 | 120 | 121 | if __name__ == "__main__": 122 | 123 | # logging.basicConfig(level=logging.ERROR) 124 | logging.basicConfig(level=logging.DEBUG) 125 | 126 | unittest.main() 127 | 128 | -------------------------------------------------------------------------------- /tests/test_macro_engine.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # 5 | # Copyright 2018 Guenter Bartsch 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | # 19 | 20 | import unittest 21 | import logging 22 | 23 | from nltools.macro_engine import MacroEngine 24 | from nltools import misc 25 | 26 | class TestME (unittest.TestCase): 27 | 28 | def test_implicit_me(self): 29 | 30 | me = MacroEngine() 31 | 32 | expansions = me.expand_macros('en', "(a|b|c) (c|d|e) foo") 33 | 34 | logging.debug(repr(expansions)) 35 | 36 | self.assertEqual(len(expansions), 9) 37 | self.assertEqual(u" ".join(expansions[0][0]), u"c e foo") 38 | 39 | def test_explicit_me(self): 40 | 41 | me = MacroEngine() 42 | 43 | me.add_macro_expansion("prefix", u"") 44 | me.add_macro_expansion("prefix", u"please") 45 | me.add_macro_expansion("prefix", u"computer") 46 | me.add_macro_expansion("location", u"living room") 47 | me.add_macro_expansion("location", u"bedroom") 48 | me.add_macro_expansion("location", u"kitchen") 49 | 50 | 51 | expansions = me.expand_macros('en', "{prefix:W} switch (on|off) the light in the {location:W}") 52 | 53 | logging.debug(repr(expansions)) 54 | 55 | self.assertEqual(len(expansions), 18) 56 | self.assertEqual(u" ".join(expansions[0][0]), u"computer switch off the light in the kitchen") 57 | 58 | if __name__ == "__main__": 59 | 60 | # logging.basicConfig(level=logging.ERROR) 61 | logging.basicConfig(level=logging.DEBUG) 62 | 63 | unittest.main() 64 | 65 | -------------------------------------------------------------------------------- /tests/test_misc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # 5 | # Copyright 2013, 2014, 2016, 2017, 2018 Guenter Bartsch 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 
9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | import shutil 21 | import tempfile 22 | import os.path 23 | import unittest 24 | import logging 25 | 26 | from nltools import misc 27 | from nltools.tokenizer import tokenize 28 | 29 | class TestMisc (unittest.TestCase): 30 | 31 | def setUp(self): 32 | self.test_dir = tempfile.mkdtemp() 33 | 34 | def tearDown(self): 35 | shutil.rmtree(self.test_dir) 36 | 37 | def test_load_config(self): 38 | 39 | cfg = misc.load_config('.speechrc') 40 | 41 | host = cfg.get('tts', 'host') 42 | 43 | self.assertEqual (host, 'local') 44 | 45 | 46 | def test_compress_ws(self): 47 | 48 | self.assertEqual (misc.compress_ws(u' abc cde 12 '), u' abc cde 12') 49 | 50 | def test_run_command(self): 51 | 52 | txt = '' 53 | for line in misc.run_command(['uname', '-a']): 54 | txt += line.strip() 55 | 56 | self.assertEqual('Linux' in txt, True) 57 | 58 | def test_tex(self): 59 | 60 | self.assertEqual(misc.tex_decode('"uber'), u'\xfcber') 61 | self.assertEqual(misc.tex_decode('da"s'), u'daß') 62 | 63 | self.assertEqual(misc.tex_encode(u'über'), '"uber') 64 | self.assertEqual(misc.tex_encode(u'daß'), 'da"s') 65 | 66 | def test_edit_distance(self): 67 | 68 | self.assertEqual (misc.edit_distance('hubba', 'hubba'), 0) 69 | self.assertEqual (misc.edit_distance('hubba', 'hubb'), 1) 70 | self.assertEqual (misc.edit_distance('hubba', 'hub'), 2) 71 | self.assertEqual (misc.edit_distance('hubba', 'bba'), 2) 72 | 73 | self.assertEqual (misc.edit_distance( 74 | tokenize(u'die leistung wurde zurückverlangt'), 75 | tokenize(u'die leistung wurde 
zurückverlangt')), 0) 76 | self.assertEqual (misc.edit_distance( 77 | tokenize(u'die leistung wurde'), 78 | tokenize(u'die leistung wurde zurückverlangt')), 1) 79 | self.assertEqual (misc.edit_distance( 80 | tokenize(u'DIE LEISTUNG'), 81 | tokenize(u'DIE LEISTUNG WURDE ZURÜCKVERLANGT')), 2) 82 | self.assertEqual (misc.edit_distance( 83 | tokenize(u'DIE'), 84 | tokenize(u'DIE LEISTUNG WURDE ZURÜCKVERLANGT')), 3) 85 | self.assertEqual (misc.edit_distance( 86 | tokenize(u'DIE ZURÜCKVERLANGT'), 87 | tokenize(u'DIE LEISTUNG WURDE ZURÜCKVERLANGT')), 2) 88 | self.assertEqual (misc.edit_distance( 89 | tokenize(u'DIE LEISTUNG WURDE ZURÜCKVERLANGT'), 90 | tokenize(u'LEISTUNG WURDE ZURÜCKVERLANGT')), 1) 91 | self.assertEqual (misc.edit_distance( 92 | tokenize(u'DIE LEISTUNG WURDE ZURÜCKVERLANGT'), 93 | tokenize(u'WURDE ZURÜCKVERLANGT')), 2) 94 | self.assertEqual (misc.edit_distance( 95 | tokenize(u'DIE LEISTUNG WURDE ZURÜCKVERLANGT'), 96 | tokenize(u'ZURÜCKVERLANGT')), 3) 97 | self.assertEqual (misc.edit_distance( 98 | tokenize(u'DIE LEISTUNG WURDE ZURÜCKVERLANGT'), 99 | tokenize(u'')), 4) 100 | self.assertEqual (misc.edit_distance( 101 | tokenize(u'DIE LEISTUNG WURDE ZURÜCKVERLANGT'), 102 | tokenize(u'LEISTUNG FOO ZURÜCKVERLANGT')), 2) 103 | self.assertEqual (misc.edit_distance( 104 | tokenize(u'SIE IST FÜR DIE LEISTUNG DANKBAR'), 105 | tokenize(u'SIE STRITTIG LEISTUNG DANKBAR')), 3) 106 | 107 | def test_limit_str(self): 108 | 109 | self.assertEqual(misc.limit_str('1234567890', 10), '1234567890') 110 | self.assertEqual(misc.limit_str('1234567890', 9), '123456...') 111 | 112 | def test_render_template(self): 113 | # given 114 | template_text = """VAR1={{val1}} 115 | VAR2={{val2}} 116 | """ 117 | 118 | val1 = "v1" 119 | val2 = "v2" 120 | 121 | expected_text = """VAR1=%s 122 | VAR2=%s 123 | """ % (val1, val2) 124 | 125 | src_path = os.path.join(str(self.test_dir), "src.txt") 126 | dst_path = os.path.join(str(self.test_dir), "dst.txt") 127 | 128 | with open(src_path, "wt") as f: 
129 | f.write(template_text) 130 | 131 | # when 132 | misc.render_template(src_path, dst_path, val1=val1, val2=val2) 133 | 134 | # then 135 | with open(dst_path) as f: 136 | actual_text = f.read() 137 | 138 | self.assertEqual(expected_text, actual_text) 139 | 140 | 141 | if __name__ == "__main__": 142 | 143 | logging.basicConfig(level=logging.ERROR) 144 | 145 | unittest.main() 146 | 147 | -------------------------------------------------------------------------------- /tests/test_phonetics.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # 5 | # Copyright 2013, 2014, 2016, 2017 Guenter Bartsch 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | # 19 | 20 | import logging 21 | import unittest 22 | from nltools.phonetics import ipa2xsampa, ipa2mary, xsampa2xarpabet, xs2xa_table, xsampa2ipa 23 | 24 | class TestPhoneticAlphabets (unittest.TestCase): 25 | 26 | def setUp(self): 27 | self.seq = range(10) 28 | 29 | def test_ipa(self): 30 | 31 | res = ipa2xsampa ("EISENBAHN", u"ˈaɪ̯zən̩ˌbaːn") 32 | #print "res: %s" % res 33 | self.assertEqual (res, "'aIz@nba:n") 34 | 35 | res = ipa2xsampa ("DIPHTONGTEST", u"aɪɔɪaʊɜ'") 36 | #print "res: %s" % res 37 | self.assertEqual (res, "aIOIaU3") 38 | 39 | res = ipa2xsampa ("BON", u"bɔ̃") 40 | #print "res: %s" % res 41 | self.assertEqual (res, "bO~") 42 | 43 | res = ipa2xsampa ("RESTAURANT", u"ʁɛstɔʁɑ̃") 44 | #print "res: %s" % res 45 | self.assertEqual (res, "REstORA~") 46 | 47 | res = ipa2xsampa ("VIN", u"vɛ̃") 48 | #print "res: %s" % res 49 | self.assertEqual (res, "vE~") 50 | 51 | res = ipa2xsampa ("BRUN", u"bʁœ̃") 52 | #print "res: %s" % res 53 | self.assertEqual (res, "bR9~") 54 | 55 | res = ipa2xsampa ("POIGNANT", u"pwaɲɑ̃") 56 | #print "res: %s" % res 57 | self.assertEqual (res, "pwaJA~") 58 | 59 | res = ipa2mary ("EISENBAHN", u"ˈaɪ̯zən̩ˌbaːn") 60 | #print "res: %s" % res 61 | self.assertEqual (res, "'aIz@nba:n") 62 | 63 | res = ipa2mary ("DIPHTONGTEST", u"aɪɔɪaʊɜ'") 64 | #print "res: %s" % res 65 | self.assertEqual (res, "aIOIaUr='") 66 | 67 | res = ipa2mary ("BON", u"bɔ̃") 68 | #print "res: %s" % res 69 | self.assertEqual (res, "bO~") 70 | 71 | res = ipa2mary ("RESTAURANT", u"ʁɛstɔʁɑ̃") 72 | #print "res: %s" % res 73 | self.assertEqual (res, "REstORA~") 74 | 75 | res = ipa2mary ("VIN", u"vɛ̃") 76 | #print "res: %s" % res 77 | self.assertEqual (res, "vE~") 78 | 79 | res = ipa2mary ("BRUN", u"bʁœ̃") 80 | #print "res: %s" % res 81 | self.assertEqual (res, "bR9~") 82 | 83 | res = ipa2mary ("POIGNANT", u"pwaɲɑ̃") 84 | #print "res: %s" % res 85 | self.assertEqual (res, "pwaJA~") 86 | 87 | res = xsampa2ipa(u"entrée A~ t R e", u"A~ t R e") 88 | #print "res: %s" % res 89 
| self.assertEqual (res, u"ɑ̃tʁe") 90 | 91 | def test_xarpa(self): 92 | 93 | res = xsampa2xarpabet ("JAHRHUNDERTE", "ja:6-'hUn-d6-t@") 94 | #print "res: %s" % res 95 | self.assertEqual (res, "Y AAH EX HH UU N D EX T AX") 96 | 97 | res = xsampa2xarpabet ("ABGESCHRIEBEN", "'ap-g@-SRi:-b@n") 98 | #print "res: %s" % res 99 | self.assertEqual (res, "AH P G AX SH RR IIH B AX N") 100 | 101 | res = xsampa2xarpabet ("ZUGEGRIFFEN", "'tsu:-g@-gRI-f@n") 102 | #print "res: %s" % res 103 | self.assertEqual (res, "TS UUH G AX G RR IH F AX N") 104 | 105 | res = xsampa2xarpabet ("AUSLEGUNG", "'aU-sle:-gUN") 106 | #print "res: %s" % res 107 | self.assertEqual (res, "AW S L EEH G UU NG") 108 | 109 | res = xsampa2xarpabet ("BON", "bO~") 110 | #print "res: %s" % res 111 | self.assertEqual (res, "B ON") 112 | 113 | res = xsampa2xarpabet ("RESTAURANT", "REstORA~") 114 | #print "res: %s" % res 115 | self.assertEqual (res, "RR EH S T OO RR AN") 116 | 117 | res = xsampa2xarpabet ("VIN", u"vE~") 118 | #print "res: %s" % res 119 | self.assertEqual (res, "V EN") 120 | 121 | res = xsampa2xarpabet ("BRUN", u"bR9~") 122 | #print "res: %s" % res 123 | self.assertEqual (res, "B RR OEN") 124 | 125 | res = xsampa2xarpabet ("POIGNANT", u"pwaJA~") 126 | #print "res: %s" % res 127 | self.assertEqual (res, "P W AH NJ AN") 128 | 129 | def test_xarpa_unique(self): 130 | 131 | # all xarpa transcriptions have to be unique 132 | 133 | uniq_xs = set() 134 | uniq_xa = set() 135 | 136 | for entry in xs2xa_table: 137 | xs = entry[0] 138 | xa = entry[1] 139 | #print (u"xs: %s, xa: %s" % (xs, xa)).encode('utf8') 140 | self.assertFalse (xa in uniq_xa) 141 | uniq_xa.add(xa) 142 | self.assertFalse (xs in uniq_xs) 143 | uniq_xs.add(xs) 144 | 145 | if __name__ == "__main__": 146 | 147 | logging.basicConfig(level=logging.ERROR) 148 | 149 | unittest.main() 150 | 151 | -------------------------------------------------------------------------------- /tests/test_pulseplayer.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # 5 | # Copyright 2013, 2014, 2016, 2017 Guenter Bartsch 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | import unittest 21 | import logging 22 | 23 | from nltools.pulseplayer import PulsePlayer 24 | 25 | class TestPulsePlayer (unittest.TestCase): 26 | 27 | def test_playback(self): 28 | 29 | player = PulsePlayer('nltools unittest') 30 | 31 | with open('foo.wav', 'rb') as wavf: 32 | wav = wavf.read() 33 | 34 | player.play(wav) 35 | 36 | if __name__ == "__main__": 37 | 38 | logging.basicConfig(level=logging.ERROR) 39 | 40 | unittest.main() 41 | 42 | -------------------------------------------------------------------------------- /tests/test_pulserecorder.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # 5 | # Copyright 2013, 2014, 2016, 2017 Guenter Bartsch 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 
9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | import unittest 21 | import logging 22 | import time 23 | 24 | from nltools.pulserecorder import PulseRecorder 25 | 26 | SOURCE = 'Monitor' 27 | SAMPLERATE = 16000 28 | VOLUME = 120 29 | 30 | class TestPulseRecorder (unittest.TestCase): 31 | 32 | def test_rec(self): 33 | 34 | recorder = PulseRecorder(source_name=SOURCE, rate=SAMPLERATE, volume=VOLUME) 35 | recorder.start_recording(1000) 36 | time.sleep(1) 37 | recorder.stop_recording() 38 | 39 | samples = recorder.get_samples() 40 | 41 | logging.debug(repr(samples)) 42 | 43 | self.assertGreater (len(samples), 900) 44 | 45 | 46 | if __name__ == "__main__": 47 | 48 | logging.basicConfig(level=logging.DEBUG) 49 | 50 | unittest.main() 51 | 52 | -------------------------------------------------------------------------------- /tests/test_sequitur.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # 5 | # Copyright 2014, 2016, 2017, 2018 Guenter Bartsch 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | import unittest 21 | import logging 22 | 23 | from nltools.sequiturclient import sequitur_gen_ipa 24 | 25 | MODELFN = 'models/sequitur-dict-de.ipa-latest' 26 | 27 | G2P_TESTS = [ 28 | (u'gelbseidenen', u"'g\u025blb-za\u026a-d\u0259-n\u0259n" ), 29 | (u'unmute', u"'\u0294\u028an-mu\u02d0-t\u0259" ), 30 | (u'übereilt', u"\u0294y\u02d0-b\u0250-'\u0294a\u026alt" ), 31 | ] 32 | 33 | class TestSequitur (unittest.TestCase): 34 | 35 | def test_g2p(self): 36 | 37 | for word, ipa in G2P_TESTS: 38 | 39 | sq_ipa = sequitur_gen_ipa (MODELFN, word) 40 | 41 | self.assertEqual (sq_ipa, ipa) 42 | 43 | 44 | if __name__ == "__main__": 45 | 46 | logging.basicConfig(level=logging.ERROR) 47 | # logging.basicConfig(level=logging.DEBUG) 48 | 49 | unittest.main() 50 | 51 | -------------------------------------------------------------------------------- /tests/test_tokenizer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # 5 | # Copyright 2017, 2018 Guenter Bartsch 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | # 19 | 20 | import unittest 21 | import logging 22 | 23 | from nltools.tokenizer import * 24 | 25 | class TestTokenizer (unittest.TestCase): 26 | 27 | def setUp(self): 28 | self.seq = range(10) 29 | 30 | # FIXME 31 | # def test_latin1(self): 32 | # self.assertTrue (detect_latin1('/home/ai/voxforge/de/audio/ralfherzog-20071220-de34/etc/prompts-original')) 33 | # self.assertFalse (detect_latin1('/home/ai/voxforge/de/audio/mjw-20110527-dyg/etc/prompts-original')) 34 | 35 | def test_tokenize_special(self): 36 | 37 | self.assertEqual (tokenize(u"„kamel“"), [u'kamel']) 38 | self.assertEqual (tokenize(u"$test"), [u'dollar', u'test']) 39 | 40 | def test_tokenize_wrt(self): 41 | 42 | self.assertEqual (tokenize(u"foo circa bar"), [u'foo', u'circa', u'bar']) 43 | self.assertEqual (tokenize(u"foo ok bar"), [u'foo', u'okay', u'bar']) 44 | self.assertEqual (tokenize(u"fook ok baokr"), [u'fook', u'okay', u'baokr']) 45 | self.assertEqual (tokenize(u"o.k.bar"), [u'okay', u'bar']) 46 | self.assertEqual (tokenize(u"foo o. k.bar"), [u'foo', u'okay', u'bar']) 47 | 48 | def test_tokenize_punctuation(self): 49 | 50 | self.assertEqual (tokenize(u"abc, def. zzz! ( abc