├── .gitignore ├── LICENSE ├── Makefile ├── README.adoc ├── examples ├── eliza.py ├── espeakng_tts.py ├── live_recorder.py ├── live_vad.py ├── va_eliza.py ├── va_simple.py └── wav_decoder.py ├── images └── highlevel.png ├── nltools ├── __init__.py ├── asr.py ├── macro_engine.py ├── misc.py ├── phonetics.py ├── pulseplayer.py ├── pulserecorder.py ├── sequiturclient.py ├── threadpool.py ├── tokenizer.py ├── tts.py └── vad.py ├── run_tests.sh ├── setup.py └── tests ├── test_asr.py ├── test_macro_engine.py ├── test_misc.py ├── test_phonetics.py ├── test_pulseplayer.py ├── test_pulserecorder.py ├── test_sequitur.py ├── test_tokenizer.py └── test_tts.py /.gitignore: -------------------------------------------------------------------------------- 1 | # use glob syntax. 2 | syntax: glob 3 | *.swp 4 | *.swo 5 | *.pyc 6 | tmp 7 | old 8 | *.log 9 | TODO 10 | 11 | # Byte-compiled / optimized / DLL files 12 | __pycache__/ 13 | *.py[cod] 14 | *$py.class 15 | 16 | # C extensions 17 | *.so 18 | 19 | models 20 | foo.wav 21 | .asciidoctor 22 | README.html 23 | README.md 24 | README.xml 25 | build 26 | dist 27 | py_nltools.egg-info 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. 
For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | all: README.html README.md dist 2 | 3 | SHELL := /bin/bash 4 | 5 | %.html: %.adoc 6 | asciidoctor -r asciidoctor-diagram -a toc $< 7 | 8 | README.md: README.adoc 9 | asciidoc -b docbook README.adoc 10 | iconv -t utf-8 README.xml | pandoc -f docbook -t markdown_strict | iconv -f utf-8 > README.md 11 | 12 | tests: 13 | nosetests 14 | 15 | dist: README.md 16 | python setup.py sdist 17 | python setup.py bdist_wheel --universal 18 | 19 | upload: 20 | twine upload dist/* 21 | 22 | clean: 23 | rm -f *.html images/*.png 24 | rm -rf dist build py_nltools.egg-info README.md README.xml 25 | -------------------------------------------------------------------------------- /README.adoc: -------------------------------------------------------------------------------- 1 | py-nltools 2 | ---------- 3 | 4 | A collection of abstraction layers and support functions that form the natural 5 | language processing foundation of the Zamia AI project: 6 | 7 | * `phonetics`: 
translation functions between various phonetic alphabets (IPA, X-SAMPA, X-ARPABET, ...) 8 | * `tts`: abstraction layer towards using eSpeak NG, MaryTTS, SVOX Pico TTS or a remote TTS server and sequitur g2p 9 | * `asr`: abstraction layer towards using kaldi-asr and pocketsphinx, models can be found here: http://goofy.zamia.org/voxforge/ 10 | * `sequiturclient`: g2p using sequitur 11 | * `pulseplayer`: audio playback through pulseaudio 12 | * `pulserecorder`: audio recording through pulseaudio 13 | * `tokenizer`: english, french and german word tokenizers aimed at spoken language applications 14 | * `threadpool`: simple thread pool implementation 15 | * `vad`: Voice Activity Detection finite state machine based on webrtc VAD 16 | * `macro_engine`: Simple macro engine aimed at generating natural language expansions 17 | 18 | I plan to add modules as I need them in the Zamia AI projects. Some modules like `phonetics` and `tokenizer` 19 | have some overlap with larger projects like NLTK or spaCy - my modules tend to be more hands-on and simple minded 20 | than these and therefore are in no way meant to replace them. 21 | 22 | ifndef::imagesdir[:imagesdir: images] 23 | 24 | ifndef::env-github[] 25 | [ditaa,"highlevel"] 26 | .... 
27 | +-----------------------------------------------------------------------------------------------+ 28 | | nltools | 29 | | +-----------+ +-----------+ +------------+ +--------------+ | 30 | | | tokenizer | | phonetics | | threadpool | | macro_engine | | 31 | | +-----------+ +-----------+ +------------+ +--------------+ | 32 | | | 33 | | +-----------+ +-----------+ +-----------+ +-----------+ +-----------+ | 34 | | | tts | | asr | | vad | | g2p | | audio | | 35 | | +-----------+ +-----------+ +-----------+ +-----------+ +-----------+ | 36 | | | | | | | | 37 | +-----------------------------------------------------------------------------------------------+ 38 | | | | | | 39 | +--------+---------+ +------+----+ | | | 40 | | | | | | | | | 41 | v v v v v v v v 42 | +------+ +--------+ +------+ +-------+ +-----------+ +--------+ +----------+ +------------+ 43 | | mary | | eSpeak | | pico | | kaldi | | cmusphinx | | webrtc | | sequitur | | pulseaudio | 44 | +------+ +--------+ +------+ +-------+ +-----------+ +--------+ +----------+ +------------+ 45 | .... 46 | endif::env-github[] 47 | ifdef::env-github[] 48 | image::highlevel.png[Highlevel Diagram] 49 | endif::env-github[] 50 | 51 | Requirements 52 | ~~~~~~~~~~~~ 53 | 54 | *Note*: probably incomplete. 55 | 56 | * Python 2.7 57 | * for TTS one or more of: 58 | - MaryTTS, py-marytts 59 | - espeak-ng, py-espeak-ng 60 | - SVOX Pico TTS, py-picotts 61 | * for ASR one or more of: 62 | - kaldi-asr 5.1, py-kaldi-asr 63 | - pocketsphinx 64 | * sequitur 65 | * pulseaudio 66 | * webrtc 67 | 68 | License 69 | ~~~~~~~ 70 | 71 | My own code is Apache-2.0 licensed unless otherwise noted in the script's copyright 72 | headers. 73 | 74 | Some scripts and files are based on works of others, in those cases it is my 75 | intention to keep the original license intact. Please make sure to check the 76 | copyright headers inside for more information. 
77 | 78 | Authors 79 | ~~~~~~~ 80 | 81 | Guenter Bartsch 82 | Paul Guyot 83 | 84 | -------------------------------------------------------------------------------- /examples/eliza.py: -------------------------------------------------------------------------------- 1 | #---------------------------------------------------------------------- 2 | # eliza.py 3 | # 4 | # a cheezy little Eliza knock-off by Joe Strout 5 | # with some updates by Jeff Epler 6 | # hacked into a module and updated by Jez Higgins 7 | #---------------------------------------------------------------------- 8 | 9 | import string 10 | import re 11 | import random 12 | 13 | class eliza: 14 | def __init__(self): 15 | self.keys = list(map(lambda x:re.compile(x[0], re.IGNORECASE),gPats)) 16 | self.values = list(map(lambda x:x[1],gPats)) 17 | 18 | #---------------------------------------------------------------------- 19 | # translate: take a string, replace any words found in dict.keys() 20 | # with the corresponding dict.values() 21 | #---------------------------------------------------------------------- 22 | def translate(self,str,dict): 23 | words = str.lower().split() 24 | keys = dict.keys(); 25 | for i in range(0,len(words)): 26 | if words[i] in keys: 27 | words[i] = dict[words[i]] 28 | return ' '.join(words) 29 | 30 | #---------------------------------------------------------------------- 31 | # respond: take a string, a set of regexps, and a corresponding 32 | # set of response lists; find a match, and return a randomly 33 | # chosen response from the corresponding list. 34 | #---------------------------------------------------------------------- 35 | def respond(self,str): 36 | # find a match among keys 37 | for i in range(0, len(self.keys)): 38 | match = self.keys[i].match(str) 39 | if match: 40 | # found a match ... stuff with corresponding value 41 | # chosen randomly from among the available options 42 | resp = random.choice(self.values[i]) 43 | # we've got a response... 
stuff in reflected text where indicated 44 | pos = resp.find('%') 45 | while pos > -1: 46 | num = int(resp[pos+1:pos+2]) 47 | resp = resp[:pos] + \ 48 | self.translate(match.group(num),gReflections) + \ 49 | resp[pos+2:] 50 | pos = resp.find('%') 51 | # fix munged punctuation at the end 52 | if resp[-2:] == '?.': resp = resp[:-2] + '.' 53 | if resp[-2:] == '??': resp = resp[:-2] + '?' 54 | return resp 55 | 56 | #---------------------------------------------------------------------- 57 | # gReflections, a translation table used to convert things you say 58 | # into things the computer says back, e.g. "I am" --> "you are" 59 | #---------------------------------------------------------------------- 60 | gReflections = { 61 | "am" : "are", 62 | "was" : "were", 63 | "i" : "you", 64 | "i'd" : "you would", 65 | "i've" : "you have", 66 | "i'll" : "you will", 67 | "my" : "your", 68 | "are" : "am", 69 | "you've": "I have", 70 | "you'll": "I will", 71 | "your" : "my", 72 | "yours" : "mine", 73 | "you" : "me", 74 | "me" : "you" 75 | } 76 | 77 | #---------------------------------------------------------------------- 78 | # gPats, the main response table. Each element of the list is a 79 | # two-element list; the first is a regexp, and the second is a 80 | # list of possible responses, with group-macros labelled as 81 | # %1, %2, etc. 
82 | #---------------------------------------------------------------------- 83 | gPats = [ 84 | [r'I need (.*)', 85 | [ "Why do you need %1?", 86 | "Would it really help you to get %1?", 87 | "Are you sure you need %1?"]], 88 | 89 | [r'Why don\'?t you ([^\?]*)\??', 90 | [ "Do you really think I don't %1?", 91 | "Perhaps eventually I will %1.", 92 | "Do you really want me to %1?"]], 93 | 94 | [r'Why can\'?t I ([^\?]*)\??', 95 | [ "Do you think you should be able to %1?", 96 | "If you could %1, what would you do?", 97 | "I don't know -- why can't you %1?", 98 | "Have you really tried?"]], 99 | 100 | [r'I can\'?t (.*)', 101 | [ "How do you know you can't %1?", 102 | "Perhaps you could %1 if you tried.", 103 | "What would it take for you to %1?"]], 104 | 105 | [r'I am (.*)', 106 | [ "Did you come to me because you are %1?", 107 | "How long have you been %1?", 108 | "How do you feel about being %1?"]], 109 | 110 | [r'I\'?m (.*)', 111 | [ "How does being %1 make you feel?", 112 | "Do you enjoy being %1?", 113 | "Why do you tell me you're %1?", 114 | "Why do you think you're %1?"]], 115 | 116 | [r'Are you ([^\?]*)\??', 117 | [ "Why does it matter whether I am %1?", 118 | "Would you prefer it if I were not %1?", 119 | "Perhaps you believe I am %1.", 120 | "I may be %1 -- what do you think?"]], 121 | 122 | [r'What (.*)', 123 | [ "Why do you ask?", 124 | "How would an answer to that help you?", 125 | "What do you think?"]], 126 | 127 | [r'How (.*)', 128 | [ "How do you suppose?", 129 | "Perhaps you can answer your own question.", 130 | "What is it you're really asking?"]], 131 | 132 | [r'Because (.*)', 133 | [ "Is that the real reason?", 134 | "What other reasons come to mind?", 135 | "Does that reason apply to anything else?", 136 | "If %1, what else must be true?"]], 137 | 138 | [r'(.*) sorry (.*)', 139 | [ "There are many times when no apology is needed.", 140 | "What feelings do you have when you apologize?"]], 141 | 142 | [r'Hello(.*)', 143 | [ "Hello... 
I'm glad you could drop by today.", 144 | "Hi there... how are you today?", 145 | "Hello, how are you feeling today?"]], 146 | 147 | [r'I think (.*)', 148 | [ "Do you doubt %1?", 149 | "Do you really think so?", 150 | "But you're not sure %1?"]], 151 | 152 | [r'(.*) friend (.*)', 153 | [ "Tell me more about your friends.", 154 | "When you think of a friend, what comes to mind?", 155 | "Why don't you tell me about a childhood friend?"]], 156 | 157 | [r'Yes', 158 | [ "You seem quite sure.", 159 | "OK, but can you elaborate a bit?"]], 160 | 161 | [r'(.*) computer(.*)', 162 | [ "Are you really talking about me?", 163 | "Does it seem strange to talk to a computer?", 164 | "How do computers make you feel?", 165 | "Do you feel threatened by computers?"]], 166 | 167 | [r'Is it (.*)', 168 | [ "Do you think it is %1?", 169 | "Perhaps it's %1 -- what do you think?", 170 | "If it were %1, what would you do?", 171 | "It could well be that %1."]], 172 | 173 | [r'It is (.*)', 174 | [ "You seem very certain.", 175 | "If I told you that it probably isn't %1, what would you feel?"]], 176 | 177 | [r'Can you ([^\?]*)\??', 178 | [ "What makes you think I can't %1?", 179 | "If I could %1, then what?", 180 | "Why do you ask if I can %1?"]], 181 | 182 | [r'Can I ([^\?]*)\??', 183 | [ "Perhaps you don't want to %1.", 184 | "Do you want to be able to %1?", 185 | "If you could %1, would you?"]], 186 | 187 | [r'You are (.*)', 188 | [ "Why do you think I am %1?", 189 | "Does it please you to think that I'm %1?", 190 | "Perhaps you would like me to be %1.", 191 | "Perhaps you're really talking about yourself?"]], 192 | 193 | [r'You\'?re (.*)', 194 | [ "Why do you say I am %1?", 195 | "Why do you think I am %1?", 196 | "Are we talking about you, or me?"]], 197 | 198 | [r'I don\'?t (.*)', 199 | [ "Don't you really %1?", 200 | "Why don't you %1?", 201 | "Do you want to %1?"]], 202 | 203 | [r'I feel (.*)', 204 | [ "Good, tell me more about these feelings.", 205 | "Do you often feel %1?", 206 | 
"When do you usually feel %1?", 207 | "When you feel %1, what do you do?"]], 208 | 209 | [r'I have (.*)', 210 | [ "Why do you tell me that you've %1?", 211 | "Have you really %1?", 212 | "Now that you have %1, what will you do next?"]], 213 | 214 | [r'I would (.*)', 215 | [ "Could you explain why you would %1?", 216 | "Why would you %1?", 217 | "Who else knows that you would %1?"]], 218 | 219 | [r'Is there (.*)', 220 | [ "Do you think there is %1?", 221 | "It's likely that there is %1.", 222 | "Would you like there to be %1?"]], 223 | 224 | [r'My (.*)', 225 | [ "I see, your %1.", 226 | "Why do you say that your %1?", 227 | "When your %1, how do you feel?"]], 228 | 229 | [r'You (.*)', 230 | [ "We should be discussing you, not me.", 231 | "Why do you say that about me?", 232 | "Why do you care whether I %1?"]], 233 | 234 | [r'Why (.*)', 235 | [ "Why don't you tell me the reason why %1?", 236 | "Why do you think %1?" ]], 237 | 238 | [r'I want (.*)', 239 | [ "What would it mean to you if you got %1?", 240 | "Why do you want %1?", 241 | "What would you do if you got %1?", 242 | "If you got %1, then what would you do?"]], 243 | 244 | [r'(.*) mother(.*)', 245 | [ "Tell me more about your mother.", 246 | "What was your relationship with your mother like?", 247 | "How do you feel about your mother?", 248 | "How does this relate to your feelings today?", 249 | "Good family relations are important."]], 250 | 251 | [r'(.*) father(.*)', 252 | [ "Tell me more about your father.", 253 | "How did your father make you feel?", 254 | "How do you feel about your father?", 255 | "Does your relationship with your father relate to your feelings today?", 256 | "Do you have trouble showing affection with your family?"]], 257 | 258 | [r'(.*) child(.*)', 259 | [ "Did you have close friends as a child?", 260 | "What is your favorite childhood memory?", 261 | "Do you remember any dreams or nightmares from childhood?", 262 | "Did the other children sometimes tease you?", 263 | "How do you think 
your childhood experiences relate to your feelings today?"]], 264 | 265 | [r'(.*)\?', 266 | [ "Why do you ask that?", 267 | "Please consider whether you can answer your own question.", 268 | "Perhaps the answer lies within yourself?", 269 | "Why don't you tell me?"]], 270 | 271 | [r'quit', 272 | [ "Thank you for talking with me.", 273 | "Good-bye.", 274 | "Thank you, that will be $150. Have a good day!"]], 275 | 276 | [r'(.*)', 277 | [ "Please tell me more.", 278 | "Let's change focus a bit... Tell me about your family.", 279 | "Can you elaborate on that?", 280 | "Why do you say that %1?", 281 | "I see.", 282 | "Very interesting.", 283 | "%1.", 284 | "I see. And what does that tell you?", 285 | "How does that make you feel?", 286 | "How do you feel when you say that?"]] 287 | ] 288 | 289 | #---------------------------------------------------------------------- 290 | # command_interface 291 | #---------------------------------------------------------------------- 292 | def command_interface(): 293 | print('Therapist\n---------') 294 | print('Talk to the program by typing in plain English, using normal upper-') 295 | print('and lower-case letters and punctuation. Enter "quit" when done.') 296 | print('='*72) 297 | print('Hello. 
How are you feeling today?') 298 | 299 | s = '' 300 | therapist = eliza(); 301 | while s != 'quit': 302 | try: 303 | s = input('> ') 304 | except EOFError: 305 | s = 'quit' 306 | print(s) 307 | while s[-1] in '!.': 308 | s = s[:-1] 309 | print(therapist.respond(s)) 310 | 311 | 312 | if __name__ == "__main__": 313 | command_interface() 314 | -------------------------------------------------------------------------------- /examples/espeakng_tts.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from nltools.tts import TTS 3 | 4 | tts = TTS(engine="espeak", voice="en") 5 | tts.say("hello from your pi") 6 | -------------------------------------------------------------------------------- /examples/live_recorder.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import logging 4 | import wave 5 | import struct 6 | import os 7 | import sys 8 | 9 | from datetime import date 10 | from optparse import OptionParser 11 | 12 | from nltools.asr import ASR 13 | from nltools.pulserecorder import PulseRecorder, MIX_MODE_BOTH, MIX_MODE_LEFT, MIX_MODE_RIGHT 14 | from nltools.vad import VAD 15 | from nltools import misc 16 | 17 | DEFAULT_VOLUME = 150 18 | SAMPLE_RATE = 16000 19 | DEFAULT_MIX_MODE = 'both' 20 | 21 | # 22 | # init 23 | # 24 | 25 | misc.init_app ('live_recorder') 26 | 27 | # 28 | # commandline parsing 29 | # 30 | 31 | parser = OptionParser("usage: %prog [options]") 32 | 33 | parser.add_option ("-m", "--mix-mode", dest='mix_mode', type='str', default=DEFAULT_MIX_MODE, 34 | help="mix mode (left, right, both - default: %s)" % DEFAULT_MIX_MODE) 35 | 36 | parser.add_option ("-V", "--volume", dest='volume', type='int', default=DEFAULT_VOLUME, 37 | help="volume, default: %d%%" % DEFAULT_VOLUME) 38 | 39 | parser.add_option ("-v", "--verbose", action="store_true", dest="verbose", 40 | help="enable verbose logging") 41 | 42 | (options, args) = 
parser.parse_args() 43 | 44 | if options.verbose: 45 | logging.basicConfig(level=logging.DEBUG) 46 | else: 47 | logging.basicConfig(level=logging.INFO) 48 | 49 | 50 | if options.mix_mode == 'left': 51 | mix_mode = MIX_MODE_LEFT 52 | elif options.mix_mode == 'right': 53 | mix_mode = MIX_MODE_RIGHT 54 | elif options.mix_mode == 'both': 55 | mix_mode = MIX_MODE_BOTH 56 | else: 57 | parser.print_usage() 58 | sys.exit(1) 59 | 60 | 61 | logging.info ("Initializing...") 62 | 63 | rec = PulseRecorder (volume=options.volume) 64 | vad = VAD() 65 | 66 | rec.start_recording(mix_mode=mix_mode) 67 | logging.info ("Please speak. (CTRL-C to exit)") 68 | 69 | cnt = 0 70 | wfs = None 71 | 72 | while True: 73 | 74 | samples = rec.get_samples() 75 | 76 | audio, finalize = vad.process_audio(samples) 77 | 78 | if not audio: 79 | continue 80 | 81 | logging.debug ("%8d got audio. finalize: %s" % (cnt, repr(finalize))) 82 | cnt =+ 1 83 | 84 | if not wfs: 85 | 86 | ds = date.strftime(date.today(), '%Y%m%d') 87 | audiofn = 'rec-%s.wav' % ds 88 | logging.debug('audiofn: %s' % audiofn) 89 | 90 | audiocnt = 0 91 | while True: 92 | audiocnt += 1 93 | audiofn = 'rec-%s-%03d.wav' % (ds, audiocnt) 94 | if not os.path.isfile(audiofn): 95 | break 96 | 97 | 98 | # create wav file 99 | 100 | wfs = wave.open(audiofn, 'wb') 101 | wfs.setnchannels(1) 102 | wfs.setsampwidth(2) 103 | wfs.setframerate(SAMPLE_RATE) 104 | 105 | logging.info('voice activity detected, recording to: %s' % audiofn) 106 | 107 | packed_audio = struct.pack('%sh' % len(audio), *audio) 108 | wfs.writeframes(packed_audio) 109 | 110 | if finalize: 111 | 112 | logging.info('recording to %s finished.' 
% audiofn) 113 | 114 | wfs.close() 115 | wfs = None 116 | 117 | 118 | -------------------------------------------------------------------------------- /examples/live_vad.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import logging 3 | logging.basicConfig(level=logging.INFO) 4 | from nltools.asr import ASR 5 | from nltools.pulserecorder import PulseRecorder 6 | from nltools.vad import VAD 7 | 8 | MODELDIR = '/opt/kaldi/model/kaldi-generic-en-tdnn_250' 9 | VOLUME = 150 10 | 11 | print ("Initializing...") 12 | 13 | rec = PulseRecorder (volume=VOLUME) 14 | asr = ASR(model_dir = MODELDIR) 15 | vad = VAD() 16 | 17 | rec.start_recording() 18 | print ("Please speak. (CTRL-C to exit)") 19 | 20 | while True: 21 | 22 | samples = rec.get_samples() 23 | 24 | audio, finalize = vad.process_audio(samples) 25 | 26 | if not audio: 27 | continue 28 | 29 | user_utt, confidence = asr.decode(audio, finalize) 30 | 31 | print ("\r%s " % user_utt, end='', flush=True) 32 | 33 | if finalize: 34 | print () 35 | 36 | -------------------------------------------------------------------------------- /examples/va_eliza.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import logging 3 | logging.basicConfig(level=logging.INFO) 4 | from enum import Enum 5 | from nltools.asr import ASR 6 | from nltools.pulserecorder import PulseRecorder 7 | from nltools.vad import VAD 8 | from nltools.tts import TTS 9 | from nltools.macro_engine import MacroEngine 10 | from nltools.misc import edit_distance 11 | from nltools.tokenizer import tokenize 12 | from eliza import eliza 13 | 14 | MODELDIR = '/opt/kaldi/model/kaldi-generic-en-tdnn_250' 15 | VOLUME = 150 16 | ED_THRESHOLD = 2 17 | 18 | class Intent(Enum): 19 | HELLO = 1 20 | LIGHT = 2 21 | RADIO = 3 22 | 23 | print ("Initializing...") 24 | 25 | radio_on = False 26 | lights_on = False 27 | asr = ASR(model_dir = MODELDIR) 28 | 
rec = PulseRecorder (volume=VOLUME) 29 | vad = VAD() 30 | tts = TTS(engine="espeak", voice="en") 31 | me = MacroEngine() 32 | eliza = eliza() 33 | 34 | utt_map = {} 35 | def add_utt (pattern, intent): 36 | for utterance, t in me.expand_macros('en', pattern): 37 | utt = ' '.join(utterance) 38 | utt_map[utt] = intent 39 | 40 | add_utt("(hi|hello|ok) computer", Intent.HELLO) 41 | add_utt("switch (on|off) the (light|lights)", Intent.LIGHT) 42 | add_utt("switch the (light|lights) (on|off)", Intent.LIGHT) 43 | add_utt("switch (on|off) the (music|radio)", Intent.RADIO) 44 | add_utt("switch the (music|radio) (on|off)", Intent.RADIO) 45 | 46 | rec.start_recording() 47 | print ("Please speak. (CTRL-C to exit)") 48 | 49 | while True: 50 | samples = rec.get_samples() 51 | audio, finalize = vad.process_audio(samples) 52 | if not audio: 53 | continue 54 | 55 | user_utt, c = asr.decode(audio, finalize) 56 | print ("\r%s " % user_utt, end='', flush=True) 57 | 58 | if finalize: 59 | print () 60 | 61 | best_dist = ED_THRESHOLD 62 | intent = None 63 | for utt in utt_map: 64 | dist = edit_distance (tokenize (utt, lang='en'), 65 | tokenize (user_utt, lang='en')) 66 | if (dist decoder 65 | 66 | if self._engine == ASR_ENGINE_NNET3: 67 | 68 | logging.debug ('loading ASR model %s from %s...' % (self._model_name, self._model_dir)) 69 | start_time = time.time() 70 | self.nnet3_model = KaldiNNet3OnlineModel ( self._model_dir, self._model_name, 71 | beam = kaldi_beam, 72 | acoustic_scale = kaldi_acoustic_scale, 73 | frame_subsampling_factor = kaldi_frame_subsampling_factor) 74 | logging.debug ('ASR model loaded. 
took %fs' % (time.time() - start_time)) 75 | 76 | elif self._engine == ASR_ENGINE_POCKETSPHINX: 77 | 78 | import pocketsphinx 79 | self.ps_config = pocketsphinx.Decoder.default_config() 80 | 81 | # determine CFG_N_TIED_STATES, CFG_WAVFILE_SRATE 82 | # cmusphinx-cont-voxforge-en-latest/etc/sphinx_train.cfg 83 | traincfg_fn = '%s/etc/sphinx_train.cfg' % model_dir 84 | n_tied_states = 6000 85 | self.ps_samplerate = 16000 86 | with open (traincfg_fn, 'r') as traincfg_f: 87 | for line in traincfg_f: 88 | if not line: 89 | break 90 | # $CFG_N_TIED_STATES = 6000; 91 | if 'CFG_N_TIED_STATES' in line: 92 | # logging.debug ('parsing train cfg line %s' % line) 93 | m = re.match (r"\$CFG_N_TIED_STATES\s+=\s+([0-9]+)\s*;", line.strip()) 94 | if m: 95 | n_tied_states = int(m.group(1)) 96 | # logging.debug ('matched, n_tied_states=%d' % n_tied_states) 97 | 98 | # $CFG_WAVFILE_SRATE = 16000.0; 99 | if 'CFG_WAVFILE_SRATE' in line: 100 | m = re.match (r"\$CFG_WAVFILE_SRATE\s+=\s+([0-9.]+)\s*;", line.strip()) 101 | if m: 102 | self.ps_samplerate = int(float(m.group(1))) 103 | 104 | self.ps_config.set_string('-hmm', '%s/model_parameters/%s.cd_cont_%d' % (model_dir, model_name, n_tied_states)) 105 | self.ps_config.set_float ('-lw', 10) 106 | self.ps_config.set_string('-feat', '1s_c_d_dd') 107 | self.ps_config.set_float ('-beam', 1e-80) 108 | self.ps_config.set_float ('-wbeam', 1e-40) 109 | self.ps_config.set_string('-dict', '%s/etc/%s.dic' % (model_dir, model_name)) 110 | self.ps_config.set_float ('-wip', 0.2) 111 | self.ps_config.set_string('-agc', 'none') 112 | self.ps_config.set_string('-varnorm', 'no') 113 | self.ps_config.set_string('-cmn', 'current') 114 | self.ps_config.set_string('-lm', '%s/etc/%s.lm.bin' % (model_dir, model_name)) 115 | 116 | self.ps_config.set_string('-logfn', "/dev/null") 117 | 118 | self.asr_in_utt = {} # stream_id -> Boolean 119 | 120 | else: 121 | raise Exception ('unknown ASR engine: %s' % self._engine) 122 | 123 | def decode (self, audio, do_finalize, 
sample_rate = DEFAULT_SAMPLE_RATE, stream_id = DEFAULT_STREAM_ID): 124 | 125 | if self._engine == ASR_ENGINE_NNET3: 126 | 127 | if not stream_id in self.asr_decoders: 128 | self.asr_decoders[stream_id] = KaldiNNet3OnlineDecoder (self.nnet3_model) 129 | 130 | decoder = self.asr_decoders[stream_id] 131 | decoder.decode(sample_rate, np.array(audio, dtype=np.float32), do_finalize) 132 | 133 | hstr, confidence = decoder.get_decoded_string() 134 | hstr = hstr.strip() 135 | 136 | elif self._engine == ASR_ENGINE_POCKETSPHINX: 137 | 138 | if sample_rate != self.ps_samplerate: 139 | raise Exception ('decode: samplerate does not match model: %d vs %d' % (sample_rate, self.ps_samplerate)) 140 | 141 | if not stream_id in self.asr_decoders: 142 | import pocketsphinx 143 | self.asr_decoders[stream_id] = pocketsphinx.Decoder(self.ps_config) 144 | self.asr_in_utt[stream_id] = False 145 | 146 | decoder = self.asr_decoders[stream_id] 147 | if not self.asr_in_utt[stream_id]: 148 | decoder.start_utt() 149 | self.asr_in_utt[stream_id] = True 150 | 151 | audios = struct.pack('<%dh' % len(audio), *audio) 152 | 153 | decoder.process_raw(audios, False, False) 154 | 155 | if not do_finalize: 156 | return None, 0.0 157 | 158 | decoder.end_utt() 159 | self.asr_in_utt[stream_id] = False 160 | 161 | hypothesis = decoder.hyp() 162 | logmath = decoder.get_logmath() 163 | hstr = hypothesis.hypstr.decode('utf8').strip() 164 | confidence = logmath.exp(hypothesis.prob) 165 | 166 | else: 167 | raise Exception ('unknown ASR engine: %s' % self._engine) 168 | 169 | return hstr, confidence 170 | 171 | 172 | @property 173 | def engine(self): 174 | return self._engine 175 | # @engine.setter 176 | # def engine(self, v): 177 | # self._engine = v 178 | 179 | @property 180 | def model_dir(self): 181 | return self._model_dir 182 | # @model_dir.setter 183 | # def model_dir(self, v): 184 | # self._model_dir = v 185 | 186 | @property 187 | def model_name(self): 188 | return self._model_name 189 | # 
@model_name.setter 190 | # def model_name(self, v): 191 | # self._model_name = v 192 | 193 | def decode_wav_file(self, wavfile): 194 | 195 | wavf = wave.open(wavfile, 'rb') 196 | 197 | # check format 198 | assert wavf.getnchannels()==1 199 | assert wavf.getsampwidth()==2 200 | assert wavf.getnframes()>0 201 | 202 | sample_rate = wavf.getframerate() 203 | 204 | # read the whole file into memory, for now 205 | num_frames = wavf.getnframes() 206 | frames = wavf.readframes(num_frames) 207 | 208 | samples = struct.unpack_from('<%dh' % num_frames, frames) 209 | 210 | wavf.close() 211 | 212 | return self.decode(samples, True, sample_rate) 213 | 214 | -------------------------------------------------------------------------------- /nltools/macro_engine.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # 5 | # Copyright 2018 Guenter Bartsch 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | # 19 | 20 | # 21 | # simple macro engine aimed at generating natural language expansions 22 | # 23 | # maintains dict of named macros for various languages 24 | # contains utility functions that expand macros to produce 25 | # training data input 26 | # 27 | 28 | import logging 29 | 30 | from copy import copy 31 | from past.builtins import basestring 32 | 33 | from nltools.tokenizer import tokenize 34 | 35 | class MacroEngine(object): 36 | 37 | def __init__(self): 38 | self.named_macros = {} 39 | 40 | def add_macro_expansion(self, name, expansion): 41 | if not name in self.named_macros: 42 | self.named_macros[name] = [] 43 | 44 | if isinstance(expansion, dict): 45 | exp = expansion 46 | else: 47 | exp = {'W': expansion} 48 | 49 | self.named_macros[name].append(exp) 50 | 51 | def expand_macros (self, lang, txt): 52 | 53 | logging.debug(u"expand macros : %s" % txt) 54 | 55 | implicit_macros = {} 56 | 57 | txt2 = '' 58 | 59 | i = 0 60 | while i0: 104 | 105 | parts1, cnt, r, mpos, macro_rs = todo.pop() 106 | 107 | if cnt >= len(parts1): 108 | done.append((r, mpos)) 109 | continue 110 | 111 | p1 = parts1[cnt] 112 | 113 | if cnt % 2 == 1: 114 | 115 | sub_parts = p1.split(':') 116 | 117 | if len(sub_parts) != 2: 118 | raise Exception ('syntax error in macro call %s' % repr(p1)) 119 | 120 | name = sub_parts[0] 121 | 122 | if name == 'empty': 123 | todo.append((parts, cnt+1, copy(r), mpos, copy(macro_rs))) 124 | else: 125 | 126 | vn = sub_parts[1] 127 | 128 | if name in macro_rs: 129 | macro = [ macro_rs[name] ] 130 | else: 131 | macro = self.named_macros.get(name, None) 132 | if not macro: 133 | macro = implicit_macros.get(name, None) 134 | if not macro: 135 | raise Exception ('unknown macro "%s" called' % name) 136 | 137 | for r3 in macro: 138 | r1 = copy(r) 139 | mpos1 = copy(mpos) 140 | macro_rs1 = copy(macro_rs) 141 | 142 | macro_rs1[name] = r3 143 | 144 | # take care of multiple invocactions of the same macro 145 | 146 | mpnn = 0 147 | while True: 148 | mpn = 
'%s_%d_start' % (name, mpnn) 149 | if not mpn in mpos1: 150 | break 151 | mpnn += 1 152 | 153 | mpos1['%s_%d_start' % (name, mpnn)] = len(r1) 154 | s3 = r3[vn] 155 | if isinstance (s3, basestring): 156 | s3 = tokenize (s3, lang=lang) 157 | r3[vn] = s3 158 | r1.extend(r3[vn]) 159 | mpos1['%s_%d_end' % (name, mpnn)] = len(r1) 160 | 161 | for vn3 in r3: 162 | mpos1['%s_%d_%s' % (name, mpnn, vn3.lower())] = r3[vn3] 163 | 164 | todo.append((parts, cnt+1, r1, mpos1, macro_rs1)) 165 | 166 | # if name == 'home_locations': 167 | # import pdb; pdb.set_trace() 168 | 169 | else: 170 | 171 | sub_parts = tokenize(p1, lang=lang, keep_punctuation=False) 172 | 173 | r = copy(r) 174 | r.extend(sub_parts) 175 | 176 | todo.append((parts, cnt+1, r, mpos, macro_rs)) 177 | 178 | return done 179 | 180 | -------------------------------------------------------------------------------- /nltools/misc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # 5 | # Copyright 2014, 2015, 2016, 2017 Guenter Bartsch 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
#
#

#
# just a collection of random utility subprograms
#

import sys
import os
import subprocess
try:
    import ConfigParser as configparser
except ImportError:
    import configparser
try:
    from imp import reload
except ImportError:
    pass
import shutil
import errno
import curses
import curses.textpad
import traceback
import logging
import code
import signal

try:
    from setproctitle import setproctitle
except ImportError:
    # setproctitle is a third-party nicety only used by init_app(); fall back
    # to a no-op so the rest of this module stays importable without it.
    def setproctitle(proc_title):
        pass

from os.path import expanduser

def load_config(configfn = '.nlprc', defaults=None):
    """Read ~/<configfn> into a ConfigParser instance and return it.

    BUGFIX: the `defaults` parameter used to be a mutable default argument
    (`defaults={}`), shared across calls.
    """

    home_path = expanduser("~")

    config = configparser.ConfigParser(defaults if defaults is not None else {})
    config.read("%s/%s" % (home_path, configfn))

    return config

def _debug(sig, frame):
    """Interrupt running process, and provide a python prompt for
    interactive debugging.

    source: http://stackoverflow.com/questions/132058/showing-the-stack-trace-from-a-running-python-application
    """
    d={'_frame':frame}         # Allow access to frame object.
    d.update(frame.f_globals)  # Unless shadowed by global
    d.update(frame.f_locals)

    i = code.InteractiveConsole(d)
    message  = "Signal received : entering python shell.\nTraceback:\n"
    message += ''.join(traceback.format_stack(frame))
    i.interact(message)

def init_app (proc_title):
    """Set the process title, fix py2 stdout encoding, install SIGUSR1 debug hook."""

    setproctitle (proc_title)

    if sys.version_info < (3, 0):
        # python 2 only: make utf-8 the default encoding, unbuffer stdout
        reload(sys)
        sys.setdefaultencoding('utf-8')
        sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)

    # install signal handler so SIGUSR1 will enter pdb

    signal.signal(signal.SIGUSR1, _debug)  # Register handler


def compress_ws (s):
    """Collapse runs of spaces in s to a single space (trailing spaces dropped)."""

    vc = True   # True while no pending space separator

    res = ''

    for c in s:

        if c == ' ':
            vc = False
        else:
            if vc:
                res = res + c
            else:
                res = res + ' ' + c
            vc = True

    return res

def run_command(command, capture_stderr=True):
    """Run command, yielding its output line by line (as bytes)."""
    p = subprocess.Popen(command,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT if capture_stderr else subprocess.PIPE)
    return iter(p.stdout.readline, b'')

# umlaut <-> TeX-style escape sequences used by the curses editor below
tex_umlaut_map = { u'ä': '"a', u'ü': '"u', u'ö': '"o', u'Ä':'"A', u'Ü':'"U', u'Ö':'"O', u'ß':'"s' }

def tex_encode (u):
    """Replace German umlauts in u by their TeX escape sequences."""

    s = ''

    for c in u:

        if c in tex_umlaut_map:
            s += tex_umlaut_map[c]
        else:
            s += str(c)

    return s

def tex_decode (s):
    """Inverse of tex_encode: turn TeX escape sequences back into umlauts."""

    u = ''

    pos = 0
    while (pos < len(s)):

        found = False

        for umlaut in tex_umlaut_map:
            v = tex_umlaut_map[umlaut]
            if s[pos:].startswith(v):
                u += umlaut
                pos += len(v)
                found = True
                break

        if not found:
            # BUGFIX: this used to read `u += unicode(s[pos])`, which raises
            # NameError on Python 3; plain indexing already yields a character.
            u += s[pos]
            pos += 1

    return u

def symlink(targetfn, linkfn):
    """Create a symlink, tolerating one that already exists."""
    try:
        os.symlink(targetfn, linkfn)
    except OSError as e:
        if e.errno == errno.EEXIST:
            logging.debug('symlink %s -> %s already exists' % (targetfn, linkfn))
        else:
            # BUGFIX: any other OSError (permissions, missing dir, ...) used
            # to be swallowed silently here.
            raise

def mkdirs(path):
    """mkdir -p: create path including parents, ok if it already exists."""
    try:
        os.makedirs(path)
    except OSError as exception:
        if exception.errno != errno.EEXIST:
            raise

def copy_file (src, dst):
    logging.debug("copying %s to %s" % (src, dst))
    shutil.copy(src, dst)


def edit_distance (s, t):
    """Levenshtein distance between sequences s and t.

    https://en.wikipedia.org/wiki/Wagner%E2%80%93Fischer_algorithm
    """

    # for all i and j, d[i,j] will hold the Levenshtein distance between
    # the first i words of s and the first j words of t;
    # note that d has (m+1)x(n+1) values

    m = len(s)
    n = len(t)

    d = [[0 for i in range(n+1)] for j in range(m+1)]

    for i in range (m+1):
        d[i][0] = i    # the distance of any first seq to an empty second seq
    for j in range (n+1):
        d[0][j] = j    # the distance of any second seq to an empty first seq

    for j in range (1, n+1):
        for i in range (1, m+1):

            if s[i-1] == t[j-1]:
                d[i][j] = d[i-1][j-1]         # no operation required
            else:
                d[i][j] = min ([
                           d[i-1][j] + 1,     # a deletion
                           d[i][j-1] + 1,     # an insertion
                           d[i-1][j-1] + 1    # a substitution
                          ])

    return d[m][n]

def limit_str(s, limit):
    """Truncate s to at most limit characters, ellipsizing with '...'."""

    l = len(s)

    if l<=limit:
        return s

    l = limit-3

    return s[:l] + '...'
208 | 209 | 210 | # 211 | # curses utils 212 | # 213 | 214 | def edit_popup (stdscr, title, s): 215 | 216 | my, mx = stdscr.getmaxyx() 217 | 218 | ww = mx * 9 / 10 219 | wh = 3 220 | 221 | wox = mx / 2 - ww/2 222 | woy = my / 2 - wh/2 223 | 224 | win = curses.newwin(wh, ww, woy, wox) 225 | win.box() 226 | win.addstr(0, 3, title) 227 | 228 | win.refresh() 229 | 230 | swin = win.derwin (1, ww-4, 1, 2) 231 | 232 | tb = curses.textpad.Textbox(swin, insert_mode=True) 233 | 234 | swin.insstr (0, 0, tex_encode(s)) 235 | 236 | swin.refresh() 237 | 238 | s = tex_decode(tb.edit()) 239 | 240 | return s.rstrip() 241 | 242 | def message_popup (stdscr, title, msg): 243 | 244 | my, mx = stdscr.getmaxyx() 245 | 246 | ww = len(title) 247 | 248 | lines = msg.split('\n') 249 | for line in lines: 250 | if len(line)>ww: 251 | ww = len(line) 252 | ww += 6 253 | wh = len(lines) + 2 254 | 255 | wox = mx / 2 - ww/2 256 | woy = my / 2 - wh/2 257 | 258 | win = curses.newwin(wh, ww, woy, wox) 259 | win.box() 260 | win.addstr(0, 3, title.encode('utf8')) 261 | 262 | win.refresh() 263 | 264 | swin = win.derwin (wh-2, ww-4, 1, 1) 265 | 266 | for i, line in enumerate(lines): 267 | swin.insstr (i, 0, line.encode('utf8')) 268 | 269 | swin.refresh() 270 | 271 | return swin 272 | 273 | def render_template(template_file, dst_file, **kwargs): 274 | """Copy template and substitute template strings 275 | 276 | File `template_file` is copied to `dst_file`. Then, each template variable 277 | is replaced by a value. Template variables are of the form 278 | 279 | {{val}} 280 | 281 | Example: 282 | 283 | Contents of template_file: 284 | 285 | VAR1={{val1}} 286 | VAR2={{val2}} 287 | VAR3={{val3}} 288 | 289 | render_template(template_file, output_file, val1="hello", val2="world") 290 | 291 | Contents of output_file: 292 | 293 | VAR1=hello 294 | VAR2=world 295 | VAR3={{val3}} 296 | 297 | :param template_file: Path to the template file. 298 | :param dst_file: Path to the destination file. 
299 | :param kwargs: Keys correspond to template variables. 300 | :return: 301 | """ 302 | with open(template_file) as f: 303 | template_text = f.read() 304 | 305 | dst_text = template_text 306 | 307 | for key, value in kwargs.iteritems(): 308 | dst_text = dst_text .replace("{{" + key + "}}", value) 309 | 310 | with open(dst_file, "wt") as f: 311 | f.write(dst_text) 312 | -------------------------------------------------------------------------------- /nltools/phonetics.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # 5 | # Copyright 2013, 2014, 2016, 2017 Guenter Bartsch 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | # 21 | # big phoneme table 22 | # 23 | # entries: 24 | # ( IPA, XSAMPA, MARY, ESPEAK ) 25 | # 26 | 27 | MAX_PHONEME_LENGTH = 2 28 | 29 | big_phoneme_table = [ 30 | 31 | # 32 | # stop 33 | # 34 | 35 | ( u'p' , 'p' , 'p', 'p' ), 36 | ( u'b' , 'b' , 'b', 'b' ), 37 | ( u't' , 't' , 't', 't' ), 38 | ( u'd' , 'd' , 'd', 'd' ), 39 | ( u'k' , 'k' , 'k', 'k' ), 40 | ( u'g' , 'g' , 'g', 'g' ), 41 | ( u'ʔ' , '?' , '?', '?' 
), 42 | 43 | # 44 | # 2 consonants 45 | # 46 | 47 | ( u'pf' , 'pf' , 'pf' , 'pf' ), 48 | ( u'ts' , 'ts' , 'ts' , 'ts' ), 49 | ( u'tʃ' , 'tS' , 'tS' , 'tS' ), 50 | ( u'dʒ' , 'dZ' , 'dZ' , 'dZ' ), 51 | 52 | # 53 | # fricative 54 | # 55 | 56 | ( u'f' , 'f' , 'f' , 'f' ), 57 | ( u'v' , 'v' , 'v' , 'v' ), 58 | ( u'θ' , 'T' , 'T' , 'T' ), 59 | ( u'ð' , 'D' , 'D' , 'D' ), 60 | ( u's' , 's' , 's' , 's' ), 61 | ( u'z' , 'z' , 'z' , 'z' ), 62 | ( u'ʃ' , 'S' , 'S' , 'S' ), 63 | ( u'ʒ' , 'Z' , 'Z' , 'Z' ), 64 | ( u'ç' , 'C' , 'C' , 'C' ), 65 | ( u'j' , 'j' , 'j' , 'j' ), 66 | ( u'x' , 'x' , 'x' , 'x' ), 67 | ( u'ʁ' , 'R' , 'R' , 'R' ), 68 | ( u'h' , 'h' , 'h' , 'h' ), 69 | ( u'ɥ' , 'H' , 'H' , 'H' ), 70 | 71 | # 72 | # nasal 73 | # 74 | 75 | ( u'm' , 'm' , 'm' , 'm' ), 76 | ( u'n' , 'n' , 'n' , 'n' ), 77 | ( u'ɳ' , 'N' , 'N' , 'N' ), 78 | ( u'ɲ' , 'J' , 'J' , 'J' ), 79 | 80 | # 81 | # liquid 82 | # 83 | 84 | ( u'l' , 'l' , 'l' , 'l' ), 85 | ( u'r' , 'r' , 'r' , 'r' ), 86 | 87 | # 88 | # glide 89 | # 90 | 91 | ( u'w' , 'w' , 'w', 'w' ), 92 | # see above ( u'j' , 'j' , 'j' ), 93 | 94 | # 95 | # vowels: monophongs 96 | # 97 | 98 | # front 99 | ( u'i' , 'i' , 'i' , 'i' ), 100 | ( u'ɪ' , 'I' , 'I' , 'I' ), 101 | ( u'y' , 'y' , 'y' , 'y' ), 102 | ( u'ʏ' , 'Y' , 'Y' , 'y' ), 103 | ( u'e' , 'e' , 'e' , 'e' ), 104 | ( u'ø' , '2' , '2' , 'W' ), 105 | ( u'œ' , '9' , '9' , 'W' ), 106 | ( u'œ̃' , '9~' , '9~' , 'W~' ), 107 | ( u'ɛ' , 'E' , 'E' , 'E' ), 108 | ( u'ɛ̃' , 'E~' , 'E~' , 'E~' ), 109 | ( u'æ' , '{' , '{' , 'a' ), 110 | ( u'a' , 'a' , 'a' , 'a' ), 111 | 112 | # central 113 | ( u'ʌ' , 'V' , 'V' , 'A' ), 114 | ( u'ə' , '@' , '@' , '@' ), 115 | ( u'ɐ' , '6' , '6' , '@' ), 116 | ( u'ɜ' , '3' , 'r=', '3' ), 117 | 118 | # back 119 | ( u'u' , 'u' , 'u' , 'u' ), 120 | ( u'ʊ' , 'U' , 'U' , 'U' ), 121 | ( u'o' , 'o' , 'o' , 'o' ), 122 | ( u'ɔ' , 'O' , 'O' , 'O' ), 123 | ( u'ɔ̃' , 'O~' , 'O~' , 'O~' ), 124 | ( u'ɑ' , 'A' , 'A' , 'A' ), 125 | ( u'ɑ̃' , 'A~' , 'A~' , 'A~' ), 126 | ( u'ɒ' , 'Q' 
, 'Q' , 'Q' ), 127 | 128 | # diphtongs 129 | 130 | ( u'aɪ' , 'aI' , 'aI' , 'aI' ), 131 | ( u'ɔɪ' , 'OI' , 'OI' , 'OI' ), 132 | ( u'aʊ' , 'aU' , 'aU' , 'aU' ), 133 | ( u'ɔʏ' , 'OY' , 'OY' , 'OY' ), 134 | 135 | # 136 | # misc 137 | # 138 | ( u'ː' , ':' , ':' , ':' ), 139 | ( u'-' , '-' , '-' , '-' ), 140 | ( u'\'' , '\'' , '\'' , '\'' ), 141 | 142 | # 143 | # noise 144 | # 145 | 146 | ( u'#' , '#' , '#' ), 147 | ] 148 | 149 | IPA_normalization = { 150 | u':' : u'ː', 151 | u'?' : u'ʔ', 152 | u'ɾ' : u'ʁ', 153 | u'ɡ' : u'g', 154 | u'ŋ' : u'ɳ', 155 | u' ' : None, 156 | u'(' : None, 157 | u')' : None, 158 | u'\u02c8' : u'\'', 159 | u'\u032f' : None, 160 | u'\u0329' : None, 161 | u'\u02cc' : None, 162 | u'\u200d' : None, 163 | u'\u0279' : None, 164 | u'\u0361' : None, 165 | } 166 | 167 | IPA_vowels = set([ 168 | u'i' , 169 | u'ɪ' , 170 | u'y' , 171 | u'ʏ' , 172 | u'e' , 173 | u'ø' , 174 | u'œ' , 175 | u'ɛ' , 176 | u'æ' , 177 | u'a' , 178 | 179 | # central 180 | u'ʌ' , 181 | u'ə' , 182 | u'ɐ' , 183 | u'ɜ' , 184 | 185 | # back 186 | u'u' , 187 | u'ʊ' , 188 | u'o' , 189 | u'ɔ' , 190 | u'ɑ' , 191 | u'ɒ' , 192 | 193 | # diphtongs 194 | 195 | u'aɪ' , 196 | u'ɔɪ' , 197 | u'aʊ' , 198 | u'ɔʏ' ]) 199 | 200 | XSAMPA_normalization = { 201 | ' ': None, 202 | '0': 'O', 203 | ',': None, 204 | } 205 | 206 | def _normalize (s, norm_table): 207 | 208 | buf = "" 209 | 210 | for c in s: 211 | 212 | if c in norm_table: 213 | 214 | x = norm_table[c] 215 | if x: 216 | buf += x 217 | else: 218 | buf += c 219 | 220 | return buf 221 | 222 | def _translate (graph, s, f_idx, t_idx, spaces=False): 223 | 224 | buf = "" 225 | i = 0 226 | l = len(s) 227 | 228 | while i < l: 229 | 230 | found = False 231 | 232 | for pl in range(MAX_PHONEME_LENGTH, 0, -1): 233 | 234 | if i + pl > l: 235 | continue 236 | 237 | substr = s[i : i+pl ] 238 | 239 | #print u"i: %s, pl: %d, substr: '%s'" % (i, pl, substr) 240 | 241 | for pe in big_phoneme_table: 242 | p_f = pe[f_idx] 243 | p_t = pe[t_idx] 244 | 245 | if substr == 
p_f: 246 | buf += p_t 247 | i += pl 248 | if i l: 490 | continue 491 | 492 | substr = s[i : i+pl ] 493 | 494 | #print u"i: %s, pl: %d, substr: '%s'" % (i, pl, substr) 495 | 496 | for pe in xs2xa_table: 497 | p_f = pe[0] 498 | p_t = pe[1] 499 | 500 | if substr == p_f: 501 | if len(buf)>0: 502 | buf += ' ' 503 | buf += p_t 504 | i += pl 505 | found = True 506 | break 507 | 508 | if found: 509 | break 510 | 511 | if not found: 512 | 513 | p = s[i] 514 | 515 | msg = u"xsampa2xarpabet: graph:'%s' - s:'%s' Phoneme not found: '%s' (%d) '%s'" % (graph, s, p, ord(p), s[i:]) 516 | 517 | raise Exception (msg.encode('UTF8')) 518 | 519 | return buf 520 | 521 | 522 | -------------------------------------------------------------------------------- /nltools/pulseplayer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # 5 | # Copyright 2013, 2014, 2016, 2017, 2018 Guenter Bartsch 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
#
#
# simple pulseaudio playback client
#

from io import BytesIO
import wave
import copy
import ctypes
import sys
import logging

from builtins import str as text

from threading import Thread, Lock, Condition

pa = ctypes.cdll.LoadLibrary('libpulse-simple.so.0')

PA_STREAM_PLAYBACK = 1
PA_SAMPLE_S16LE    = 3
BUFFSIZE           = 1024


class pa_sample_spec(ctypes.Structure):
    # mirrors struct pa_sample_spec, /usr/include/pulse/sample.h
    _fields_ = [
        ('format',   ctypes.c_int),
        ('rate',     ctypes.c_uint32),
        ('channels', ctypes.c_uint8),
    ]

pa_simple_new = pa.pa_simple_new
pa_simple_new.restype = ctypes.c_void_p # pointer(pa_simple)
pa_simple_new.argtypes = [
    ctypes.c_char_p,                  # server
    ctypes.c_char_p,                  # name,
    ctypes.c_int,                     # dir,
    ctypes.c_char_p,                  # dev,
    ctypes.c_char_p,                  # stream_name,
    ctypes.POINTER( pa_sample_spec ), # ss,
    ctypes.c_void_p,                  # pointer( pa_channel_map ), # map,
    ctypes.c_void_p,                  # pointer( pa_buffer_attr ), # attr,
    ctypes.POINTER(ctypes.c_int),     # error
]

pa_simple_write = pa.pa_simple_write
pa_simple_write.restype = ctypes.c_int
pa_simple_write.argtypes = [
    ctypes.c_void_p,                  # s
    ctypes.c_void_p,                  # data,
    ctypes.c_size_t,                  # bytes,
    ctypes.POINTER(ctypes.c_int),     # error
]

pa_simple_drain = pa.pa_simple_drain
pa_simple_drain.restype = ctypes.c_int
pa_simple_drain.argtypes = [
    ctypes.c_void_p,                  # s
    ctypes.POINTER(ctypes.c_int),     # error
]

pa_simple_free = pa.pa_simple_free
pa_simple_free.restype = None
pa_simple_free.argtypes = [ ctypes.c_void_p ]

class PulsePlayer:
    """Minimal pulseaudio playback client (WAV byte strings, libpulse-simple)."""

    def __init__(self, name):
        self.name      = text(name)
        self.playing   = False           # protected by self.lock / self.cond
        self.terminate = False           # asks _play_loop to stop early
        self.thread    = None
        self.lock      = Lock()
        self.cond      = Condition(self.lock)

    def _play_loop(self):
        # runs on self.thread: stream self.wf to pulseaudio until EOF or
        # self.terminate is set, then drain, free the stream and signal
        # completion via self.cond.

        logging.debug("_play_loop starts, a_sound: %d bytes" % len(self.a_sound))

        while not self.terminate:

            # Reading frames and writing to the stream.
            buf = self.wf.readframes(BUFFSIZE)
            if not buf:
                break

            if pa_simple_write(self.s, buf, len(buf), ctypes.byref(self.error)):
                raise Exception('Could not play file, error: %d!' % self.error.value)

        self.wf.close()

        if pa_simple_drain(self.s, ctypes.byref(self.error)):
            raise Exception('Could not simple drain!')

        # Freeing resources and closing connection.
        logging.debug ('pa.pa_simple_free %s...' % repr(self.s))
        pa_simple_free(self.s)

        self.lock.acquire()
        try:
            self.playing = False
            self.cond.notifyAll()
        finally:
            self.lock.release()

    def play(self, a_sound, async_=True):
        """Play a WAV byte string; interrupts any playback in progress.

        a_sound : complete WAV file contents as a byte string
        async_  : when False, block until playback has finished.

        BUGFIX: this parameter used to be called `async`, which is a
        reserved keyword since Python 3.7 and made the module fail to
        compile there (the old name could not be called anyway).
        """

        logging.debug("play starts, async: %s" % repr(async_))

        self.lock.acquire()
        try:
            # stop any playback in progress, wait for its thread to drain
            self.terminate = True
            while self.playing:
                self.cond.wait()

            if self.thread:
                self.thread.join()
                self.thread = None

            self.terminate = False
            self.playing   = True
            self.a_sound   = copy.copy(a_sound)

            self.wf = wave.open(BytesIO(self.a_sound), 'rb')

            self.ss = pa_sample_spec()

            self.ss.rate     = self.wf.getframerate()
            self.ss.channels = self.wf.getnchannels()
            self.ss.format   = PA_SAMPLE_S16LE

            self.error = ctypes.c_int(0)

            self.s = pa_simple_new(
                None,                     # Default server.
                self.name.encode('utf8'), # Application's name.
                PA_STREAM_PLAYBACK,       # Stream for playback.
                None,                     # Default device.
                b'playback',              # Stream's description.
                ctypes.byref(self.ss),    # Sample format.
                None,                     # Default channel map.
                None,                     # Default buffering attributes.
                ctypes.byref(self.error)  # Ignore error code.
            )
            if not self.s:
                raise Exception('Could not create pulse audio stream: {0}!'.format(
                                pa.strerror(ctypes.byref(self.error))))

            logging.debug ('pa_simple_new done, self.s: %s' % repr(self.s))

        finally:
            self.lock.release()

        self.thread = Thread(target=self._play_loop)
        self.thread.start()

        if not async_:
            # wait for player to finish
            self.lock.acquire()
            try:
                while self.playing:
                    self.cond.wait()

                self.thread.join()
                self.thread = None
            finally:
                self.lock.release()

# -------------------------------------------------------------------------------- /nltools/pulserecorder.py: --------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
# Copyright 2013, 2014, 2016, 2017, 2018 Guenter Bartsch
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
# simple pulseaudio recording client
#
# based on: http://freshfoo.com/blog/pulseaudio_monitoring

import ctypes
import threading
import logging
import time

import numpy as np
from builtins import str as text, range
from nltools.vad import BUFFER_DURATION

SOURCE_TIMEOUT = 30 # 3 seconds

PA_INVALID_INDEX = 4294967295 # ((uint32_t) -1)

pa = ctypes.cdll.LoadLibrary('libpulse.so.0')

#
# ctypes mirrors of the libpulse introspection structures and callbacks
#

class pa_proplist(ctypes.Structure):
    pass

pa_encoding   = ctypes.c_int # enum
pa_encoding_t = pa_encoding

class pa_format_info(ctypes.Structure):
    _fields_ = [
        ('encoding', pa_encoding_t),
        ('plist',    ctypes.POINTER(pa_proplist)),
    ]

class pa_context(ctypes.Structure):
    _fields_ = [ ]

pa_context_notify_cb_t  = ctypes.CFUNCTYPE(None, ctypes.POINTER(pa_context), ctypes.c_void_p)
pa_context_success_cb_t = ctypes.CFUNCTYPE(None, ctypes.POINTER(pa_context), ctypes.c_int, ctypes.c_void_p)

pa_sample_format   = ctypes.c_int # enum
pa_sample_format_t = pa_sample_format

pa_format_info_set_sample_format          = pa.pa_format_info_set_sample_format
pa_format_info_set_sample_format.restype  = None
pa_format_info_set_sample_format.argtypes = [ctypes.POINTER(pa_format_info), pa_sample_format_t]

class pa_sink_port_info(ctypes.Structure):
    _fields_ = [
        ('name',        ctypes.c_char_p),
        ('description', ctypes.c_char_p),
        ('priority',    ctypes.c_uint32),
        ('available',   ctypes.c_int),
    ]

class pa_sink_info(ctypes.Structure):
    pass

class pa_sample_spec(ctypes.Structure):
    _fields_ = [
        ('format',   pa_sample_format_t),
        ('rate',     ctypes.c_uint32),
        ('channels', ctypes.c_uint8),
    ]

# _fields_ for pa_source_info are assigned further below, after the
# dependent port/format structures have been declared
class pa_source_info(ctypes.Structure):
    pass

pa_channel_position = ctypes.c_int # enum
80 | pa_channel_position_t = pa_channel_position 81 | class pa_channel_map(ctypes.Structure): 82 | pass 83 | pa_channel_map._fields_ = [ 84 | ('channels', ctypes.c_uint8), 85 | ('map', pa_channel_position_t * 32), 86 | ] 87 | class pa_cvolume(ctypes.Structure): 88 | pass 89 | pa_volume_t = ctypes.c_uint32 90 | pa_cvolume._fields_ = [ 91 | ('channels', ctypes.c_uint8), 92 | ('values', pa_volume_t * 32), 93 | ] 94 | pa_source_flags = ctypes.c_int # enum 95 | pa_source_flags_t = pa_source_flags 96 | pa_source_state = ctypes.c_int # enum 97 | pa_source_state_t = pa_source_state 98 | class pa_source_port_info(ctypes.Structure): 99 | pass 100 | pa_source_port_info._fields_ = [ 101 | ('name', ctypes.c_char_p), 102 | ('description', ctypes.c_char_p), 103 | ('priority', ctypes.c_uint32), 104 | ('available', ctypes.c_int), 105 | ] 106 | pa_source_info._fields_ = [ 107 | ('name', ctypes.c_char_p), 108 | ('index', ctypes.c_uint32), 109 | ('description', ctypes.c_char_p), 110 | ('sample_spec', pa_sample_spec), 111 | ('channel_map', pa_channel_map), 112 | ('owner_module', ctypes.c_uint32), 113 | ('volume', pa_cvolume), 114 | ('mute', ctypes.c_int), 115 | ('monitor_of_sink', ctypes.c_uint32), 116 | ('monitor_of_sink_name', ctypes.c_char_p), 117 | ('latency', ctypes.c_uint64), 118 | ('driver', ctypes.c_char_p), 119 | ('flags', pa_source_flags_t), 120 | ('proplist', ctypes.POINTER(pa_proplist)), 121 | ('configured_latency', ctypes.c_uint64), 122 | ('base_volume', pa_volume_t), 123 | ('state', pa_source_state_t), 124 | ('n_volume_steps', ctypes.c_uint32), 125 | ('card', ctypes.c_uint32), 126 | ('n_ports', ctypes.c_uint32), 127 | ('ports', ctypes.POINTER(ctypes.POINTER(pa_source_port_info))), 128 | ('active_port', ctypes.POINTER(pa_source_port_info)), 129 | ('n_formats', ctypes.c_uint8), 130 | ('formats', ctypes.POINTER(ctypes.POINTER(pa_format_info))), 131 | ] 132 | pa_source_info_cb_t = ctypes.CFUNCTYPE(None, ctypes.POINTER(pa_context), ctypes.POINTER(pa_source_info), ctypes.c_int, 
ctypes.c_void_p) 133 | class pa_stream(ctypes.Structure): 134 | pass 135 | pa_stream._fields_ = [ 136 | ] 137 | pa_stream_request_cb_t = ctypes.CFUNCTYPE(None, ctypes.POINTER(pa_stream), ctypes.c_size_t, ctypes.c_void_p) 138 | 139 | class pa_threaded_mainloop(ctypes.Structure): 140 | pass 141 | pa_threaded_mainloop._fields_ = [ 142 | ] 143 | pa_threaded_mainloop_new = pa.pa_threaded_mainloop_new 144 | pa_threaded_mainloop_new.restype = ctypes.POINTER(pa_threaded_mainloop) 145 | pa_threaded_mainloop_new.argtypes = [] 146 | 147 | class pa_mainloop_api(ctypes.Structure): 148 | pass 149 | pa_threaded_mainloop_get_api = pa.pa_threaded_mainloop_get_api 150 | pa_threaded_mainloop_get_api.restype = ctypes.POINTER(pa_mainloop_api) 151 | pa_threaded_mainloop_get_api.argtypes = [ctypes.POINTER(pa_threaded_mainloop)] 152 | 153 | pa_context_new = pa.pa_context_new 154 | pa_context_new.restype = ctypes.POINTER(pa_context) 155 | pa_context_new.argtypes = [ctypes.POINTER(pa_mainloop_api), ctypes.c_char_p] 156 | 157 | pa_context_set_state_callback = pa.pa_context_set_state_callback 158 | pa_context_set_state_callback.restype = None 159 | pa_context_set_state_callback.argtypes = [ctypes.POINTER(pa_context), pa_context_notify_cb_t, ctypes.c_void_p] 160 | 161 | pa_context_flags = ctypes.c_int # enum 162 | pa_context_flags_t = pa_context_flags 163 | 164 | class pa_spawn_api(ctypes.Structure): 165 | pass 166 | 167 | pa_context_connect = pa.pa_context_connect 168 | pa_context_connect.restype = ctypes.c_int 169 | pa_context_connect.argtypes = [ctypes.POINTER(pa_context), ctypes.c_char_p, pa_context_flags_t, ctypes.POINTER(pa_spawn_api)] 170 | 171 | pa_threaded_mainloop_start = pa.pa_threaded_mainloop_start 172 | pa_threaded_mainloop_start.restype = ctypes.c_int 173 | pa_threaded_mainloop_start.argtypes = [ctypes.POINTER(pa_threaded_mainloop)] 174 | 175 | pa_threaded_mainloop_lock = pa.pa_threaded_mainloop_lock 176 | pa_threaded_mainloop_lock.restype = None 177 | 
pa_threaded_mainloop_lock.argtypes = [ctypes.POINTER(pa_threaded_mainloop)] 178 | 179 | pa_context_disconnect = pa.pa_context_disconnect 180 | pa_context_disconnect.restype = None 181 | pa_context_disconnect.argtypes = [ctypes.POINTER(pa_context)] 182 | 183 | pa_context_unref = pa.pa_context_unref 184 | pa_context_unref.restype = None 185 | pa_context_unref.argtypes = [ctypes.POINTER(pa_context)] 186 | 187 | pa_threaded_mainloop_unlock = pa.pa_threaded_mainloop_unlock 188 | pa_threaded_mainloop_unlock.restype = None 189 | pa_threaded_mainloop_unlock.argtypes = [ctypes.POINTER(pa_threaded_mainloop)] 190 | 191 | pa_threaded_mainloop_stop = pa.pa_threaded_mainloop_stop 192 | pa_threaded_mainloop_stop.restype = None 193 | pa_threaded_mainloop_stop.argtypes = [ctypes.POINTER(pa_threaded_mainloop)] 194 | 195 | pa_threaded_mainloop_free = pa.pa_threaded_mainloop_free 196 | pa_threaded_mainloop_free.restype = None 197 | pa_threaded_mainloop_free.argtypes = [ctypes.POINTER(pa_threaded_mainloop)] 198 | 199 | pa_context_get_state = pa.pa_context_get_state 200 | pa_context_get_state.restype = ctypes.c_int 201 | pa_context_get_state.argtypes = [ctypes.POINTER(pa_context)] 202 | 203 | PA_CONTEXT_NOFLAGS = 0 204 | PA_CONTEXT_NOFAIL = 2 205 | PA_CONTEXT_NOAUTOSPAWN = 1 206 | 207 | PA_CONTEXT_UNCONNECTED = 0 208 | PA_CONTEXT_CONNECTING = 1 209 | PA_CONTEXT_AUTHORIZING = 2 210 | PA_CONTEXT_READY = 4 211 | PA_CONTEXT_FAILED = 5 212 | PA_CONTEXT_TERMINATED = 6 213 | 214 | class pa_operation(ctypes.Structure): 215 | pass 216 | pa_context_get_source_info_list = pa.pa_context_get_source_info_list 217 | pa_context_get_source_info_list.restype = ctypes.POINTER(pa_operation) 218 | pa_context_get_source_info_list.argtypes = [ctypes.POINTER(pa_context), pa_source_info_cb_t, ctypes.c_void_p] 219 | 220 | PA_VOLUME_NORM = 65536 221 | 222 | pa_context_set_source_volume_by_index = pa.pa_context_set_source_volume_by_index 223 | pa_context_set_source_volume_by_index.restype = 
ctypes.POINTER(pa_operation) 224 | pa_context_set_source_volume_by_index.argtypes = [ctypes.POINTER(pa_context), ctypes.c_uint32, ctypes.POINTER(pa_cvolume), pa_context_success_cb_t, ctypes.c_void_p] 225 | 226 | pa_operation_unref = pa.pa_operation_unref 227 | pa_operation_unref.restype = None 228 | pa_operation_unref.argtypes = [ctypes.POINTER(pa_operation)] 229 | 230 | PA_SAMPLE_INVALID = -1 231 | PA_SAMPLE_U8 = 0 232 | PA_SAMPLE_ALAW = 1 233 | PA_SAMPLE_ULAW = 2 234 | PA_SAMPLE_S16LE = 3 235 | PA_SAMPLE_S16BE = 4 236 | PA_SAMPLE_FLOAT32LE = 5 237 | PA_SAMPLE_FLOAT32BE = 6 238 | PA_SAMPLE_S32LE = 7 239 | PA_SAMPLE_S32BE = 8 240 | PA_SAMPLE_S24LE = 9 241 | PA_SAMPLE_S24BE = 10 242 | PA_SAMPLE_S24_32LE = 11 243 | PA_SAMPLE_S24_32BE = 12 244 | PA_SAMPLE_MAX = 13 245 | 246 | pa_stream_new = pa.pa_stream_new 247 | pa_stream_new.restype = ctypes.POINTER(pa_stream) 248 | pa_stream_new.argtypes = [ctypes.POINTER(pa_context), ctypes.c_char_p, ctypes.POINTER(pa_sample_spec), ctypes.POINTER(pa_channel_map)] 249 | 250 | pa_stream_set_read_callback = pa.pa_stream_set_read_callback 251 | pa_stream_set_read_callback.restype = None 252 | pa_stream_set_read_callback.argtypes = [ctypes.POINTER(pa_stream), pa_stream_request_cb_t, ctypes.c_void_p] 253 | 254 | PA_STREAM_ADJUST_LATENCY = 8192 255 | 256 | pa_stream_flags = ctypes.c_int # enum 257 | pa_stream_flags_t = pa_stream_flags 258 | class pa_buffer_attr(ctypes.Structure): 259 | pass 260 | pa_buffer_attr._fields_ = [ 261 | ('maxlength', ctypes.c_uint32), 262 | ('tlength', ctypes.c_uint32), 263 | ('prebuf', ctypes.c_uint32), 264 | ('minreq', ctypes.c_uint32), 265 | ('fragsize', ctypes.c_uint32), 266 | ] 267 | 268 | pa_stream_connect_record = pa.pa_stream_connect_record 269 | pa_stream_connect_record.restype = ctypes.c_int 270 | pa_stream_connect_record.argtypes = [ctypes.POINTER(pa_stream), ctypes.c_char_p, ctypes.POINTER(pa_buffer_attr), pa_stream_flags_t] 271 | 272 | pa_stream_peek = pa.pa_stream_peek 273 | 
pa_stream_peek.restype = ctypes.c_int
pa_stream_peek.argtypes = [ctypes.POINTER(pa_stream), ctypes.POINTER(ctypes.c_void_p), ctypes.POINTER(ctypes.c_size_t)]

pa_stream_drop = pa.pa_stream_drop
pa_stream_drop.restype = ctypes.c_int
pa_stream_drop.argtypes = [ctypes.POINTER(pa_stream)]

def null_cb(a=None, b=None, c=None, d=None):
    # no-op success callback handed to libpulse where we do not care about the result
    return

# which stereo channel(s) to record from
MIX_MODE_BOTH  = 0
MIX_MODE_LEFT  = 1
MIX_MODE_RIGHT = 2

DEFAULT_VOLUME            = 100
DEFAULT_RATE              = 16000
DEFAULT_NAME              = b'Python PulseRecorder'
DEFAULT_FRAMES_PER_BUFFER = int(DEFAULT_RATE * BUFFER_DURATION / 1000)
DEFAULT_MIX_MODE          = MIX_MODE_BOTH

class PulseRecorder(object):
    """Record 16 kHz / 16 bit audio from a pulseaudio source.

    The recorder auto-detects a suitable microphone source (scoring sources
    reported by pulseaudio and preferring ones whose port name contains
    'mic'), sets its volume, connects a record stream and hands out complete
    int16 numpy buffers via get_samples().
    """

    def __init__(self, volume=DEFAULT_VOLUME, rate=DEFAULT_RATE, source_name=None):
        # volume      : 0-100, applied to the detected source before recording
        # rate        : sample rate in Hz requested from pulseaudio
        # source_name : optional substring matched against source descriptions
        #               to boost a specific source during auto-detection
        self.match_source_name  = source_name
        self.rate               = rate
        self.volume             = volume
        self.source_idx         = -1
        self.source_score       = 0
        self.source_log         = False
        self.source_name        = ''
        self.source_description = ''

        # Wrap callback methods in appropriate ctypefunc instances so
        # that the Pulseaudio C API can call them (and keep references
        # alive so they are not garbage-collected)
        self._context_notify_cb = pa_context_notify_cb_t(self.context_notify_cb)
        self._source_info_cb    = pa_source_info_cb_t(self.source_info_cb)
        self._stream_read_cb    = pa_stream_request_cb_t(self.stream_read_cb)
        self._null_cb           = pa_context_success_cb_t(null_cb)

        # lock/cond protecting self._buffers, shared between the
        # pulseaudio callback thread and get_samples() callers

        self._lock = threading.Lock()
        self._cond = threading.Condition(self._lock)

    def start_recording(self, frames_per_buffer=DEFAULT_FRAMES_PER_BUFFER, mix_mode=DEFAULT_MIX_MODE):
        """Connect to pulseaudio and start recording.

        Blocks for up to SOURCE_TIMEOUT * 0.1 seconds while the source is
        auto-detected; raises Exception if no suitable input source is found.
        """
        logging.debug("start_recording...")

        self._frames_per_buffer = frames_per_buffer
        self._mix_mode          = mix_mode
        # LEFT/RIGHT need a stereo stream so one channel can be picked out
        self._record_stereo     = mix_mode != MIX_MODE_BOTH
        self._buffers           = []
        self._cur_buf_cnt       = 0
        self.source_idx         = -1
        self.source_score       = 0
        self.source_log         = False
        self.source_name        = ''
        self.source_description = ''

        self._buffers.append(np.empty(self._frames_per_buffer, dtype=np.int16))

        self._mainloop = pa_threaded_mainloop_new()
        _mainloop_api  = pa_threaded_mainloop_get_api(self._mainloop)
        self._context  = pa_context_new(_mainloop_api, DEFAULT_NAME)

        pa_context_set_state_callback(self._context, self._context_notify_cb, None)
        pa_context_connect(self._context, None, 0, None)

        pa_threaded_mainloop_start(self._mainloop)

        # wait for audio source detection (performed asynchronously in
        # context_notify_cb / source_info_cb on the mainloop thread)
        cnt = 0
        while (self.source_idx < 0) and (cnt < SOURCE_TIMEOUT):
            cnt += 1
            time.sleep(0.1)
        if self.source_idx < 0:
            raise Exception("Pulserecorder: no suitable input source found.")

    def stop_recording(self):
        """Disconnect from pulseaudio and free the threaded mainloop."""
        logging.debug("stop_recording...")

        pa_threaded_mainloop_lock(self._mainloop)
        pa_context_disconnect(self._context)
        pa_context_unref(self._context)
        pa_threaded_mainloop_unlock(self._mainloop)

        pa_threaded_mainloop_stop(self._mainloop)
        pa_threaded_mainloop_free(self._mainloop)

        self.source_idx = -1

    def context_notify_cb(self, context, _):
        """pulseaudio context state callback: kick off source enumeration once ready."""
        state = pa_context_get_state(context)

        if state == PA_CONTEXT_READY:
            logging.debug("Pulseaudio connection ready...")
            o = pa_context_get_source_info_list(context, self._source_info_cb, None)
            pa_operation_unref(o)

        elif state == PA_CONTEXT_FAILED:
            logging.error("Connection failed")

        elif state == PA_CONTEXT_TERMINATED:
            logging.debug("Connection terminated")

    def source_info_cb(self, context, source_info_p, eol, __):
        """Called once per source; on eol, set volume and connect the record stream.

        Scores each non-monitor source (bonus for a matching description and
        for a port whose name contains 'mic') and remembers the best one.
        """
        logging.debug("source_info_cb... eol: %d" % eol)

        if eol:
            if not self.source_log:
                logging.info(u'audio source: %s' % self.source_description.decode('utf8', 'ignore'))
                logging.debug(u'name: %s' % text(self.source_name))
                self.source_log = True

            if self.source_idx < 0:
                logging.error("Pulserecorder: no suitable input source found.")

            #
            # set volume first
            #

            cvol = pa_cvolume()
            cvol.channels = 1
            cvol.values[0] = int((self.volume * PA_VOLUME_NORM) / 100)

            operation = pa_context_set_source_volume_by_index(self._context, self.source_idx, cvol, self._null_cb, None)
            pa_operation_unref(operation)

            logging.debug('recording from %s' % self.source_name)

            samplespec = pa_sample_spec()
            samplespec.channels = 2 if self._record_stereo else 1
            samplespec.format   = PA_SAMPLE_S16LE
            samplespec.rate     = self.rate

            pa_stream = pa_stream_new(context, b"pulserecorder", samplespec, None)
            pa_stream_set_read_callback(pa_stream,
                                        self._stream_read_cb,
                                        self.source_idx)

            # flags = PA_STREAM_NOFLAGS
            flags = PA_STREAM_ADJUST_LATENCY

            # request fragments sized to one buffer (2 bytes/sample, x2 if stereo)
            fragsize = self._frames_per_buffer * 2
            if self._record_stereo:
                fragsize *= 2
            buffer_attr = pa_buffer_attr(-1, -1, -1, -1, fragsize=fragsize)

            pa_stream_connect_record(pa_stream,
                                     self.source_name,
                                     buffer_attr,
                                     flags)

        if not source_info_p:
            return

        source_info = source_info_p.contents

        logging.debug('index       : %d' % source_info.index)
        logging.debug('name        : %s' % source_info.name)
        logging.debug('description : %s' % source_info.description)
        logging.debug('monitor of  : %d' % source_info.monitor_of_sink)

        # skip monitor sources (they record sink output, not a microphone)
        if source_info.monitor_of_sink != PA_INVALID_INDEX:
            logging.debug("ignoring source: monitor")
            return

        score = 1

        if self.match_source_name and (text(self.match_source_name) in text(source_info.description)):
            score += 100

        # microphone source auto-detection magic

        if source_info.ports:
            score += 1

            for pi in range(source_info.n_ports):
                if text('mic') in text(source_info.ports[pi].contents.name):
                    logging.debug("mic port found")
                    score += 1
                    break

        logging.debug('source score: %d, highest score so far: %d' % (score, self.source_score))

        if score > self.source_score:
            self.source_idx         = source_info.index
            self.source_score       = score
            self.source_name        = source_info.name
            self.source_description = source_info.description

    def stream_read_cb(self, stream, length, index_incr):
        """pulseaudio read callback: decode S16LE samples into int16 buffers.

        Runs on the mainloop thread; notifies get_samples() waiters whenever
        a buffer of _frames_per_buffer samples is complete.
        """
        data = ctypes.c_void_p()
        # NOTE(review): third arg is declared POINTER(c_size_t); passing a
        # c_ulong instance works because c_size_t aliases c_ulong on the
        # targeted 64-bit linux platforms -- confirm if porting
        pa_stream_peek(stream, data, ctypes.c_ulong(length))
        data = ctypes.cast(data, ctypes.POINTER(ctypes.c_ubyte))

        bytes_per_sample = 4 if self._record_stereo else 2
        num_samples      = int(length / bytes_per_sample)

        with self._lock:
            for i in range(num_samples):

                # byte offsets of the sample to keep within one frame
                if self._mix_mode == MIX_MODE_BOTH:
                    off_low  = 0
                    off_high = 1
                elif self._mix_mode == MIX_MODE_LEFT:
                    off_low  = 0
                    off_high = 1
                elif self._mix_mode == MIX_MODE_RIGHT:
                    off_low  = 2
                    off_high = 3

                sample = data[i * bytes_per_sample + off_low] + 256 * data[i * bytes_per_sample + off_high]
                # stream is PA_SAMPLE_S16LE: sign-extend explicitly instead of
                # relying on numpy wrapping out-of-range ints on assignment
                # (which raises OverflowError on NumPy >= 2.0)
                if sample >= 32768:
                    sample -= 65536

                self._buffers[-1][self._cur_buf_cnt] = sample
                self._cur_buf_cnt += 1

                # buffer full? start a fresh one and wake up get_samples()
                if self._cur_buf_cnt >= self._frames_per_buffer:
                    self._buffers.append(np.empty(self._frames_per_buffer, dtype=np.int16))
                    self._cur_buf_cnt = 0
                    self._cond.notify_all()

        pa_stream_drop(stream)

    def get_samples(self):
        """Block until a complete recording buffer is available and return it.

        Returns a numpy int16 array of _frames_per_buffer samples.
        """
        with self._lock:
            # the last buffer in the list is still being filled, so wait
            # until at least one earlier, complete buffer exists
            while len(self._buffers) < 2:
                self._cond.wait()
            buf = self._buffers.pop(0)
        return buf
#
#
# crude sequitur g2p interface
#

import logging
import tempfile
import traceback

# absolute imports, consistent with the other nltools modules
# (py2-style implicit relative 'import misc' fails under Python 3)
from nltools import misc
from nltools.phonetics import xsampa2ipa

def _to_text(l):
    # run_command yields bytes; normalize to text once so the string
    # operations below work on both Python 2 and 3
    return l.decode('utf8', errors='ignore') if isinstance(l, bytes) else l

def sequitur_gen_ipa(modelfn, word):
    """Run the external sequitur g2p.py tool on a single word.

    modelfn : path to a trained sequitur model
    word    : unicode word to transcribe
    returns the IPA transcription (u'' if g2p produced no usable output).
    """
    ipa = u''

    with tempfile.NamedTemporaryFile() as f:

        f.write((u'%s\n' % word).encode('utf8'))
        f.flush()

        cmd = ['g2p.py', '--encoding=UTF8', '--model', modelfn, '--apply', f.name]

        res = misc.run_command(cmd)

        logging.debug('%s' % ' '.join(cmd))

        for l in res:

            line = _to_text(l).strip()

            logging.debug('%s' % line)

            # g2p.py chatter, not a transcription line
            if 'stack usage:' in line:
                continue

            if word in line:
                parts = line.split('\t')

                if len(parts) < 2:
                    continue

                xs = parts[1]

                ipa = xsampa2ipa(word, xs)

    return ipa

def sequitur_gen_ipa_multi(modelfn, words):
    """Run the external sequitur g2p.py tool on many words in one call.

    modelfn : path to a trained sequitur model
    words   : iterable of unicode words
    returns a dict mapping each successfully transcribed word to its IPA.
    """
    ipa_map = {}

    with tempfile.NamedTemporaryFile() as f:

        for word in words:
            f.write((u'%s\n' % word).encode('utf8'))
        f.flush()

        cmd = ['g2p.py', '--encoding=UTF8', '--model', modelfn, '--apply', f.name]

        res = misc.run_command(cmd, capture_stderr=False)

        logging.debug('%s' % ' '.join(cmd))

        for l in res:

            line = _to_text(l).strip()

            logging.debug('%s' % line)

            if 'stack usage:' in line:
                continue

            parts = line.split('\t')

            if len(parts) < 2:
                continue

            try:
                word = parts[0]
                if word in words:

                    xs = parts[1]

                    ipa = xsampa2ipa(word, xs)
                    ipa_map[word] = ipa
            except:
                # best-effort: log and keep processing the remaining lines
                logging.error("Error processing line %s:" % line)
                logging.error(traceback.format_exc())

    return ipa_map
#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
# Copyright 2017 Guenter Bartsch
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
# A simple thread pool implementation
#
from __future__ import print_function

import traceback
import logging

try:
    # Python 3
    from queue import Queue, Empty
except ImportError:
    # Python 2
    from Queue import Queue, Empty
from threading import Thread, Lock

class Worker(Thread):
    """Thread executing tasks from a given tasks queue"""

    def __init__(self, tasks, idx):
        # tasks : shared Queue of (func, args, kargs) tuples
        # idx   : worker number, for debugging
        Thread.__init__(self)
        self.tasks  = tasks
        self.idx    = idx
        # set to True (e.g. by ThreadPool.shutdown) to make run() exit
        self.finish = False
        self.start()

    def run(self):
        """Poll the queue (0.1s timeout so self.finish is re-checked) and run tasks."""
        while not self.finish:
            try:
                func, args, kargs = self.tasks.get(True, 0.1)
                try:
                    func(*args, **kargs)
                except Exception:
                    # a failing task must not kill the worker thread
                    logging.error('ThreadPool Worker caught exception: %s' % traceback.format_exc())
                    traceback.print_exc()
                finally:
                    self.tasks.task_done()

            except Empty:
                # no task within the timeout -- loop and re-check self.finish
                pass


class ThreadPool:
    """Pool of threads consuming tasks from a queue"""

    def __init__(self, num_threads):
        # num_threads : number of worker threads to spawn immediately
        self.tasks         = Queue()
        self.terminal_lock = Lock()
        self.workers       = []
        for idx in range(num_threads):
            self.workers.append(Worker(self.tasks, idx))

    def add_task(self, func, *args, **kargs):
        """Add a task to the queue"""
        self.tasks.put((func, args, kargs))

    def print_synced(self, s):
        """Print s while holding the terminal lock (safe from multiple workers)."""
        self.terminal_lock.acquire()
        print(s)
        self.terminal_lock.release()

    def shutdown(self):
        """Wait for all queued tasks to complete, then stop and join all workers."""
        print("shutdown: tasks.join...")
        self.tasks.join()
        print("shutdown: tasks.join...done. finishing workers...")
        # fix: actually terminate the (non-daemon) worker threads -- without
        # this, shutdown() left them polling forever and the process never exited
        for worker in self.workers:
            worker.finish = True
        for worker in self.workers:
            worker.join()

        print("shutdown complete.")
#

#
# Abstraction layer for multiple TTS engines (Mary TTS, SVOX Pico TTS and eSpeak NG at the moment)
# can run those locally or act as a client for our HTTP TTS server
#

import traceback
import json
import logging
import requests
import urllib

try:
    # Python 3
    from urllib.parse import urlencode
except ImportError:
    # Python 2 (urllib.urlencode moved to urllib.parse in py3)
    from urllib import urlencode

from base64 import b64encode
from nltools.pulseplayer import PulsePlayer
from nltools.phonetics import ipa2mary, mary2ipa, ipa2xsampa, xsampa2ipa
from espeakng import ESpeakNG
from marytts import MaryTTS

MARY_VOICES = {

    'en_US': { 'male':   [ "cmu-rms-hsmm", "dfki-spike", "dfki-obadiah", "dfki-obadiah-hsmm", "cmu-bdl-hsmm"],
               'female': [ "cmu-slt-hsmm", "dfki-poppy", "dfki-poppy-hsmm", "dfki-prudence", "dfki-prudence-hsmm" ]
             },

    'de_DE': { 'male':   ["bits3", "bits3-hsmm", "dfki-pavoque-neutral", "dfki-pavoque-neutral-hsmm", "dfki-pavoque-styles"],
               'female': ["bits1-hsmm"]
             }
}
DEFAULT_MARY_VOICE  = 'cmu-rms-hsmm'
DEFAULT_MARY_LOCALE = 'en_US'

ESPEAK_VOICES = ['english-us', 'de']

class TTS(object):
    """Unified TTS front-end.

    With host_tts == 'local' synthesis runs in-process via MaryTTS,
    eSpeak NG or Pico TTS; otherwise requests are sent to a remote
    HTTP TTS server at host_tts:port_tts.
    """

    def __init__(self,
                 host_tts = 'local',
                 port_tts = 8300,
                 locale   = 'en_US',
                 engine   = 'mary',
                 voice    = 'cmu-rms-hsmm',
                 pitch    = 50,    # 0-99
                 speed    = 175):  # approx. words per minute

        self._host_tts = host_tts
        self._port_tts = port_tts
        self._locale   = locale
        self._engine   = engine
        self._voice    = voice
        self._pitch    = pitch
        self._speed    = speed

        if host_tts == 'local':
            self.player  = PulsePlayer('Local TTS Client')
            self.espeak  = ESpeakNG()
            self.marytts = MaryTTS()
            self.picotts = None  # lazy-loading to reduce package dependencies

    @property
    def locale(self):
        return self._locale
    @locale.setter
    def locale(self, v):
        self._locale = v

    @property
    def engine(self):
        return self._engine
    @engine.setter
    def engine(self, v):
        self._engine = v

    @property
    def voice(self):
        return self._voice
    @voice.setter
    def voice(self, v):
        self._voice = v

    @property
    def pitch(self):
        return self._pitch
    @pitch.setter
    def pitch(self, v):
        self._pitch = v

    @property
    def speed(self):
        return self._speed
    @speed.setter
    def speed(self, v):
        self._speed = v

    def synthesize(self, txt, mode='txt'):
        """Synthesize txt to WAV bytes.

        mode : 'txt' for plain text, 'ipa' for an IPA transcription
        returns WAV data as bytes, or None on a remote-server error.
        """
        if self._host_tts == 'local':

            wav = None

            if self.engine == 'mary':

                self.marytts.voice  = self._voice
                self.marytts.locale = self._locale

                if mode == 'txt':
                    wav = self.marytts.synth_wav (txt)
                elif mode == 'ipa':
                    xs = ipa2mary ('ipa', txt)
                    wav = self.marytts.synth_wav (xs, fmt='xs')
                else:
                    raise Exception ("unknown mary mode '%s'" % mode)

            elif self.engine == 'espeak':

                if mode == 'txt':

                    self.espeak.voice = self._voice
                    self.espeak.speed = self._speed
                    self.espeak.pitch = self._pitch
                    wav = self.espeak.synth_wav (txt)

                elif mode == 'ipa':
                    xs = ipa2xsampa ('ipa', txt)
                    logging.debug ('synthesize: %s %s -> %s' % (txt, mode, repr(xs)))
                    wav = self.espeak.synth_wav (xs, fmt='xs')

            elif self.engine == 'pico':

                if mode == 'txt':

                    if not self.picotts:
                        # lazy import so picotts is only required when used
                        from picotts import PicoTTS
                        self.picotts = PicoTTS()

                    self.picotts.voice = self._voice
                    wav = self.picotts.synth_wav (txt)

                else:
                    raise Exception ("unknown pico mode '%s'" % mode)
            else:

                raise Exception ("unknown engine '%s'" % self.engine)

        else:

            args = {'l': self._locale,
                    'v': self._voice,
                    'e': self._engine,
                    'm': mode,
                    't': txt.encode('utf8')}
            url = 'http://%s:%s/tts/synth?%s' % (self._host_tts, self._port_tts, urlencode(args))

            response = requests.get(url)

            if response.status_code != 200:
                return None

            wav = response.content

        if wav:
            logging.debug ('synthesize: %s %s -> WAV' % (txt, mode))
        else:
            logging.error ('synthesize: %s %s -> NO WAV' % (txt, mode))

        return wav

    def play_wav (self, wav, async_play=False):
        """Play WAV bytes locally or on the remote server.

        parameter renamed from 'async': that spelling is a reserved word
        since Python 3.7 and made this module a SyntaxError.
        """
        if self._host_tts == 'local':

            if wav:
                self.player.play(wav, async_play)
            else:
                raise Exception ('no wav given')

        else:

            url = 'http://%s:%s/tts/play' % (self._host_tts, self._port_tts)

            if async_play:
                # server-side protocol flag is still spelled 'async'
                url += '?async=t'

            response = requests.post(url, data=wav)

    def say (self, utterance, async_play=False):
        """Synthesize and play a text utterance."""
        wav = self.synthesize(utterance)
        self.play_wav(wav, async_play=async_play)

    def say_ipa (self, ipa, async_play=False):
        """Synthesize and play an IPA transcription."""
        wav = self.synthesize(ipa, mode='ipa')
        self.play_wav(wav, async_play=async_play)

    def gen_ipa (self, word):
        """Generate an IPA transcription (g2p) for a single word."""
        if self._host_tts == 'local':

            if self.engine == 'mary':

                self.marytts.voice  = self._voice
                self.marytts.locale = self._locale

                mp = self.marytts.g2p (word)
                return mary2ipa(word, mp)

            elif self.engine == 'espeak':

                self.espeak.voice = self._voice
                e_ipa = self.espeak.g2p (word, ipa='2')
                # round-trip through X-SAMPA to normalize espeak's IPA output
                xs  = ipa2xsampa(word, e_ipa)
                ipa = xsampa2ipa(word, xs)

                logging.debug (u'espeak g2p: %s -> %s -> %s -> %s' % (word, e_ipa, xs, ipa))

                return ipa

            elif self.engine == 'sequitur':

                # NOTE(review): SEQUITUR_MODELS and sequitur_gen_ipa are not
                # defined or imported in this module -- this branch raises
                # NameError; presumably they should come from
                # nltools.sequiturclient / a config module. Confirm.
                if not self.voice in SEQUITUR_MODELS:
                    raise Exception ("no sequitur model for voice '%s'" % self.voice)

                return sequitur_gen_ipa (SEQUITUR_MODELS[self.voice], word)

            else:
                raise Exception ("unknown engine '%s'" % self.engine)

        else:
            args = {'l': self._locale,
                    'v': self._voice,
                    'e': self._engine,
                    't': word.encode('utf8')}
            url = 'http://%s:%s/tts/g2p?%s' % (self._host_tts, self._port_tts, urlencode(args))

            response = requests.get(url)

            if response.status_code != 200:
                return None

            return response.json()['ipa']
# See the License for the specific language governing permissions and
# limitations under the License.
#

#
# Voice Activity Detection (VAD) state machine
#

import logging
import webrtcvad

SAMPLE_RATE      = 16000
BUFFER_DURATION  = 30 # ms
# integer division: this is a count of ring-buffer entries
RING_BUF_ENTRIES = 5 * 60 * 1000 // BUFFER_DURATION # 5 minutes max

MIN_UTT_LENGTH   = 0.4  # seconds
MAX_UTT_LENGTH   = 12   # seconds
MAX_UTT_GAP      = 0.7  # seconds

# state machine states
STATE_IDLE       = 0

STATE_PRE_SPEECH = 1   # speech detected, utterance not yet long enough
STATE_PRE_GAP    = 2   # silence before the utterance was confirmed

STATE_SPEECH     = 3   # inside a confirmed utterance
STATE_GAP        = 4   # silence inside a confirmed utterance

STATE_IGNORE     = 5   # utterance exceeded max length, discard until silence
STATE_IGNORE_GAP = 6

FRAME_STAT_CNT     = 300
LOW_VOLUME_THRESH  = 100
HIGH_VOLUME_THRESH = 25000

class VAD(object):
    """Voice activity detection state machine built on webrtcvad.

    Feed fixed-size audio frames (BUFFER_DURATION ms of int16 samples) to
    process_audio(); it returns (samples, finalize) chunks of the detected
    utterance, with finalize=True on the last chunk.
    """

    def __init__(self, aggressiveness=2, sample_rate=SAMPLE_RATE,
                 min_utt_length = MIN_UTT_LENGTH,
                 max_utt_length = MAX_UTT_LENGTH,
                 max_utt_gap    = MAX_UTT_GAP):
        # aggressiveness : webrtcvad mode 0-3 (3 = most aggressive filtering)
        # min_utt_length : seconds of speech before an utterance is confirmed
        # max_utt_length : seconds after which the utterance is force-finalized
        # max_utt_gap    : seconds of silence that terminate an utterance

        self.sample_rate = sample_rate

        self.vad = webrtcvad.Vad()
        self.vad.set_mode(aggressiveness)

        self.state    = STATE_IDLE
        self.buf      = []
        self.buf_sent = 0

        # frame-count thresholds; int(...) over the whole expression so the
        # values stay integers under Python 3 true division (int(x)/y used to
        # yield floats on py3, subtly shifting the tuned py2 thresholds)
        self.min_buf_entries = int(min_utt_length * 1000 / BUFFER_DURATION)
        self.max_buf_entries = int(max_utt_length * 1000 / BUFFER_DURATION)
        self.max_gap         = int(max_utt_gap * 1000 / BUFFER_DURATION)

        # running volume statistics over the first FRAME_STAT_CNT frames
        self.frame_cnt   = 0
        self.avg_vol_sum = 0.0
        self.avg_vol_cnt = 0

    def _return_audio (self, finalize):
        """Return all not-yet-delivered buffered samples plus the finalize flag."""
        res = []

        buf_max = len(self.buf) - 1

        while self.buf_sent <= buf_max:
            res.extend(self.buf[self.buf_sent].tolist())
            self.buf_sent += 1

        return res, finalize

    def process_audio (self, audio):
        """Process one frame; returns (samples, finalize) or (None, False).

        audio : one BUFFER_DURATION ms frame of int16 samples (numpy array)
        """
        cur_frame = audio

        # give feedback if volume too low / too high
        if self.frame_cnt <= FRAME_STAT_CNT:

            for sample in audio:
                self.avg_vol_sum += abs(sample)
                self.avg_vol_cnt += 1

            self.frame_cnt += 1
            if self.frame_cnt == FRAME_STAT_CNT:

                self.avg_vol_sum /= float(self.avg_vol_cnt)

                if self.avg_vol_sum < LOW_VOLUME_THRESH:
                    logging.error ('VAD: audio volume too low or wrong source?')
                elif self.avg_vol_sum > HIGH_VOLUME_THRESH:
                    logging.error ('VAD: audio volume too high or wrong source?')

        vad_res = self.vad.is_speech(audio.tobytes(), self.sample_rate)

        if self.state == STATE_IDLE:
            if vad_res:
                self.state    = STATE_PRE_SPEECH
                self.buf      = [ cur_frame ]
                self.buf_sent = 0

        elif self.state == STATE_PRE_SPEECH:
            self.buf.append(cur_frame)
            if vad_res:
                # enough consecutive speech frames -> confirm the utterance
                if len (self.buf) > self.min_buf_entries:
                    logging.debug ("*** SPEECH DETECTED at frame %3d ***" % len(self.buf))
                    self.state = STATE_SPEECH

            else:
                self.state     = STATE_PRE_GAP
                self.gap_start = len(self.buf)

        elif self.state == STATE_PRE_GAP:
            self.buf.append(cur_frame)

            if vad_res:
                self.state = STATE_PRE_SPEECH

            else:
                gap_len = len(self.buf) - self.gap_start
                if gap_len > self.max_gap:
                    logging.debug ("*** PRE GAP (%d) TOO LONG at frame %3d ***" % (gap_len, len(self.buf)))
                    self.state = STATE_IDLE

        elif self.state == STATE_SPEECH:
            self.buf.append(cur_frame)

            # check if attention span is over
            if len (self.buf) > self.max_buf_entries:
                logging.debug ("*** START OF IGNORE at frame %3d ***" % len(self.buf))
                self.state = STATE_IGNORE
                return self._return_audio(True)

            else:
                if not vad_res:
                    logging.debug ("*** START OF GAP at frame %3d ***" % len(self.buf))
                    self.state     = STATE_GAP
                    self.gap_start = len(self.buf)
                    return self._return_audio(False)

        elif self.state == STATE_GAP:
            self.buf.append(cur_frame)

            gap_len = len(self.buf) - self.gap_start
            if vad_res:
                self.state = STATE_SPEECH
                logging.debug ("*** END OF GAP (%d < %d) at frame %3d ***" % (gap_len, self.max_gap, len(self.buf)))
                return self._return_audio(False)

            else:
                if gap_len > self.max_gap:
                    logging.debug ("*** GAP (%d > %d) TOO LONG at frame %3d ***" % (gap_len, self.max_gap, len(self.buf)))
                    self.state = STATE_IDLE
                    return self._return_audio(True)
                else:
                    return self._return_audio(False)

        elif self.state == STATE_IGNORE:
            self.buf.append(cur_frame)
            if not vad_res:
                self.state     = STATE_IGNORE_GAP
                self.gap_start = len(self.buf)

        elif self.state == STATE_IGNORE_GAP:
            self.buf.append(cur_frame)
            if vad_res:
                self.state = STATE_IGNORE
            else:
                gap_len = len(self.buf) - self.gap_start
                if gap_len > self.max_gap:
                    logging.debug ("*** end of ignore at frame %3d ***" % len(self.buf))
                    self.state = STATE_IDLE

        return None, False
'webrtcvad', 'setproctitle' 16 | ], 17 | classifiers = [ 18 | 'Operating System :: POSIX :: Linux', 19 | 'License :: OSI Approved :: Apache Software License', 20 | 'Programming Language :: Python :: 2', 21 | 'Programming Language :: Python :: 2.7', 22 | 'Programming Language :: Python :: 3', 23 | 'Programming Language :: Python :: 3.5', 24 | 'Intended Audience :: Developers', 25 | 'Topic :: Multimedia :: Sound/Audio :: Speech', 26 | 'Topic :: Scientific/Engineering :: Artificial Intelligence' 27 | ], 28 | license = 'Apache', 29 | keywords = 'natural language processing tokenizer nlp tts asr speech synthesis recognition', 30 | ) 31 | 32 | -------------------------------------------------------------------------------- /tests/test_asr.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # 5 | # Copyright 2014, 2016, 2017, 2018 Guenter Bartsch 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | # 19 | 20 | import unittest 21 | import logging 22 | import wave 23 | import struct 24 | 25 | from nltools.asr import ASR, ASR_ENGINE_NNET3, ASR_ENGINE_POCKETSPHINX 26 | from nltools import misc 27 | 28 | TEST_WAVE_EN = 'tests/foo.wav' 29 | TEST_WAVE_EN_TS = 'ah indeed' 30 | TEST_WAVE_EN_TS_PS = 'aha in dayton' 31 | 32 | POCKETSPHINX_MODELDIR = 'models/cmusphinx-cont-generic-en-latest' 33 | POCKETSPHINX_MODELNAME = 'voxforge' 34 | 35 | class TestASR (unittest.TestCase): 36 | 37 | def test_asr_kaldi(self): 38 | 39 | asr = ASR(engine = ASR_ENGINE_NNET3) 40 | 41 | wavf = wave.open(TEST_WAVE_EN, 'rb') 42 | 43 | # check format 44 | self.assertEqual(wavf.getnchannels(), 1) 45 | self.assertEqual(wavf.getsampwidth(), 2) 46 | 47 | # process file in 250ms chunks 48 | 49 | chunk_frames = 250 * wavf.getframerate() / 1000 50 | tot_frames = wavf.getnframes() 51 | 52 | num_frames = 0 53 | while num_frames < tot_frames: 54 | 55 | finalize = False 56 | if (num_frames + chunk_frames) < tot_frames: 57 | nframes = chunk_frames 58 | else: 59 | nframes = tot_frames - num_frames 60 | finalize = True 61 | 62 | frames = wavf.readframes(nframes) 63 | num_frames += nframes 64 | samples = struct.unpack_from('<%dh' % nframes, frames) 65 | 66 | s, l = asr.decode(samples, finalize, wavf.getframerate()) 67 | 68 | wavf.close() 69 | 70 | self.assertEqual(s.strip(), TEST_WAVE_EN_TS) 71 | 72 | def test_asr_kaldi_wavefile(self): 73 | asr = ASR(engine = ASR_ENGINE_NNET3) 74 | s, l = asr.decode_wav_file(TEST_WAVE_EN) 75 | self.assertEqual(s.strip(), TEST_WAVE_EN_TS) 76 | 77 | def test_asr_pocketsphinx(self): 78 | 79 | asr = ASR(engine = ASR_ENGINE_POCKETSPHINX, model_dir = POCKETSPHINX_MODELDIR, model_name = POCKETSPHINX_MODELNAME) 80 | 81 | wavf = wave.open(TEST_WAVE_EN, 'rb') 82 | 83 | # check format 84 | self.assertEqual(wavf.getnchannels(), 1) 85 | self.assertEqual(wavf.getsampwidth(), 2) 86 | 87 | # process file in 250ms chunks 88 | 89 | chunk_frames = 250 * wavf.getframerate() / 1000 90 | 
tot_frames = wavf.getnframes() 91 | 92 | num_frames = 0 93 | while num_frames < tot_frames: 94 | 95 | finalize = False 96 | if (num_frames + chunk_frames) < tot_frames: 97 | nframes = chunk_frames 98 | else: 99 | nframes = tot_frames - num_frames 100 | finalize = True 101 | 102 | frames = wavf.readframes(nframes) 103 | num_frames += nframes 104 | samples = struct.unpack_from('<%dh' % nframes, frames) 105 | 106 | s, l = asr.decode(samples, finalize, wavf.getframerate()) 107 | 108 | if not finalize: 109 | self.assertEqual(s, None) 110 | 111 | wavf.close() 112 | 113 | self.assertEqual(s.strip(), TEST_WAVE_EN_TS_PS) 114 | 115 | def test_asr_pocketsphinx_wavefile(self): 116 | asr = ASR(engine = ASR_ENGINE_POCKETSPHINX, model_dir = POCKETSPHINX_MODELDIR, model_name = POCKETSPHINX_MODELNAME) 117 | s, l = asr.decode_wav_file(TEST_WAVE_EN) 118 | self.assertEqual(s.strip(), TEST_WAVE_EN_TS_PS) 119 | 120 | 121 | if __name__ == "__main__": 122 | 123 | # logging.basicConfig(level=logging.ERROR) 124 | logging.basicConfig(level=logging.DEBUG) 125 | 126 | unittest.main() 127 | 128 | -------------------------------------------------------------------------------- /tests/test_macro_engine.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # 5 | # Copyright 2018 Guenter Bartsch 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | # 19 | 20 | import unittest 21 | import logging 22 | 23 | from nltools.macro_engine import MacroEngine 24 | from nltools import misc 25 | 26 | class TestME (unittest.TestCase): 27 | 28 | def test_implicit_me(self): 29 | 30 | me = MacroEngine() 31 | 32 | expansions = me.expand_macros('en', "(a|b|c) (c|d|e) foo") 33 | 34 | logging.debug(repr(expansions)) 35 | 36 | self.assertEqual(len(expansions), 9) 37 | self.assertEqual(u" ".join(expansions[0][0]), u"c e foo") 38 | 39 | def test_explicit_me(self): 40 | 41 | me = MacroEngine() 42 | 43 | me.add_macro_expansion("prefix", u"") 44 | me.add_macro_expansion("prefix", u"please") 45 | me.add_macro_expansion("prefix", u"computer") 46 | me.add_macro_expansion("location", u"living room") 47 | me.add_macro_expansion("location", u"bedroom") 48 | me.add_macro_expansion("location", u"kitchen") 49 | 50 | 51 | expansions = me.expand_macros('en', "{prefix:W} switch (on|off) the light in the {location:W}") 52 | 53 | logging.debug(repr(expansions)) 54 | 55 | self.assertEqual(len(expansions), 18) 56 | self.assertEqual(u" ".join(expansions[0][0]), u"computer switch off the light in the kitchen") 57 | 58 | if __name__ == "__main__": 59 | 60 | # logging.basicConfig(level=logging.ERROR) 61 | logging.basicConfig(level=logging.DEBUG) 62 | 63 | unittest.main() 64 | 65 | -------------------------------------------------------------------------------- /tests/test_misc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # 5 | # Copyright 2013, 2014, 2016, 2017, 2018 Guenter Bartsch 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 
9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | import shutil 21 | import tempfile 22 | import os.path 23 | import unittest 24 | import logging 25 | 26 | from nltools import misc 27 | from nltools.tokenizer import tokenize 28 | 29 | class TestMisc (unittest.TestCase): 30 | 31 | def setUp(self): 32 | self.test_dir = tempfile.mkdtemp() 33 | 34 | def tearDown(self): 35 | shutil.rmtree(self.test_dir) 36 | 37 | def test_load_config(self): 38 | 39 | cfg = misc.load_config('.speechrc') 40 | 41 | host = cfg.get('tts', 'host') 42 | 43 | self.assertEqual (host, 'local') 44 | 45 | 46 | def test_compress_ws(self): 47 | 48 | self.assertEqual (misc.compress_ws(u' abc cde 12 '), u' abc cde 12') 49 | 50 | def test_run_command(self): 51 | 52 | txt = '' 53 | for line in misc.run_command(['uname', '-a']): 54 | txt += line.strip() 55 | 56 | self.assertEqual('Linux' in txt, True) 57 | 58 | def test_tex(self): 59 | 60 | self.assertEqual(misc.tex_decode('"uber'), u'\xfcber') 61 | self.assertEqual(misc.tex_decode('da"s'), u'daß') 62 | 63 | self.assertEqual(misc.tex_encode(u'über'), '"uber') 64 | self.assertEqual(misc.tex_encode(u'daß'), 'da"s') 65 | 66 | def test_edit_distance(self): 67 | 68 | self.assertEqual (misc.edit_distance('hubba', 'hubba'), 0) 69 | self.assertEqual (misc.edit_distance('hubba', 'hubb'), 1) 70 | self.assertEqual (misc.edit_distance('hubba', 'hub'), 2) 71 | self.assertEqual (misc.edit_distance('hubba', 'bba'), 2) 72 | 73 | self.assertEqual (misc.edit_distance( 74 | tokenize(u'die leistung wurde zurückverlangt'), 75 | tokenize(u'die leistung wurde 
zurückverlangt')), 0) 76 | self.assertEqual (misc.edit_distance( 77 | tokenize(u'die leistung wurde'), 78 | tokenize(u'die leistung wurde zurückverlangt')), 1) 79 | self.assertEqual (misc.edit_distance( 80 | tokenize(u'DIE LEISTUNG'), 81 | tokenize(u'DIE LEISTUNG WURDE ZURÜCKVERLANGT')), 2) 82 | self.assertEqual (misc.edit_distance( 83 | tokenize(u'DIE'), 84 | tokenize(u'DIE LEISTUNG WURDE ZURÜCKVERLANGT')), 3) 85 | self.assertEqual (misc.edit_distance( 86 | tokenize(u'DIE ZURÜCKVERLANGT'), 87 | tokenize(u'DIE LEISTUNG WURDE ZURÜCKVERLANGT')), 2) 88 | self.assertEqual (misc.edit_distance( 89 | tokenize(u'DIE LEISTUNG WURDE ZURÜCKVERLANGT'), 90 | tokenize(u'LEISTUNG WURDE ZURÜCKVERLANGT')), 1) 91 | self.assertEqual (misc.edit_distance( 92 | tokenize(u'DIE LEISTUNG WURDE ZURÜCKVERLANGT'), 93 | tokenize(u'WURDE ZURÜCKVERLANGT')), 2) 94 | self.assertEqual (misc.edit_distance( 95 | tokenize(u'DIE LEISTUNG WURDE ZURÜCKVERLANGT'), 96 | tokenize(u'ZURÜCKVERLANGT')), 3) 97 | self.assertEqual (misc.edit_distance( 98 | tokenize(u'DIE LEISTUNG WURDE ZURÜCKVERLANGT'), 99 | tokenize(u'')), 4) 100 | self.assertEqual (misc.edit_distance( 101 | tokenize(u'DIE LEISTUNG WURDE ZURÜCKVERLANGT'), 102 | tokenize(u'LEISTUNG FOO ZURÜCKVERLANGT')), 2) 103 | self.assertEqual (misc.edit_distance( 104 | tokenize(u'SIE IST FÜR DIE LEISTUNG DANKBAR'), 105 | tokenize(u'SIE STRITTIG LEISTUNG DANKBAR')), 3) 106 | 107 | def test_limit_str(self): 108 | 109 | self.assertEqual(misc.limit_str('1234567890', 10), '1234567890') 110 | self.assertEqual(misc.limit_str('1234567890', 9), '123456...') 111 | 112 | def test_render_template(self): 113 | # given 114 | template_text = """VAR1={{val1}} 115 | VAR2={{val2}} 116 | """ 117 | 118 | val1 = "v1" 119 | val2 = "v2" 120 | 121 | expected_text = """VAR1=%s 122 | VAR2=%s 123 | """ % (val1, val2) 124 | 125 | src_path = os.path.join(str(self.test_dir), "src.txt") 126 | dst_path = os.path.join(str(self.test_dir), "dst.txt") 127 | 128 | with open(src_path, "wt") as f: 
129 | f.write(template_text) 130 | 131 | # when 132 | misc.render_template(src_path, dst_path, val1=val1, val2=val2) 133 | 134 | # then 135 | with open(dst_path) as f: 136 | actual_text = f.read() 137 | 138 | self.assertEqual(expected_text, actual_text) 139 | 140 | 141 | if __name__ == "__main__": 142 | 143 | logging.basicConfig(level=logging.ERROR) 144 | 145 | unittest.main() 146 | 147 | -------------------------------------------------------------------------------- /tests/test_phonetics.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # 5 | # Copyright 2013, 2014, 2016, 2017 Guenter Bartsch 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | # 19 | 20 | import logging 21 | import unittest 22 | from nltools.phonetics import ipa2xsampa, ipa2mary, xsampa2xarpabet, xs2xa_table, xsampa2ipa 23 | 24 | class TestPhoneticAlphabets (unittest.TestCase): 25 | 26 | def setUp(self): 27 | self.seq = range(10) 28 | 29 | def test_ipa(self): 30 | 31 | res = ipa2xsampa ("EISENBAHN", u"ˈaɪ̯zən̩ˌbaːn") 32 | #print "res: %s" % res 33 | self.assertEqual (res, "'aIz@nba:n") 34 | 35 | res = ipa2xsampa ("DIPHTONGTEST", u"aɪɔɪaʊɜ'") 36 | #print "res: %s" % res 37 | self.assertEqual (res, "aIOIaU3") 38 | 39 | res = ipa2xsampa ("BON", u"bɔ̃") 40 | #print "res: %s" % res 41 | self.assertEqual (res, "bO~") 42 | 43 | res = ipa2xsampa ("RESTAURANT", u"ʁɛstɔʁɑ̃") 44 | #print "res: %s" % res 45 | self.assertEqual (res, "REstORA~") 46 | 47 | res = ipa2xsampa ("VIN", u"vɛ̃") 48 | #print "res: %s" % res 49 | self.assertEqual (res, "vE~") 50 | 51 | res = ipa2xsampa ("BRUN", u"bʁœ̃") 52 | #print "res: %s" % res 53 | self.assertEqual (res, "bR9~") 54 | 55 | res = ipa2xsampa ("POIGNANT", u"pwaɲɑ̃") 56 | #print "res: %s" % res 57 | self.assertEqual (res, "pwaJA~") 58 | 59 | res = ipa2mary ("EISENBAHN", u"ˈaɪ̯zən̩ˌbaːn") 60 | #print "res: %s" % res 61 | self.assertEqual (res, "'aIz@nba:n") 62 | 63 | res = ipa2mary ("DIPHTONGTEST", u"aɪɔɪaʊɜ'") 64 | #print "res: %s" % res 65 | self.assertEqual (res, "aIOIaUr='") 66 | 67 | res = ipa2mary ("BON", u"bɔ̃") 68 | #print "res: %s" % res 69 | self.assertEqual (res, "bO~") 70 | 71 | res = ipa2mary ("RESTAURANT", u"ʁɛstɔʁɑ̃") 72 | #print "res: %s" % res 73 | self.assertEqual (res, "REstORA~") 74 | 75 | res = ipa2mary ("VIN", u"vɛ̃") 76 | #print "res: %s" % res 77 | self.assertEqual (res, "vE~") 78 | 79 | res = ipa2mary ("BRUN", u"bʁœ̃") 80 | #print "res: %s" % res 81 | self.assertEqual (res, "bR9~") 82 | 83 | res = ipa2mary ("POIGNANT", u"pwaɲɑ̃") 84 | #print "res: %s" % res 85 | self.assertEqual (res, "pwaJA~") 86 | 87 | res = xsampa2ipa(u"entrée A~ t R e", u"A~ t R e") 88 | #print "res: %s" % res 89 
| self.assertEqual (res, u"ɑ̃tʁe") 90 | 91 | def test_xarpa(self): 92 | 93 | res = xsampa2xarpabet ("JAHRHUNDERTE", "ja:6-'hUn-d6-t@") 94 | #print "res: %s" % res 95 | self.assertEqual (res, "Y AAH EX HH UU N D EX T AX") 96 | 97 | res = xsampa2xarpabet ("ABGESCHRIEBEN", "'ap-g@-SRi:-b@n") 98 | #print "res: %s" % res 99 | self.assertEqual (res, "AH P G AX SH RR IIH B AX N") 100 | 101 | res = xsampa2xarpabet ("ZUGEGRIFFEN", "'tsu:-g@-gRI-f@n") 102 | #print "res: %s" % res 103 | self.assertEqual (res, "TS UUH G AX G RR IH F AX N") 104 | 105 | res = xsampa2xarpabet ("AUSLEGUNG", "'aU-sle:-gUN") 106 | #print "res: %s" % res 107 | self.assertEqual (res, "AW S L EEH G UU NG") 108 | 109 | res = xsampa2xarpabet ("BON", "bO~") 110 | #print "res: %s" % res 111 | self.assertEqual (res, "B ON") 112 | 113 | res = xsampa2xarpabet ("RESTAURANT", "REstORA~") 114 | #print "res: %s" % res 115 | self.assertEqual (res, "RR EH S T OO RR AN") 116 | 117 | res = xsampa2xarpabet ("VIN", u"vE~") 118 | #print "res: %s" % res 119 | self.assertEqual (res, "V EN") 120 | 121 | res = xsampa2xarpabet ("BRUN", u"bR9~") 122 | #print "res: %s" % res 123 | self.assertEqual (res, "B RR OEN") 124 | 125 | res = xsampa2xarpabet ("POIGNANT", u"pwaJA~") 126 | #print "res: %s" % res 127 | self.assertEqual (res, "P W AH NJ AN") 128 | 129 | def test_xarpa_unique(self): 130 | 131 | # all xarpa transcriptions have to be unique 132 | 133 | uniq_xs = set() 134 | uniq_xa = set() 135 | 136 | for entry in xs2xa_table: 137 | xs = entry[0] 138 | xa = entry[1] 139 | #print (u"xs: %s, xa: %s" % (xs, xa)).encode('utf8') 140 | self.assertFalse (xa in uniq_xa) 141 | uniq_xa.add(xa) 142 | self.assertFalse (xs in uniq_xs) 143 | uniq_xs.add(xs) 144 | 145 | if __name__ == "__main__": 146 | 147 | logging.basicConfig(level=logging.ERROR) 148 | 149 | unittest.main() 150 | 151 | -------------------------------------------------------------------------------- /tests/test_pulseplayer.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # 5 | # Copyright 2013, 2014, 2016, 2017 Guenter Bartsch 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | import unittest 21 | import logging 22 | 23 | from nltools.pulseplayer import PulsePlayer 24 | 25 | class TestPulsePlayer (unittest.TestCase): 26 | 27 | def test_playback(self): 28 | 29 | player = PulsePlayer('nltools unittest') 30 | 31 | with open('foo.wav', 'rb') as wavf: 32 | wav = wavf.read() 33 | 34 | player.play(wav) 35 | 36 | if __name__ == "__main__": 37 | 38 | logging.basicConfig(level=logging.ERROR) 39 | 40 | unittest.main() 41 | 42 | -------------------------------------------------------------------------------- /tests/test_pulserecorder.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # 5 | # Copyright 2013, 2014, 2016, 2017 Guenter Bartsch 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 
9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | import unittest 21 | import logging 22 | import time 23 | 24 | from nltools.pulserecorder import PulseRecorder 25 | 26 | SOURCE = 'Monitor' 27 | SAMPLERATE = 16000 28 | VOLUME = 120 29 | 30 | class TestPulseRecorder (unittest.TestCase): 31 | 32 | def test_rec(self): 33 | 34 | recorder = PulseRecorder(source_name=SOURCE, rate=SAMPLERATE, volume=VOLUME) 35 | recorder.start_recording(1000) 36 | time.sleep(1) 37 | recorder.stop_recording() 38 | 39 | samples = recorder.get_samples() 40 | 41 | logging.debug(repr(samples)) 42 | 43 | self.assertGreater (len(samples), 900) 44 | 45 | 46 | if __name__ == "__main__": 47 | 48 | logging.basicConfig(level=logging.DEBUG) 49 | 50 | unittest.main() 51 | 52 | -------------------------------------------------------------------------------- /tests/test_sequitur.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # 5 | # Copyright 2014, 2016, 2017, 2018 Guenter Bartsch 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # 19 | 20 | import unittest 21 | import logging 22 | 23 | from nltools.sequiturclient import sequitur_gen_ipa 24 | 25 | MODELFN = 'models/sequitur-dict-de.ipa-latest' 26 | 27 | G2P_TESTS = [ 28 | (u'gelbseidenen', u"'g\u025blb-za\u026a-d\u0259-n\u0259n" ), 29 | (u'unmute', u"'\u0294\u028an-mu\u02d0-t\u0259" ), 30 | (u'übereilt', u"\u0294y\u02d0-b\u0250-'\u0294a\u026alt" ), 31 | ] 32 | 33 | class TestSequitur (unittest.TestCase): 34 | 35 | def test_g2p(self): 36 | 37 | for word, ipa in G2P_TESTS: 38 | 39 | sq_ipa = sequitur_gen_ipa (MODELFN, word) 40 | 41 | self.assertEqual (sq_ipa, ipa) 42 | 43 | 44 | if __name__ == "__main__": 45 | 46 | logging.basicConfig(level=logging.ERROR) 47 | # logging.basicConfig(level=logging.DEBUG) 48 | 49 | unittest.main() 50 | 51 | -------------------------------------------------------------------------------- /tests/test_tokenizer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # 5 | # Copyright 2017, 2018 Guenter Bartsch 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | # 19 | 20 | import unittest 21 | import logging 22 | 23 | from nltools.tokenizer import * 24 | 25 | class TestTokenizer (unittest.TestCase): 26 | 27 | def setUp(self): 28 | self.seq = range(10) 29 | 30 | # FIXME 31 | # def test_latin1(self): 32 | # self.assertTrue (detect_latin1('/home/ai/voxforge/de/audio/ralfherzog-20071220-de34/etc/prompts-original')) 33 | # self.assertFalse (detect_latin1('/home/ai/voxforge/de/audio/mjw-20110527-dyg/etc/prompts-original')) 34 | 35 | def test_tokenize_special(self): 36 | 37 | self.assertEqual (tokenize(u"„kamel“"), [u'kamel']) 38 | self.assertEqual (tokenize(u"$test"), [u'dollar', u'test']) 39 | 40 | def test_tokenize_wrt(self): 41 | 42 | self.assertEqual (tokenize(u"foo circa bar"), [u'foo', u'circa', u'bar']) 43 | self.assertEqual (tokenize(u"foo ok bar"), [u'foo', u'okay', u'bar']) 44 | self.assertEqual (tokenize(u"fook ok baokr"), [u'fook', u'okay', u'baokr']) 45 | self.assertEqual (tokenize(u"o.k.bar"), [u'okay', u'bar']) 46 | self.assertEqual (tokenize(u"foo o. k.bar"), [u'foo', u'okay', u'bar']) 47 | 48 | def test_tokenize_punctuation(self): 49 | 50 | self.assertEqual (tokenize(u"abc, def. zzz! ( abc