├── .gitignore
├── LICENSE.txt
├── README-en.md
├── README.md
├── bin
    └── _temp.txt
├── conv
    ├── asr
    │   └── audio-to-text files.txt
    ├── rec
    │   ├── recorded wav files.txt
    │   ├── winxp_shutdown.wav
    │   └── winxp_startup.wav
    └── tts
    │   └── text-to-speech files.txt
├── env
    ├── do_pack_by_pyinstaller.bat
    ├── do_setup_by_pip.bat
    └── pyinstaller_pack_fw.spec
├── gui
    └── pzh-speech.fbp
├── img
    ├── ico logo source.txt
    ├── pzh-speech.ico
    └── pzh-speech.png
└── src
    ├── main.py
    └── win.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | #
 2 | # NOTE! Don't add files that are generated in specific
 3 | # subdirectories here. Add them in the ".gitignore" file
 4 | # in that subdirectory instead.
 5 | #
 6 | # NOTE! Please use 'git-ls-files -i --exclude-standard'
 7 | # command after changing this file, to see if there are
 8 | # any tracked files which get ignored after the change.
 9 | 
10 | # Python bytecode
11 | *.pyc
12 | __pycache__
13 | 
14 | /.idea
15 | 
16 | 
17 | 
18 | 
19 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2017-2018, Jay Heng
 2 | All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without
 5 | modification, are permitted provided that the following conditions are met:
 6 | 
 7 | * Redistributions of source code must retain the above copyright notice, this
 8 |   list of conditions and the following disclaimer.
 9 | 
10 | * Redistributions in binary form must reproduce the above copyright notice,
11 |   this list of conditions and the following disclaimer in the documentation
12 |   and/or other materials provided with the distribution.
13 | 
14 | * Neither the name of elapsedtimer nor the names of its
15 |   contributors may be used to endorse or promote products derived from
16 |   this software without specific prior written permission.
17 | 
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


--------------------------------------------------------------------------------
/README-en.md:
--------------------------------------------------------------------------------
 1 | # pzh-speech
 2 | A tiny audio speech (.wav) utility tool (GUI) based on Python2.7+wxPython4.0+PyAudio+Matplotlib+SpeechRecognition(PocketSphinx)+pyttsx3(eSpeak) | 一款支持多引擎的wav格式语音处理小工具（音频录播与波形显示，语音识别，文语合成） 
 3 | 
 4 | [![GitHub release](https://img.shields.io/github/release/JayHeng/pzh-py-speech.svg)](https://github.com/JayHeng/pzh-py-speech/releases/latest) [![GitHub commits](https://img.shields.io/github/commits-since/JayHeng/pzh-py-speech/v1.0.0.svg)](https://github.com/JayHeng/pzh-py-speech/compare/v1.0.0...master) [![GitHub license](https://img.shields.io/github/license/JayHeng/pzh-py-speech.svg)](https://github.com/JayHeng/pzh-py-speech/blob/master/LICENSE.txt)
 5 | 
 6 | English | [中文](./README.md)
 7 | 
 8 | <img src="http://henjay724.com/image/cnblogs/JaysPySPEECH_overview_github.PNG" style="zoom:100%" />
 9 | 
10 | ### How to build :
11 | ********************
12 | 　　First of all, you should install all Non-Python packages listed in [《痞子衡语音处理助手-开发环境搭建》](https://www.cnblogs.com/henjay724/p/9542690.html), then follow below steps:
13 | ```text
14 |   1. Install Python2.7.15 x86 version
15 |   2. Confirm that the directory "\Python27\" and "\Python27\Scripts\" are in the system environment variable path after the installation is completed
16 |   3. Double click "\pzh-py-speech\env\do_setup_by_pip.bat" to install the Python library on which pzh-speech depends
17 |   4. Double click "\pzh-py-speech\env\do_pack_by_pyinstaller.bat" to regenerate the pzh-speech.exe
18 |   5. Open "\pzh-py-speech\bin\pzh-speech.exe" to use it
19 | ```
20 | 
21 | ### Tool Features :
22 | ********************
23 | * View the waveform of selected .wav file
24 | * Record sound from microphone to .wav file (\conv\rec)
25 | * Play selected .wav file
26 | * ASR: Recognize selected .wav file to text (\conv\asr)
27 | * TTS: Translate input text to speech
28 | * TTW: Translate input text to .wav file (\conv\tts)
29 | * Both English and Chinese are supported in ASR,TTS,TTW
30 | * Design detail: [《痞子衡语音处理助手诞生记(全七篇)》](https://www.cnblogs.com/henjay724/p/9541867.html)
31 | 
32 | Known issues:
33 | * The audio data are all 0x00s if BitDepth is set as 8bits when recording sound (it seems to be PyAudio issue)
34 | * Sometimes Application will hang up if language is set as Chinese when using TTS (it may be MSSpeech_TTS_xxx language package installation issue)
35 | 
36 | ### License :
37 | ********************
38 | 　　This package is licensed under the BSD three-clause license. See the LICENSE.txt file for details.
39 | 
40 | Copyright © 2017-2018 Jay Heng.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # 痞子衡语音处理助手
 2 | A tiny audio speech (.wav) utility tool (GUI) based on Python2.7+wxPython4.0+PyAudio+Matplotlib+SpeechRecognition(PocketSphinx)+pyttsx3(eSpeak) | 一款支持多引擎的wav格式语音处理小工具（音频录播与波形显示，语音识别，文语合成） 
 3 | 
 4 | [![GitHub release](https://img.shields.io/github/release/JayHeng/pzh-py-speech.svg)](https://github.com/JayHeng/pzh-py-speech/releases/latest) [![GitHub commits](https://img.shields.io/github/commits-since/JayHeng/pzh-py-speech/v1.0.0.svg)](https://github.com/JayHeng/pzh-py-speech/compare/v1.0.0...master) [![GitHub license](https://img.shields.io/github/license/JayHeng/pzh-py-speech.svg)](https://github.com/JayHeng/pzh-py-speech/blob/master/LICENSE.txt)
 5 | 
 6 | [English](./README-en.md) | 中文
 7 | 
 8 | <img src="http://henjay724.com/image/cnblogs/JaysPySPEECH_overview_github.PNG" style="zoom:100%" />
 9 | 
10 | ### 1. 二次开发与重编 :
11 | ********************
12 | 　　参考这篇文章 [《痞子衡语音处理助手-开发环境搭建》](https://www.cnblogs.com/henjay724/p/9542690.html) 安装所有非Python相关的开发工具, 然后按照如下步骤继续安装Python环境:  
13 | ```text
14 |   1. 安装Python2.7.15 x86 version  
15 |   2. 确认系统路径包含"\Python27\" 和 "\Python27\Scripts\"  
16 |   3. 双击"\pzh-py-speech\env\do_setup_by_pip.bat"脚本安装所有依赖的第三方Python库  
17 |   4. 双击"\pzh-py-speech\env\do_pack_by_pyinstaller.bat"脚本重新生成pzh-speech.exe  
18 |   5. 双击"\pzh-py-speech\bin\pzh-speech.exe"运行  
19 | ```
20 | 
21 | ### 2. 软件功能 :
22 | ********************
23 | * 支持查看所选的.wav文件波形图  
24 | * 支持从麦克风录制声音进.wav文件 (\conv\rec)  
25 | * 支持播放所选的.wav文件  
26 | * ASR: 支持识别.wav文件里的内容并保存到文本文件 (\conv\asr)  
27 | * TTS: 支持将输入的文本内容转换成语音播放  
28 | * TTW: 支持将输入的文本内容转换成.wav文件 (\conv\tts)  
29 | * 支持两种语言（中英）的上述ASR,TTS,TTW处理  
30 | * 软件设计细节详见: [《痞子衡语音处理助手诞生记(全七篇)》](https://www.cnblogs.com/henjay724/p/9541867.html)
31 | 
32 | 已知问题:  
33 | * 在录制声音时，如果BitDepth设为8bits，录制的音频数据全是0x00 (应该是PyAudio库的问题)  
34 | * 在处理TTS时，如果语言设置为中文，有时候软件会停止执行 (可能是MSSpeech_TTS_xxx language包的安装问题)  
35 | 
36 | ### 3. License :
37 | ********************
38 | 　　软件采用BSD three-clause license， 更多许可证细节详见LICENSE.txt。
39 | 
40 | Copyright © 2017-2018 Jay Heng.


--------------------------------------------------------------------------------
/bin/_temp.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JayHeng/pzh-py-speech/5eefe3f6720e12af4e3bcdc3144063a3e6c1331d/bin/_temp.txt


--------------------------------------------------------------------------------
/conv/asr/audio-to-text files.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JayHeng/pzh-py-speech/5eefe3f6720e12af4e3bcdc3144063a3e6c1331d/conv/asr/audio-to-text files.txt


--------------------------------------------------------------------------------
/conv/rec/recorded wav files.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JayHeng/pzh-py-speech/5eefe3f6720e12af4e3bcdc3144063a3e6c1331d/conv/rec/recorded wav files.txt


--------------------------------------------------------------------------------
/conv/rec/winxp_shutdown.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JayHeng/pzh-py-speech/5eefe3f6720e12af4e3bcdc3144063a3e6c1331d/conv/rec/winxp_shutdown.wav


--------------------------------------------------------------------------------
/conv/rec/winxp_startup.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JayHeng/pzh-py-speech/5eefe3f6720e12af4e3bcdc3144063a3e6c1331d/conv/rec/winxp_startup.wav


--------------------------------------------------------------------------------
/conv/tts/text-to-speech files.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JayHeng/pzh-py-speech/5eefe3f6720e12af4e3bcdc3144063a3e6c1331d/conv/tts/text-to-speech files.txt


--------------------------------------------------------------------------------
/env/do_pack_by_pyinstaller.bat:
--------------------------------------------------------------------------------
1 | pyinstaller.exe pyinstaller_pack_fw.spec
2 | copy .\dist\pzh-speech.exe ..\bin
3 | rd /q /s .\build
4 | rd /q /s .\dist


--------------------------------------------------------------------------------
/env/do_setup_by_pip.bat:
--------------------------------------------------------------------------------
1 | pip.exe install wxPython==4.0.3
2 | pip.exe install PyAudio==0.2.11
3 | pip.exe install matplotlib==2.2.3
4 | pip.exe install numpy==1.15.0
5 | pip.exe install SpeechRecognition==3.8.1
6 | pip.exe install pocketsphinx==0.1.15
7 | pip.exe install pyttsx3==2.7
8 | 
9 | pip.exe install PyInstaller


--------------------------------------------------------------------------------
/env/pyinstaller_pack_fw.spec:
--------------------------------------------------------------------------------
 1 | # -*- mode: python -*-
 2 | 
 3 | block_cipher = None
 4 | 
 5 | 
 6 | a = Analysis(['..\\src\\main.py',
 7 |               '..\\src\\win.py'],
 8 |              binaries=[],
 9 |              datas=[],
10 |              hiddenimports=[],
11 |              hookspath=[],
12 |              runtime_hooks=[],
13 |              excludes=[],
14 |              win_no_prefer_redirects=False,
15 |              win_private_assemblies=False,
16 |              cipher=block_cipher)
17 | pyz = PYZ(a.pure, a.zipped_data,
18 |              cipher=block_cipher)
19 | exe = EXE(pyz,
20 |           a.scripts,
21 |           a.binaries,
22 |           a.zipfiles,
23 |           a.datas,
24 |           name='pzh-speech',
25 |           debug=False,
26 |           strip=False,
27 |           upx=True,
28 |           runtime_tmpdir=None,
29 |           console=False , icon='..\\img\\pzh-speech.ico')
30 | 


--------------------------------------------------------------------------------
/img/ico logo source.txt:
--------------------------------------------------------------------------------
1 | Get logo from https://cmusphinx.github.io/


--------------------------------------------------------------------------------
/img/pzh-speech.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JayHeng/pzh-py-speech/5eefe3f6720e12af4e3bcdc3144063a3e6c1331d/img/pzh-speech.ico


--------------------------------------------------------------------------------
/img/pzh-speech.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JayHeng/pzh-py-speech/5eefe3f6720e12af4e3bcdc3144063a3e6c1331d/img/pzh-speech.png


--------------------------------------------------------------------------------
/src/main.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*
  2 | import wx
  3 | import sys, os
  4 | reload(sys)
  5 | sys.setdefaultencoding('utf-8')
  6 | import subprocess
  7 | import win
  8 | import numpy
  9 | import matplotlib
 10 | from matplotlib.backends.backend_wxagg import FigureCanvasWxAgg as FigureCanvas
 11 | from matplotlib.figure import Figure
 12 | import matplotlib.tri as Tri
 13 | 
 14 | import wave
 15 | import pyaudio
 16 | import speech_recognition
 17 | import pyttsx3
 18 | 
 19 | MAX_AUDIO_CHANNEL = 8
 20 | #unit: inch
 21 | PLOT_PANEL_WIDTH = 720
 22 | PLOT_PANEL_HEIGHT = 360
 23 | #unit: percent
 24 | PLOT_AXES_WIDTH_TITLE = 0.05
 25 | PLOT_AXES_HEIGHT_LABEL = 0.075
 26 | AUDIO_CHUNK_SIZE = 1024
 27 | 
 28 | AUDIO_PLAY_STATE_START = 0
 29 | AUDIO_PLAY_STATE_PLAY = 1
 30 | AUDIO_PLAY_STATE_PAUSE = 2
 31 | AUDIO_PLAY_STATE_RESUME = 3
 32 | AUDIO_PLAY_STATE_END = 4
 33 | 
 34 | class wavCursor(object):
 35 |     def __init__(self, ax, x, y):
 36 |         self.ax = ax
 37 |         self.vline = ax.axvline(color='blue', alpha=1)
 38 |         self.hline = ax.axhline(color='blue', alpha=1)
 39 |         self.marker, = ax.plot([0],[0], marker="o", color="crimson", zorder=3)
 40 |         self.x = x
 41 |         self.y = y
 42 |         self.xlim = self.x[len(self.x)-1]
 43 |         self.text = ax.text(0.7, 0.9, '', bbox=dict(facecolor='yellow', alpha=0.5))
 44 | 
 45 |     def moveMouse(self, event):
 46 |         if not event.inaxes:
 47 |             return
 48 |         x, y = event.xdata, event.ydata
 49 |         if x > self.xlim:
 50 |             x = self.xlim
 51 |         index = numpy.searchsorted(self.x, [x])[0]
 52 |         x = self.x[index]
 53 |         y = self.y[index]
 54 |         self.vline.set_xdata(x)
 55 |         self.hline.set_ydata(y)
 56 |         self.marker.set_data([x],[y])
 57 |         self.text.set_text('x=%1.2f, y=%1.2f' % (x, y))
 58 |         self.text.set_position((x,y))
 59 |         self.ax.figure.canvas.draw_idle()
 60 | 
 61 | class wavCanvasPanel(wx.Panel):
 62 | 
 63 |     def __init__(self, parent):
 64 |         wx.Panel.__init__(self, parent)
 65 |         dpi = 60
 66 |         width = PLOT_PANEL_WIDTH / dpi
 67 |         height = PLOT_PANEL_HEIGHT / dpi
 68 |         self.wavFigure = Figure(figsize=[width,height], dpi=dpi, facecolor='#404040')
 69 |         self.wavCanvas = FigureCanvas(self, -1, self.wavFigure)
 70 |         self.wavSizer = wx.BoxSizer(wx.VERTICAL)
 71 |         self.wavSizer.Add(self.wavCanvas, 1, wx.EXPAND|wx.ALL)
 72 |         self.SetSizerAndFit(self.wavSizer)
 73 |         self.wavAxes = [None] * MAX_AUDIO_CHANNEL
 74 |         self.wavCursor = [None] * MAX_AUDIO_CHANNEL
 75 | 
 76 |     def fromstring(self, wavData, alignedByte):
 77 |         if alignedByte <= 8:
 78 |             src = numpy.ndarray(len(wavData), numpy.dtype('>i1'), wavData)
 79 |             dest = numpy.zeros(len(wavData) / alignedByte, numpy.dtype('>i8'))
 80 |             for i in range(alignedByte):
 81 |                 dest.view(dtype='>i1')[alignedByte-1-i::8] = src.view(dtype='>i1')[i::alignedByte]
 82 |             [hex(x) for x in dest]
 83 |             return True, dest
 84 |         else:
 85 |             return False, wavData
 86 | 
 87 |     def readWave(self, wavPath, wavInfo):
 88 |         if os.path.isfile(wavPath):
 89 |             # Open the wav file to get wave data and parameters
 90 |             wavFile =  wave.open(wavPath, "rb")
 91 |             wavParams = wavFile.getparams()
 92 |             wavChannels = wavParams[0]
 93 |             wavSampwidth = wavParams[1]
 94 |             wavFramerate = wavParams[2]
 95 |             wavFrames = wavParams[3]
 96 |             wavInfo.SetStatusText('Opened Audio Info = ' +
 97 |                                   'Channels:' + str(wavChannels) +
 98 |                                   ', SampWidth:' + str(wavSampwidth) + 'Byte' +
 99 |                                   ', SampRate:' + str(wavFramerate) + 'kHz' +
100 |                                   ', FormatTag:' + wavParams[4])
101 |             wavData = wavFile.readframes(wavFrames)
102 |             wavFile.close()
103 |             # Transpose the wav data if wave has multiple channels
104 |             if wavSampwidth == 1:
105 |                 dtype = numpy.int8
106 |             elif wavSampwidth == 2:
107 |                 dtype = numpy.int16
108 |             elif wavSampwidth == 3:
109 |                 dtype = None
110 |             elif wavSampwidth == 4:
111 |                 dtype = numpy.float32
112 |             else:
113 |                 return 0, 0, 0
114 |             if dtype != None:
115 |                 retData = numpy.fromstring(wavData, dtype = dtype)
116 |             else:
117 |                 # Implement int24 manually
118 |                 status, retData = self.fromstring(wavData, 3)
119 |                 if not status:
120 |                     return 0, 0, 0
121 |             if wavChannels != 1:
122 |                 retData.shape = -1, wavChannels
123 |                 retData = retData.T
124 |             # Calculate and arange wave time
125 |             retTime = numpy.arange(0, wavFrames) * (1.0 / wavFramerate)
126 |             retChannels = wavChannels
127 |             return retChannels, retData, retTime
128 |         else:
129 |             return 0, 0, 0
130 | 
131 |     def showWave(self, wavPath, wavDomain, wavInfo):
132 |         self.wavFigure.clear()
133 |         waveChannels, waveData, waveTime = self.readWave(wavPath, wavInfo)
134 |         if waveChannels != 0:
135 |             # Note: only show max supported channel if actual channel > max supported channel
136 |             if waveChannels > MAX_AUDIO_CHANNEL:
137 |                 waveChannels = MAX_AUDIO_CHANNEL
138 |             # Polt the waveform of each channel in sequence
139 |             for i in range(waveChannels):
140 |                 left = PLOT_AXES_HEIGHT_LABEL
141 |                 bottom = (1.0 / waveChannels) * (waveChannels - 1 - i) + PLOT_AXES_HEIGHT_LABEL
142 |                 height = 1.0 / waveChannels - (PLOT_AXES_WIDTH_TITLE + PLOT_AXES_HEIGHT_LABEL)
143 |                 width = 1 - left - 0.05
144 |                 self.wavAxes[i] = self.wavFigure.add_axes([left, bottom, width, height], facecolor='k')
145 |                 if waveChannels == 1:
146 |                     waveAmplitude = waveData
147 |                 else:
148 |                     waveAmplitude = waveData[i]
149 |                 if wavDomain == 'Time':
150 |                     self.wavAxes[i].set_prop_cycle(color='#00F279', lw=[1])
151 |                     self.wavAxes[i].set_xlabel('time (s)', color='w')
152 |                     self.wavAxes[i].plot(waveTime, waveAmplitude)
153 |                     self.wavCursor[i] = wavCursor(self.wavAxes[i], waveTime, waveAmplitude)
154 |                 elif wavDomain == 'Frequency':
155 |                     self.wavAxes[i].set_prop_cycle(color='red', lw=[1])
156 |                     self.wavAxes[i].set_xlabel('Frequency (Hz)', color='w')
157 |                     waveMagnitude = numpy.absolute(numpy.fft.rfft(waveAmplitude))
158 |                     waveFreq = numpy.fft.rfftfreq(len(waveTime), numpy.diff(waveTime)[0])
159 |                     self.wavAxes[i].plot(waveFreq, waveMagnitude)
160 |                     self.wavCursor[i] = wavCursor(self.wavAxes[i], waveFreq, waveMagnitude)
161 |                 self.wavAxes[i].set_ylabel('value', color='w')
162 |                 self.wavAxes[i].grid()
163 |                 self.wavAxes[i].tick_params(labelcolor='w')
164 |                 self.wavAxes[i].set_title('Audio Channel ' + str(i), color='w')
165 |                 self.wavCanvas.mpl_connect('motion_notify_event', self.wavCursor[i].moveMouse)
166 |         # Note!!!: draw() must be called if figure has been cleared once
167 |         self.wavCanvas.draw()
168 | 
169 |     def showWelcome (self):
170 |         self.wavFigure.clear()
171 |         # Get Code from below link
172 |         # https://matplotlib.org/gallery/images_contours_and_fields/tripcolor_demo.html#sphx-glr-gallery-images-contours-and-fields-tripcolor-demo-py
173 |         n_angles = 36
174 |         n_radii = 8
175 |         min_radius = 0.25
176 |         radii = numpy.linspace(min_radius, 0.95, n_radii)
177 |         angles = numpy.linspace(0, 2 * numpy.pi, n_angles, endpoint=False)
178 |         angles = numpy.repeat(angles[..., numpy.newaxis], n_radii, axis=1)
179 |         angles[:, 1::2] += numpy.pi / n_angles
180 |         x = (radii * numpy.cos(angles)).flatten()
181 |         y = (radii * numpy.sin(angles)).flatten()
182 |         z = (numpy.cos(radii) * numpy.cos(3 * angles)).flatten()
183 |         triang = Tri.Triangulation(x, y)
184 |         triang.set_mask(numpy.hypot(x[triang.triangles].mean(axis=1),
185 |                                     y[triang.triangles].mean(axis=1))
186 |                         < min_radius)
187 |         welcomeAxes = self.wavFigure.add_axes([0.13,0.2,0.7,0.7], facecolor='#404040')
188 |         #welcomeAxes.set_aspect('equal')
189 |         welcomeAxes.tripcolor(triang, z, shading='flat')
190 |         # Set some properties
191 |         welcomeAxes.set_title('Welcome to use pzh-speech', color='w')
192 |         welcomeAxes.set_xticks([])
193 |         welcomeAxes.set_yticks([])
194 |         welcomeAxes.spines['top'].set_visible(False)
195 |         welcomeAxes.spines['right'].set_visible(False)
196 |         welcomeAxes.spines['bottom'].set_visible(False)
197 |         welcomeAxes.spines['left'].set_visible(False)
198 |         self.wavCanvas.draw()
199 | 
200 | class mainWin(win.speech_win):
201 | 
202 |     def __init__(self, parent):
203 |         win.speech_win.__init__(self, parent)
204 |         icon = wx.Icon()
205 |         icon.CopyFromBitmap(wx.Bitmap( u"../img/pzh-speech.ico", wx.BITMAP_TYPE_ANY))
206 |         self.SetIcon(icon)
207 |         self.wavPanel = wavCanvasPanel(self.m_panel_plot)
208 |         self.m_genericDirCtrl_audioDir.SetFilter("Audio files (*.wav)|*.wav")
209 |         self.isRecording = False
210 |         # Start -> Play -> Pause -> Resume -> End
211 |         self.playState = AUDIO_PLAY_STATE_START
212 |         self.statusBar.SetFieldsCount(1)
213 |         self.wavPanel.showWelcome()
214 |         self.ttsObj = None
215 | 
216 |     def viewAudio( self, event ):
217 |         self.wavPath =  self.m_genericDirCtrl_audioDir.GetFilePath()
218 |         wavDomain = self.m_choice_domain.GetString(self.m_choice_domain.GetSelection())
219 |         self.wavPanel.showWave(self.wavPath, wavDomain, self.statusBar)
220 |         if self.playState != AUDIO_PLAY_STATE_START:
221 |             self.playState = AUDIO_PLAY_STATE_END
222 |             self.m_button_play.SetLabel('Play Start')
223 | 
224 |     def recordAudioCallback(self, in_data, frame_count, time_info, status):
225 |         if not self.isRecording:
226 |             status = pyaudio.paComplete
227 |         else:
228 |             self.wavFrames.append(in_data)
229 |             status = pyaudio.paContinue
230 |         return (in_data, status)
231 | 
232 |     def recordAudio( self, event ):
233 |         if not self.isRecording:
234 |             self.isRecording = True
235 |             self.m_button_record.SetLabel('Record Stop')
236 |             # Get the wave parameter from user settings
237 |             fileName = self.m_textCtrl_recFileName.GetLineText(0)
238 |             if fileName == '':
239 |                 fileName = 'rec_untitled1.wav'
240 |             self.wavPath = os.path.join(os.path.dirname(os.path.abspath(os.path.dirname(__file__))), 'conv', 'rec', fileName)
241 |             self.wavSampRate = int(self.m_choice_sampRate.GetString(self.m_choice_sampRate.GetSelection()))
242 |             channels = self.m_choice_channels.GetString(self.m_choice_channels.GetSelection())
243 |             if channels == 'Mono':
244 |                 self.wavChannels = 1
245 |             else: #if channels == 'Stereo':
246 |                 self.wavChannels = 2
247 |             bitDepth = int(self.m_choice_bitDepth.GetString(self.m_choice_bitDepth.GetSelection()))
248 |             if bitDepth == 8:
249 |                 self.wavBitFormat = pyaudio.paInt8
250 |             elif bitDepth == 24:
251 |                 self.wavBitFormat = pyaudio.paInt24
252 |             elif bitDepth == 32:
253 |                 self.wavBitFormat = pyaudio.paFloat32
254 |             else:
255 |                 self.wavBitFormat = pyaudio.paInt16
256 |             # Record audio according to wave parameters
257 |             self.wavFrames = []
258 |             self.wavPyaudio = pyaudio.PyAudio()
259 |             self.wavStream = self.wavPyaudio.open(format=self.wavBitFormat,
260 |                                                   channels=self.wavChannels,
261 |                                                   rate=self.wavSampRate,
262 |                                                   input=True,
263 |                                                   frames_per_buffer=AUDIO_CHUNK_SIZE,
264 |                                                   stream_callback=self.recordAudioCallback)
265 |             self.wavStream.start_stream()
266 |         else:
267 |             self.isRecording = False
268 |             self.m_button_record.SetLabel('Record Start')
269 |             self.wavStream.stop_stream()
270 |             self.wavStream.close()
271 |             self.wavPyaudio.terminate()
272 |             # Save the wave data into file
273 |             wavFile = wave.open(self.wavPath, 'wb')
274 |             wavFile.setnchannels(self.wavChannels)
275 |             wavFile.setsampwidth(self.wavPyaudio.get_sample_size(self.wavBitFormat))
276 |             wavFile.setframerate(self.wavSampRate)
277 |             wavFile.writeframes(b''.join(self.wavFrames))
278 |             wavFile.close()
279 | 
280 |     def playAudioCallback(self, in_data, frame_count, time_info, status):
281 |         if self.playState == AUDIO_PLAY_STATE_PLAY or self.playState == AUDIO_PLAY_STATE_RESUME:
282 |             data = self.wavFile.readframes(frame_count)
283 |             if self.wavFile.getnframes() == self.wavFile.tell():
284 |                 status = pyaudio.paComplete
285 |                 self.playState = AUDIO_PLAY_STATE_END
286 |                 self.m_button_play.SetLabel('Play Start')
287 |             else:
288 |                 status = pyaudio.paContinue
289 |             return (data, status)
290 |         else:
291 |             # Note!!!:
292 |             data = numpy.zeros(frame_count*self.wavFile.getnchannels()).tostring()
293 |             return (data, pyaudio.paContinue)
294 | 
295 |     def playAudio( self, event ):
296 |         if os.path.isfile(self.wavPath):
297 |             if self.playState == AUDIO_PLAY_STATE_END:
298 |                 self.playState = AUDIO_PLAY_STATE_START
299 |                 self.wavStream.stop_stream()
300 |                 self.wavStream.close()
301 |                 self.wavPyaudio.terminate()
302 |                 self.wavFile.close()
303 |             if self.playState == AUDIO_PLAY_STATE_START:
304 |                 self.playState = AUDIO_PLAY_STATE_PLAY
305 |                 self.m_button_play.SetLabel('Play Pause')
306 |                 self.wavFile =  wave.open(self.wavPath, "rb")
307 |                 self.wavPyaudio = pyaudio.PyAudio()
308 |                 self.wavStream = self.wavPyaudio.open(format=self.wavPyaudio.get_format_from_width(self.wavFile.getsampwidth()),
309 |                                                       channels=self.wavFile.getnchannels(),
310 |                                                       rate=self.wavFile.getframerate(),
311 |                                                       output=True,
312 |                                                       stream_callback=self.playAudioCallback)
313 |                 self.wavStream.start_stream()
314 |             elif self.playState == AUDIO_PLAY_STATE_PLAY or self.playState == AUDIO_PLAY_STATE_RESUME:
315 |                 self.playState = AUDIO_PLAY_STATE_PAUSE
316 |                 self.m_button_play.SetLabel('Play Resume')
317 |             elif self.playState == AUDIO_PLAY_STATE_PAUSE:
318 |                 self.playState = AUDIO_PLAY_STATE_RESUME
319 |                 self.m_button_play.SetLabel('Play Pause')
320 |             else:
321 |                 pass
322 | 
323 |     def getLanguageSelection(self):
324 |         languageType = self.m_choice_lang.GetString(self.m_choice_lang.GetSelection())
325 |         if languageType == 'Mandarin Chinese':
326 |             languageType = 'zh-CN'
327 |             languageName = 'Chinese'
328 |         else: # languageType == 'US English':
329 |             languageType = 'en-US'
330 |             languageName = 'English'
331 |         return languageType, languageName
332 | 
333 |     def audioSpeechRecognition( self, event ):
334 |         if os.path.isfile(self.wavPath):
335 |             asrObj = speech_recognition.Recognizer()
336 |             with speech_recognition.AudioFile(self.wavPath) as source:
337 |                 # Read the entire audio file
338 |                 speechAudio = asrObj.record(source)
339 |             self.m_textCtrl_asrttsText.Clear()
340 |             # Get language type
341 |             languageType, languageName = self.getLanguageSelection()
342 |             engineType = self.m_choice_asrEngine.GetString(self.m_choice_asrEngine.GetSelection())
343 |             if engineType == 'CMU Sphinx':
344 |                 # Recognize speech using Sphinx
345 |                 try:
346 |                     speechText = asrObj.recognize_sphinx(speechAudio, language=languageType)
347 |                     self.m_textCtrl_asrttsText.write(speechText)
348 |                     self.statusBar.SetStatusText("ASR Conversation Info: Successfully")
349 |                     fileName = self.m_textCtrl_asrFileName.GetLineText(0)
350 |                     if fileName == '':
351 |                         fileName = 'asr_untitled1.txt'
352 |                     asrFilePath = os.path.join(os.path.dirname(os.path.abspath(os.path.dirname(__file__))), 'conv', 'asr', fileName)
353 |                     asrFileObj = open(asrFilePath, 'wb')
354 |                     asrFileObj.write(speechText)
355 |                     asrFileObj.close()
356 |                 except speech_recognition.UnknownValueError:
357 |                     self.statusBar.SetStatusText("ASR Conversation Info: Sphinx could not understand audio")
358 |                 except speech_recognition.RequestError as e:
359 |                     self.statusBar.SetStatusText("ASR Conversation Info: Sphinx error; {0}".format(e))
360 |             else:
361 |                 self.statusBar.SetStatusText("ASR Conversation Info: Unavailable ASR Engine")
362 | 
363 |     def refreshVoice( self, event ):
364 |         languageType, languageName = self.getLanguageSelection()
365 |         engineType = self.m_choice_ttsEngine.GetString(self.m_choice_ttsEngine.GetSelection())
366 |         if engineType == 'pyttsx3 - SAPI5':
367 |             if self.ttsObj == None:
368 |                  self.ttsObj = pyttsx3.init()
369 |             voices = self.ttsObj.getProperty('voices')
370 |             voiceItems = [None] * len(voices)
371 |             itemIndex = 0
372 |             for voice in voices:
373 |                 voiceId = voice.id.lower()
374 |                 voiceName = voice.name.lower()
375 |                 if (voiceId.find(languageType.lower()) != -1) or (voiceName.find(languageName.lower()) != -1):
376 |                     voiceItems[itemIndex] = voice.name
377 |                     itemIndex += 1
378 |             voiceItems = voiceItems[0:itemIndex]
379 |             self.m_choice_voice.Clear()
380 |             self.m_choice_voice.SetItems(voiceItems)
381 |         else:
382 |             voiceItem = ['N/A']
383 |             self.m_choice_voice.Clear()
384 |             self.m_choice_voice.SetItems(voiceItem)
385 | 
386 |     def textToWav(self, text, language):
387 |         fileName = self.m_textCtrl_ttsFileName.GetLineText(0)
388 |         if fileName == '':
389 |             fileName = 'tts_untitled1.wav'
390 |         ttsFilePath = os.path.join(os.path.dirname(os.path.abspath(os.path.dirname(__file__))), 'conv', 'tts', fileName)
391 |         ttwEngineType = self.m_choice_ttwEngine.GetString(self.m_choice_ttwEngine.GetSelection())
392 |         if ttwEngineType == 'eSpeak TTS':
393 |             ttsTextFile = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'ttsTextTemp.txt')
394 |             ttsTextFileObj = open(ttsTextFile, 'wb')
395 |             ttsTextFileObj.write(text)
396 |             ttsTextFileObj.close()
397 |             try:
398 |                 #espeak_path = "C:/tools_mcu/eSpeak/command_line/espeak.exe"
399 |                 #subprocess.call([espeak_path, "-v"+languageType[0:2], text])
400 |                 gender = self.m_choice_gender.GetString(self.m_choice_gender.GetSelection())
401 |                 gender = gender.lower()[0] + '3'
402 |                 subprocess.call(["espeak", "-v"+language[0:2]+'+'+gender, "-f"+ttsTextFile, "-w"+ttsFilePath])
403 |             except:
404 |                 self.statusBar.SetStatusText("TTW Conversation Info: eSpeak is not installed or its path is not added into system environment")
405 |             os.remove(ttsTextFile)
406 |         else:
407 |             self.statusBar.SetStatusText("TTW Conversation Info: Unavailable TTW Engine")
408 | 
409 |     def textToSpeech( self, event ):
410 |         languageType, languageName = self.getLanguageSelection()
411 |         # Get text from m_textCtrl_asrttsText
412 |         lines = self.m_textCtrl_asrttsText.GetNumberOfLines()
413 |         if lines != 0:
414 |             data = ''
415 |             for i in range(0, lines):
416 |                 data += self.m_textCtrl_asrttsText.GetLineText(i)
417 |         else:
418 |             return
419 |         ttsEngineType = self.m_choice_ttsEngine.GetString(self.m_choice_ttsEngine.GetSelection())
420 |         if ttsEngineType == 'pyttsx3 - SAPI5':
421 |             if self.ttsObj == None:
422 |                  self.ttsObj = pyttsx3.init()
423 |             hasVoice = False
424 |             voices = self.ttsObj.getProperty('voices')
425 |             voiceSel = self.m_choice_voice.GetString(self.m_choice_voice.GetSelection())
426 |             for voice in voices:
427 |                 #print ('id = {} \nname = {} \nlanguages = {} \n'.format(voice.id, voice.name, voice.languages))
428 |                 voiceId = voice.id.lower()
429 |                 voiceName = voice.name.lower()
430 |                 if (voiceId.find(languageType.lower()) != -1) or (voiceName.find(languageName.lower()) != -1):
431 |                     if (voiceSel == '') or (voiceSel == voice.name):
432 |                         hasVoice = True
433 |                         break
434 |             if hasVoice:
435 |                 self.ttsObj.setProperty('voice', voice.id)
436 |                 self.ttsObj.say(data)
437 |                 self.statusBar.SetStatusText("TTS Conversation Info: Run and Wait")
438 |                 self.ttsObj.runAndWait()
439 |                 self.statusBar.SetStatusText("TTS Conversation Info: Successfully")
440 |             else:
441 |                 self.statusBar.SetStatusText("TTS Conversation Info: Language is not supported by current PC")
442 |             self.textToWav(data, languageType)
443 |         else:
444 |             self.statusBar.SetStatusText("TTS Conversation Info: Unavailable TTS Engine")
445 | 
446 |     def clearAsrTtsText( self, event ):
447 |         self.m_textCtrl_asrttsText.Clear()
448 | 
449 |     def showHomepageInfo( self, event ):
450 |         messageText = (('Code: \n    https://github.com/JayHeng/pzh-py-speech.git \n') +
451 |                        ('Doc: \n    https://www.cnblogs.com/henjay724/p/9541867.html \n'))
452 |         wx.MessageBox(messageText, "Homepage", wx.OK | wx.ICON_INFORMATION)
453 | 
454 |     def showAboutInfo( self, event ):
455 |         messageText = (('Author: Jay Heng \n') +
456 |                        ('Email: hengjie1989@foxmail.com \n'))
457 |         wx.MessageBox(messageText, "About", wx.OK | wx.ICON_INFORMATION)
458 | 
459 | if __name__ == '__main__':
460 |     app = wx.App()
461 | 
462 |     main_win = mainWin(None)
463 |     main_win.SetTitle(u"pzh-speech v1.1.0")
464 |     main_win.Show()
465 | 
466 |     app.MainLoop()
467 | 


--------------------------------------------------------------------------------
/src/win.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | ###########################################################################
  4 | ## Python code generated with wxFormBuilder (version Oct 26 2018)
  5 | ## http://www.wxformbuilder.org/
  6 | ##
  7 | ## PLEASE DO *NOT* EDIT THIS FILE!
  8 | ###########################################################################
  9 | 
 10 | import wx
 11 | import wx.xrc
 12 | 
 13 | ###########################################################################
 14 | ## Class speech_win
 15 | ###########################################################################
 16 | 
 17 | class speech_win ( wx.Frame ):
 18 | 
 19 | 	def __init__( self, parent ):
 20 | 		wx.Frame.__init__ ( self, parent, id = wx.ID_ANY, title = u"pzh-speech", pos = wx.DefaultPosition, size = wx.Size( 942,694 ), style = wx.DEFAULT_FRAME_STYLE|wx.TAB_TRAVERSAL )
 21 | 
 22 | 		self.SetSizeHints( wx.DefaultSize, wx.DefaultSize )
 23 | 		self.SetBackgroundColour( wx.Colour( 64, 64, 64 ) )
 24 | 
 25 | 		self.menubar = wx.MenuBar( 0 )
 26 | 		self.m_menu_help = wx.Menu()
 27 | 		self.m_menuItem_homePage = wx.MenuItem( self.m_menu_help, wx.ID_ANY, u"Home Page", wx.EmptyString, wx.ITEM_NORMAL )
 28 | 		self.m_menu_help.Append( self.m_menuItem_homePage )
 29 | 
 30 | 		self.m_menuItem_about = wx.MenuItem( self.m_menu_help, wx.ID_ANY, u"About Author", wx.EmptyString, wx.ITEM_NORMAL )
 31 | 		self.m_menu_help.Append( self.m_menuItem_about )
 32 | 
 33 | 		self.menubar.Append( self.m_menu_help, u"Help" )
 34 | 
 35 | 		self.SetMenuBar( self.menubar )
 36 | 
 37 | 		win_sizer = wx.WrapSizer( wx.HORIZONTAL, wx.WRAPSIZER_DEFAULT_FLAGS )
 38 | 
 39 | 		edit_win = wx.BoxSizer( wx.VERTICAL )
 40 | 
 41 | 		dir_win = wx.WrapSizer( wx.HORIZONTAL, wx.WRAPSIZER_DEFAULT_FLAGS )
 42 | 
 43 | 		self.m_staticText_selectAudioFile = wx.StaticText( self, wx.ID_ANY, u"Select Audio File:", wx.DefaultPosition, wx.DefaultSize, 0 )
 44 | 		self.m_staticText_selectAudioFile.Wrap( -1 )
 45 | 
 46 | 		self.m_staticText_selectAudioFile.SetForegroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_APPWORKSPACE ) )
 47 | 
 48 | 		dir_win.Add( self.m_staticText_selectAudioFile, 0, wx.ALL, 5 )
 49 | 
 50 | 		self.m_genericDirCtrl_audioDir = wx.GenericDirCtrl( self, wx.ID_ANY, wx.EmptyString, wx.DefaultPosition, wx.Size( 180,250 ), wx.DIRCTRL_SHOW_FILTERS|wx.HSCROLL, wx.EmptyString, 0 )
 51 | 
 52 | 		self.m_genericDirCtrl_audioDir.ShowHidden( False )
 53 | 		self.m_genericDirCtrl_audioDir.SetForegroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_WINDOW ) )
 54 | 		self.m_genericDirCtrl_audioDir.SetBackgroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_WINDOW ) )
 55 | 
 56 | 		dir_win.Add( self.m_genericDirCtrl_audioDir, 1, wx.EXPAND |wx.ALL, 5 )
 57 | 
 58 | 		self.m_staticText_null0 = wx.StaticText( self, wx.ID_ANY, wx.EmptyString, wx.DefaultPosition, wx.Size( 30,-1 ), 0 )
 59 | 		self.m_staticText_null0.Wrap( -1 )
 60 | 
 61 | 		dir_win.Add( self.m_staticText_null0, 0, wx.ALL, 5 )
 62 | 
 63 | 		self.m_button_play = wx.Button( self, wx.ID_ANY, u"Play Start", wx.DefaultPosition, wx.Size( 110,-1 ), 0 )
 64 | 		dir_win.Add( self.m_button_play, 0, wx.ALL, 5 )
 65 | 
 66 | 
 67 | 		edit_win.Add( dir_win, 1, wx.EXPAND, 5 )
 68 | 
 69 | 		rec_win = wx.WrapSizer( wx.HORIZONTAL, wx.WRAPSIZER_DEFAULT_FLAGS )
 70 | 
 71 | 		self.m_staticText_null1 = wx.StaticText( self, wx.ID_ANY, wx.EmptyString, wx.DefaultPosition, wx.DefaultSize, 0 )
 72 | 		self.m_staticText_null1.Wrap( -1 )
 73 | 
 74 | 		rec_win.Add( self.m_staticText_null1, 0, wx.ALL, 5 )
 75 | 
 76 | 		self.m_staticText_recordAudioFile = wx.StaticText( self, wx.ID_ANY, u"Record Audio File:", wx.DefaultPosition, wx.Size( 180,-1 ), 0 )
 77 | 		self.m_staticText_recordAudioFile.Wrap( -1 )
 78 | 
 79 | 		self.m_staticText_recordAudioFile.SetForegroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_APPWORKSPACE ) )
 80 | 
 81 | 		rec_win.Add( self.m_staticText_recordAudioFile, 0, wx.ALL, 5 )
 82 | 
 83 | 		self.m_staticText_channels = wx.StaticText( self, wx.ID_ANY, u"Channels:", wx.DefaultPosition, wx.Size( 60,-1 ), 0 )
 84 | 		self.m_staticText_channels.Wrap( -1 )
 85 | 
 86 | 		self.m_staticText_channels.SetForegroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_APPWORKSPACE ) )
 87 | 
 88 | 		rec_win.Add( self.m_staticText_channels, 0, wx.ALL, 5 )
 89 | 
 90 | 		m_choice_channelsChoices = [ u"Mono", u"Stereo" ]
 91 | 		self.m_choice_channels = wx.Choice( self, wx.ID_ANY, wx.DefaultPosition, wx.Size( 80,-1 ), m_choice_channelsChoices, 0 )
 92 | 		self.m_choice_channels.SetSelection( 1 )
 93 | 		rec_win.Add( self.m_choice_channels, 0, wx.ALL, 5 )
 94 | 
 95 | 		self.m_staticText_sampRate = wx.StaticText( self, wx.ID_ANY, u"Samp Rate:", wx.DefaultPosition, wx.Size( 60,-1 ), 0 )
 96 | 		self.m_staticText_sampRate.Wrap( -1 )
 97 | 
 98 | 		self.m_staticText_sampRate.SetForegroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_APPWORKSPACE ) )
 99 | 
100 | 		rec_win.Add( self.m_staticText_sampRate, 0, wx.ALL, 5 )
101 | 
102 | 		m_choice_sampRateChoices = [ u"44100", u"22050", u"16000", u"11025" ]
103 | 		self.m_choice_sampRate = wx.Choice( self, wx.ID_ANY, wx.DefaultPosition, wx.Size( 80,-1 ), m_choice_sampRateChoices, 0 )
104 | 		self.m_choice_sampRate.SetSelection( 0 )
105 | 		rec_win.Add( self.m_choice_sampRate, 0, wx.ALL, 5 )
106 | 
107 | 		self.m_staticText_hz = wx.StaticText( self, wx.ID_ANY, u"Hz", wx.DefaultPosition, wx.Size( 20,-1 ), 0 )
108 | 		self.m_staticText_hz.Wrap( -1 )
109 | 
110 | 		self.m_staticText_hz.SetForegroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_APPWORKSPACE ) )
111 | 
112 | 		rec_win.Add( self.m_staticText_hz, 0, wx.ALL, 5 )
113 | 
114 | 		self.m_staticText_bitDepth = wx.StaticText( self, wx.ID_ANY, u"Bit Depth:", wx.DefaultPosition, wx.Size( 60,-1 ), 0 )
115 | 		self.m_staticText_bitDepth.Wrap( -1 )
116 | 
117 | 		self.m_staticText_bitDepth.SetForegroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_APPWORKSPACE ) )
118 | 
119 | 		rec_win.Add( self.m_staticText_bitDepth, 0, wx.ALL, 5 )
120 | 
121 | 		m_choice_bitDepthChoices = [ u"8", u"16", u"24", u"32" ]
122 | 		self.m_choice_bitDepth = wx.Choice( self, wx.ID_ANY, wx.DefaultPosition, wx.Size( 80,-1 ), m_choice_bitDepthChoices, 0 )
123 | 		self.m_choice_bitDepth.SetSelection( 1 )
124 | 		rec_win.Add( self.m_choice_bitDepth, 0, wx.ALL, 5 )
125 | 
126 | 		self.m_staticText_bits = wx.StaticText( self, wx.ID_ANY, u"bits", wx.DefaultPosition, wx.Size( 20,-1 ), 0 )
127 | 		self.m_staticText_bits.Wrap( -1 )
128 | 
129 | 		self.m_staticText_bits.SetForegroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_APPWORKSPACE ) )
130 | 
131 | 		rec_win.Add( self.m_staticText_bits, 0, wx.ALL, 5 )
132 | 
133 | 		self.m_staticText_recFileName = wx.StaticText( self, wx.ID_ANY, u"File:", wx.DefaultPosition, wx.Size( 30,-1 ), 0 )
134 | 		self.m_staticText_recFileName.Wrap( -1 )
135 | 
136 | 		self.m_staticText_recFileName.SetForegroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_APPWORKSPACE ) )
137 | 
138 | 		rec_win.Add( self.m_staticText_recFileName, 0, wx.ALL, 5 )
139 | 
140 | 		self.m_textCtrl_recFileName = wx.TextCtrl( self, wx.ID_ANY, u"rec_untitled1.wav", wx.DefaultPosition, wx.Size( 110,-1 ), 0 )
141 | 		rec_win.Add( self.m_textCtrl_recFileName, 0, wx.ALL, 5 )
142 | 
143 | 		self.m_staticText_null2 = wx.StaticText( self, wx.ID_ANY, wx.EmptyString, wx.DefaultPosition, wx.Size( 30,-1 ), 0 )
144 | 		self.m_staticText_null2.Wrap( -1 )
145 | 
146 | 		rec_win.Add( self.m_staticText_null2, 0, wx.ALL, 5 )
147 | 
148 | 		self.m_button_record = wx.Button( self, wx.ID_ANY, u"Record Start", wx.DefaultPosition, wx.Size( 110,-1 ), 0 )
149 | 		rec_win.Add( self.m_button_record, 0, wx.ALL, 5 )
150 | 
151 | 
152 | 		edit_win.Add( rec_win, 1, wx.EXPAND, 5 )
153 | 
154 | 
155 | 		win_sizer.Add( edit_win, 1, wx.EXPAND, 5 )
156 | 
157 | 		show_win = wx.BoxSizer( wx.VERTICAL )
158 | 
159 | 		plot_win = wx.WrapSizer( wx.HORIZONTAL, wx.WRAPSIZER_DEFAULT_FLAGS )
160 | 
161 | 		self.m_staticText_showAudioFile = wx.StaticText( self, wx.ID_ANY, u"Show Audio File:", wx.DefaultPosition, wx.Size( 215,-1 ), 0 )
162 | 		self.m_staticText_showAudioFile.Wrap( -1 )
163 | 
164 | 		self.m_staticText_showAudioFile.SetForegroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_APPWORKSPACE ) )
165 | 
166 | 		plot_win.Add( self.m_staticText_showAudioFile, 0, wx.ALL, 5 )
167 | 
168 | 		self.m_staticText_domain = wx.StaticText( self, wx.ID_ANY, u"Domain:", wx.DefaultPosition, wx.Size( 50,-1 ), 0 )
169 | 		self.m_staticText_domain.Wrap( -1 )
170 | 
171 | 		self.m_staticText_domain.SetForegroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_APPWORKSPACE ) )
172 | 
173 | 		plot_win.Add( self.m_staticText_domain, 0, wx.ALL, 5 )
174 | 
175 | 		m_choice_domainChoices = [ u"Time", u"Frequency" ]
176 | 		self.m_choice_domain = wx.Choice( self, wx.ID_ANY, wx.DefaultPosition, wx.DefaultSize, m_choice_domainChoices, 0 )
177 | 		self.m_choice_domain.SetSelection( 0 )
178 | 		plot_win.Add( self.m_choice_domain, 0, wx.ALL, 5 )
179 | 
180 | 		self.m_panel_plot = wx.Panel( self, wx.ID_ANY, wx.DefaultPosition, wx.Size( 720,360 ), wx.TAB_TRAVERSAL )
181 | 		self.m_panel_plot.SetFont( wx.Font( 9, wx.FONTFAMILY_DEFAULT, wx.FONTSTYLE_NORMAL, wx.FONTWEIGHT_NORMAL, False, "宋体" ) )
182 | 		self.m_panel_plot.SetForegroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_WINDOW ) )
183 | 		self.m_panel_plot.SetBackgroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_WINDOW ) )
184 | 
185 | 		plot_win.Add( self.m_panel_plot, 1, wx.EXPAND |wx.ALL, 5 )
186 | 
187 | 
188 | 		show_win.Add( plot_win, 1, wx.EXPAND, 5 )
189 | 
190 | 		ctrl_win = wx.WrapSizer( wx.HORIZONTAL, wx.WRAPSIZER_DEFAULT_FLAGS )
191 | 
192 | 		self.m_staticText_conv = wx.StaticText( self, wx.ID_ANY, u"Audio/Text  Conversation Configuration and Display:", wx.DefaultPosition, wx.Size( 285,-1 ), 0 )
193 | 		self.m_staticText_conv.Wrap( -1 )
194 | 
195 | 		self.m_staticText_conv.SetForegroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_APPWORKSPACE ) )
196 | 
197 | 		ctrl_win.Add( self.m_staticText_conv, 0, wx.ALL, 5 )
198 | 
199 | 		self.m_button_asrttsTextClear = wx.Button( self, wx.ID_ANY, u"Clear Text", wx.DefaultPosition, wx.Size( 80,-1 ), 0 )
200 | 		ctrl_win.Add( self.m_button_asrttsTextClear, 0, wx.ALL, 5 )
201 | 
202 | 		self.m_staticText_null3 = wx.StaticText( self, wx.ID_ANY, wx.EmptyString, wx.DefaultPosition, wx.Size( 100,-1 ), 0 )
203 | 		self.m_staticText_null3.Wrap( -1 )
204 | 
205 | 		ctrl_win.Add( self.m_staticText_null3, 0, wx.ALL, 5 )
206 | 
207 | 		self.m_staticText_lang = wx.StaticText( self, wx.ID_ANY, u"Language:", wx.Point( -1,-1 ), wx.Size( -1,-1 ), 0 )
208 | 		self.m_staticText_lang.Wrap( -1 )
209 | 
210 | 		self.m_staticText_lang.SetForegroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_APPWORKSPACE ) )
211 | 
212 | 		ctrl_win.Add( self.m_staticText_lang, 0, wx.ALL, 5 )
213 | 
214 | 		m_choice_langChoices = [ u"US English", u"Mandarin Chinese" ]
215 | 		self.m_choice_lang = wx.Choice( self, wx.ID_ANY, wx.DefaultPosition, wx.DefaultSize, m_choice_langChoices, 0 )
216 | 		self.m_choice_lang.SetSelection( 0 )
217 | 		ctrl_win.Add( self.m_choice_lang, 0, wx.ALL, 5 )
218 | 
219 | 		self.m_staticText_asrEngine = wx.StaticText( self, wx.ID_ANY, u"ASR Engine:", wx.DefaultPosition, wx.Size( 70,-1 ), 0 )
220 | 		self.m_staticText_asrEngine.Wrap( -1 )
221 | 
222 | 		self.m_staticText_asrEngine.SetForegroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_APPWORKSPACE ) )
223 | 
224 | 		ctrl_win.Add( self.m_staticText_asrEngine, 0, wx.ALL, 5 )
225 | 
226 | 		m_choice_asrEngineChoices = [ u"CMU Sphinx", u"Google Speech Recognition", u"Google Cloud Speech API", u"Wit.ai", u"Microsoft Bing Voice Recognition", u"Houndify API", u"IBM Speech to Text", u"Snowboy Hotword Detection" ]
227 | 		self.m_choice_asrEngine = wx.Choice( self, wx.ID_ANY, wx.DefaultPosition, wx.Size( 155,-1 ), m_choice_asrEngineChoices, 0 )
228 | 		self.m_choice_asrEngine.SetSelection( 0 )
229 | 		ctrl_win.Add( self.m_choice_asrEngine, 0, wx.ALL, 5 )
230 | 
231 | 		self.m_staticText_asrId = wx.StaticText( self, wx.ID_ANY, u"Id:", wx.DefaultPosition, wx.Size( 40,-1 ), 0 )
232 | 		self.m_staticText_asrId.Wrap( -1 )
233 | 
234 | 		self.m_staticText_asrId.SetForegroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_APPWORKSPACE ) )
235 | 
236 | 		ctrl_win.Add( self.m_staticText_asrId, 0, wx.ALL, 5 )
237 | 
238 | 		self.m_textCtrl_asrId = wx.TextCtrl( self, wx.ID_ANY, u"N/A", wx.DefaultPosition, wx.Size( 150,-1 ), 0 )
239 | 		ctrl_win.Add( self.m_textCtrl_asrId, 0, wx.ALL, 5 )
240 | 
241 | 		self.m_staticText_asrKey = wx.StaticText( self, wx.ID_ANY, u"Key:", wx.DefaultPosition, wx.DefaultSize, 0 )
242 | 		self.m_staticText_asrKey.Wrap( -1 )
243 | 
244 | 		self.m_staticText_asrKey.SetForegroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_APPWORKSPACE ) )
245 | 
246 | 		ctrl_win.Add( self.m_staticText_asrKey, 0, wx.ALL, 5 )
247 | 
248 | 		self.m_textCtrl_asrKey = wx.TextCtrl( self, wx.ID_ANY, u"N/A", wx.DefaultPosition, wx.Size( 232,-1 ), 0 )
249 | 		ctrl_win.Add( self.m_textCtrl_asrKey, 0, wx.ALL, 5 )
250 | 
251 | 		self.m_staticText_ttsEngine = wx.StaticText( self, wx.ID_ANY, u"TTS Engine:", wx.DefaultPosition, wx.Size( 70,-1 ), 0 )
252 | 		self.m_staticText_ttsEngine.Wrap( -1 )
253 | 
254 | 		self.m_staticText_ttsEngine.SetForegroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_APPWORKSPACE ) )
255 | 
256 | 		ctrl_win.Add( self.m_staticText_ttsEngine, 0, wx.ALL, 5 )
257 | 
258 | 		m_choice_ttsEngineChoices = [ u"pyttsx3 - SAPI5", u"gTTS" ]
259 | 		self.m_choice_ttsEngine = wx.Choice( self, wx.ID_ANY, wx.DefaultPosition, wx.Size( 155,-1 ), m_choice_ttsEngineChoices, 0 )
260 | 		self.m_choice_ttsEngine.SetSelection( 0 )
261 | 		ctrl_win.Add( self.m_choice_ttsEngine, 0, wx.ALL, 5 )
262 | 
263 | 		self.m_staticText_voice = wx.StaticText( self, wx.ID_ANY, u"Voice:", wx.DefaultPosition, wx.Size( 40,-1 ), 0 )
264 | 		self.m_staticText_voice.Wrap( -1 )
265 | 
266 | 		self.m_staticText_voice.SetForegroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_APPWORKSPACE ) )
267 | 
268 | 		ctrl_win.Add( self.m_staticText_voice, 0, wx.ALL, 5 )
269 | 
270 | 		m_choice_voiceChoices = []
271 | 		self.m_choice_voice = wx.Choice( self, wx.ID_ANY, wx.DefaultPosition, wx.Size( 310,-1 ), m_choice_voiceChoices, 0 )
272 | 		self.m_choice_voice.SetSelection( 0 )
273 | 		ctrl_win.Add( self.m_choice_voice, 0, wx.ALL, 5 )
274 | 
275 | 		self.m_staticText_null4 = wx.StaticText( self, wx.ID_ANY, wx.EmptyString, wx.DefaultPosition, wx.Size( 100,-1 ), 0 )
276 | 		self.m_staticText_null4.Wrap( -1 )
277 | 
278 | 		ctrl_win.Add( self.m_staticText_null4, 0, wx.ALL, 5 )
279 | 
280 | 		self.m_staticText_ttwEngine = wx.StaticText( self, wx.ID_ANY, u"TTW Engine: ", wx.DefaultPosition, wx.Size( 70,-1 ), 0 )
281 | 		self.m_staticText_ttwEngine.Wrap( -1 )
282 | 
283 | 		self.m_staticText_ttwEngine.SetForegroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_APPWORKSPACE ) )
284 | 
285 | 		ctrl_win.Add( self.m_staticText_ttwEngine, 0, wx.ALL, 5 )
286 | 
287 | 		m_choice_ttwEngineChoices = [ u"eSpeak TTS", u"Festival SSS" ]
288 | 		self.m_choice_ttwEngine = wx.Choice( self, wx.ID_ANY, wx.DefaultPosition, wx.Size( 155,-1 ), m_choice_ttwEngineChoices, 0 )
289 | 		self.m_choice_ttwEngine.SetSelection( 0 )
290 | 		ctrl_win.Add( self.m_choice_ttwEngine, 0, wx.ALL, 5 )
291 | 
292 | 		self.m_staticText_gender = wx.StaticText( self, wx.ID_ANY, u"Gender:", wx.DefaultPosition, wx.Size( 40,-1 ), 0 )
293 | 		self.m_staticText_gender.Wrap( -1 )
294 | 
295 | 		self.m_staticText_gender.SetForegroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_APPWORKSPACE ) )
296 | 
297 | 		ctrl_win.Add( self.m_staticText_gender, 0, wx.ALL, 5 )
298 | 
299 | 		m_choice_genderChoices = [ u"Male", u"Female" ]
300 | 		self.m_choice_gender = wx.Choice( self, wx.ID_ANY, wx.DefaultPosition, wx.Size( 70,-1 ), m_choice_genderChoices, 0 )
301 | 		self.m_choice_gender.SetSelection( 0 )
302 | 		ctrl_win.Add( self.m_choice_gender, 0, wx.ALL, 5 )
303 | 
304 | 
305 | 		show_win.Add( ctrl_win, 1, wx.EXPAND, 5 )
306 | 
307 | 		conv_win = wx.WrapSizer( wx.HORIZONTAL, wx.WRAPSIZER_DEFAULT_FLAGS )
308 | 
309 | 		text_win = wx.WrapSizer( wx.HORIZONTAL, wx.WRAPSIZER_DEFAULT_FLAGS )
310 | 
311 | 		self.m_textCtrl_asrttsText = wx.TextCtrl( self, wx.ID_ANY, wx.EmptyString, wx.DefaultPosition, wx.Size( 480,120 ), wx.TE_MULTILINE )
312 | 		text_win.Add( self.m_textCtrl_asrttsText, 0, wx.ALL, 5 )
313 | 
314 | 
315 | 		conv_win.Add( text_win, 1, wx.EXPAND, 5 )
316 | 
317 | 		asrtts_win = wx.WrapSizer( wx.HORIZONTAL, wx.WRAPSIZER_DEFAULT_FLAGS )
318 | 
319 | 		self.m_button_asr = wx.Button( self, wx.ID_ANY, u"ASR", wx.DefaultPosition, wx.Size( 40,-1 ), 0 )
320 | 		asrtts_win.Add( self.m_button_asr, 0, wx.ALL, 5 )
321 | 
322 | 		self.m_textCtrl_asrFileName = wx.TextCtrl( self, wx.ID_ANY, u"asr_untitled1.txt", wx.DefaultPosition, wx.Size( 180,-1 ), 0 )
323 | 		asrtts_win.Add( self.m_textCtrl_asrFileName, 0, wx.ALL, 5 )
324 | 
325 | 		self.m_button_tts = wx.Button( self, wx.ID_ANY, u"TTS", wx.DefaultPosition, wx.Size( 40,-1 ), 0 )
326 | 		asrtts_win.Add( self.m_button_tts, 0, wx.ALL, 5 )
327 | 
328 | 		self.m_textCtrl_ttsFileName = wx.TextCtrl( self, wx.ID_ANY, u"tts_untitled1.wav", wx.DefaultPosition, wx.Size( 180,-1 ), 0 )
329 | 		asrtts_win.Add( self.m_textCtrl_ttsFileName, 0, wx.ALL, 5 )
330 | 
331 | 
332 | 		conv_win.Add( asrtts_win, 1, wx.EXPAND, 5 )
333 | 
334 | 
335 | 		show_win.Add( conv_win, 1, wx.EXPAND, 5 )
336 | 
337 | 
338 | 		win_sizer.Add( show_win, 1, wx.EXPAND, 5 )
339 | 
340 | 
341 | 		self.SetSizer( win_sizer )
342 | 		self.Layout()
343 | 		self.statusBar = self.CreateStatusBar( 1, wx.STB_SIZEGRIP, wx.ID_ANY )
344 | 
345 | 		self.Centre( wx.BOTH )
346 | 
347 | 		# Connect Events
348 | 		self.Bind( wx.EVT_MENU, self.showHomepageInfo, id = self.m_menuItem_homePage.GetId() )
349 | 		self.Bind( wx.EVT_MENU, self.showAboutInfo, id = self.m_menuItem_about.GetId() )
350 | 		self.m_genericDirCtrl_audioDir.Bind( wx.EVT_TREE_SEL_CHANGED, self.viewAudio )
351 | 		self.m_button_play.Bind( wx.EVT_BUTTON, self.playAudio )
352 | 		self.m_button_record.Bind( wx.EVT_BUTTON, self.recordAudio )
353 | 		self.m_button_asrttsTextClear.Bind( wx.EVT_BUTTON, self.clearAsrTtsText )
354 | 		self.m_choice_voice.Bind( wx.EVT_ENTER_WINDOW, self.refreshVoice )
355 | 		self.m_button_asr.Bind( wx.EVT_BUTTON, self.audioSpeechRecognition )
356 | 		self.m_button_tts.Bind( wx.EVT_BUTTON, self.textToSpeech )
357 | 
358 | 	def __del__( self ):
359 | 		pass
360 | 
361 | 
362 | 	# Virtual event handlers, overide them in your derived class
363 | 	def showHomepageInfo( self, event ):
364 | 		event.Skip()
365 | 
366 | 	def showAboutInfo( self, event ):
367 | 		event.Skip()
368 | 
369 | 	def viewAudio( self, event ):
370 | 		event.Skip()
371 | 
372 | 	def playAudio( self, event ):
373 | 		event.Skip()
374 | 
375 | 	def recordAudio( self, event ):
376 | 		event.Skip()
377 | 
378 | 	def clearAsrTtsText( self, event ):
379 | 		event.Skip()
380 | 
381 | 	def refreshVoice( self, event ):
382 | 		event.Skip()
383 | 
384 | 	def audioSpeechRecognition( self, event ):
385 | 		event.Skip()
386 | 
387 | 	def textToSpeech( self, event ):
388 | 		event.Skip()
389 | 
390 | 
391 | 


--------------------------------------------------------------------------------