├── .gitignore ├── LICENSE.txt ├── README-en.md ├── README.md ├── bin └── _temp.txt ├── conv ├── asr │ └── audio-to-text files.txt ├── rec │ ├── recorded wav files.txt │ ├── winxp_shutdown.wav │ └── winxp_startup.wav └── tts │ └── text-to-speech files.txt ├── env ├── do_pack_by_pyinstaller.bat ├── do_setup_by_pip.bat └── pyinstaller_pack_fw.spec ├── gui └── pzh-speech.fbp ├── img ├── ico logo source.txt ├── pzh-speech.ico └── pzh-speech.png └── src ├── main.py └── win.py /.gitignore: -------------------------------------------------------------------------------- 1 | # 2 | # NOTE! Don't add files that are generated in specific 3 | # subdirectories here. Add them in the ".gitignore" file 4 | # in that subdirectory instead. 5 | # 6 | # NOTE! Please use 'git-ls-files -i --exclude-standard' 7 | # command after changing this file, to see if there are 8 | # any tracked files which get ignored after the change. 9 | 10 | # Python bytecode 11 | *.pyc 12 | __pycache__ 13 | 14 | /.idea 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2017-2018, Jay Heng 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * Neither the name of elapsedtimer nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 
17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /README-en.md: -------------------------------------------------------------------------------- 1 | # pzh-speech 2 | A tiny audio speech (.wav) utility tool (GUI) based on Python2.7+wxPython4.0+PyAudio+Matplotlib+SpeechRecognition(PocketSphinx)+pyttsx3(eSpeak) | 一款支持多引擎的wav格式语音处理小工具(音频录播与波形显示,语音识别,文语合成) 3 | 4 | [![GitHub release](https://img.shields.io/github/release/JayHeng/pzh-py-speech.svg)](https://github.com/JayHeng/pzh-py-speech/releases/latest) [![GitHub commits](https://img.shields.io/github/commits-since/JayHeng/pzh-py-speech/v1.0.0.svg)](https://github.com/JayHeng/pzh-py-speech/compare/v1.0.0...master) [![GitHub license](https://img.shields.io/github/license/JayHeng/pzh-py-speech.svg)](https://github.com/JayHeng/pzh-py-speech/blob/master/LICENSE.txt) 5 | 6 | English | [中文](./README.md) 7 | 8 | 9 | 10 | ### How to build : 11 | ******************** 12 |   First of all, you should install all Non-Python packages listed in [《痞子衡语音处理助手-开发环境搭建》](https://www.cnblogs.com/henjay724/p/9542690.html), then follow below steps: 13 | ```text 14 | 1. Install Python2.7.15 x86 version 15 | 2. 
Confirm that the directory "\Python27\" and "\Python27\Scripts\" are in the system environment variable path after the installation is completed 16 | 3. Double click "\pzh-py-speech\env\do_setup_by_pip.bat" to install the Python library on which pzh-speech depends 17 | 4. Double click "\pzh-py-speech\env\do_pack_by_pyinstaller.bat" to regenerate the pzh-speech.exe 18 | 5. Open "\pzh-py-speech\bin\pzh-speech.exe" to use it 19 | ``` 20 | 21 | ### Tool Features : 22 | ******************** 23 | * View the waveform of selected .wav file 24 | * Record sound from microphone to .wav file (\conv\rec) 25 | * Play selected .wav file 26 | * ASR: Recognize selected .wav file to text (\conv\asr) 27 | * TTS: Translate input text to speech 28 | * TTW: Translate input text to .wav file (\conv\tts) 29 | * Both English and Chinese are supported in ASR,TTS,TTW 30 | * Design detail: [《痞子衡语音处理助手诞生记(全七篇)》](https://www.cnblogs.com/henjay724/p/9541867.html) 31 | 32 | Known issues: 33 | * The audio data are all 0x00s if BitDepth is set as 8bits when recording sound (it seems to be PyAudio issue) 34 | * Sometimes Application will hang up if language is set as Chinese when using TTS (it may be MSSpeech_TTS_xxx language package installation issue) 35 | 36 | ### License : 37 | ******************** 38 |   This package is licensed under the BSD three-clause license. See the LICENSE.txt file for details. 39 | 40 | Copyright © 2017-2018 Jay Heng. 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 痞子衡语音处理助手 2 | A tiny audio speech (.wav) utility tool (GUI) based on Python2.7+wxPython4.0+PyAudio+Matplotlib+SpeechRecognition(PocketSphinx)+pyttsx3(eSpeak) | 一款支持多引擎的wav格式语音处理小工具(音频录播与波形显示,语音识别,文语合成) 3 | 4 | [![GitHub release](https://img.shields.io/github/release/JayHeng/pzh-py-speech.svg)](https://github.com/JayHeng/pzh-py-speech/releases/latest) [![GitHub commits](https://img.shields.io/github/commits-since/JayHeng/pzh-py-speech/v1.0.0.svg)](https://github.com/JayHeng/pzh-py-speech/compare/v1.0.0...master) [![GitHub license](https://img.shields.io/github/license/JayHeng/pzh-py-speech.svg)](https://github.com/JayHeng/pzh-py-speech/blob/master/LICENSE.txt) 5 | 6 | [English](./README-en.md) | 中文 7 | 8 | 9 | 10 | ### 1. 二次开发与重编 : 11 | ******************** 12 |   参考这篇文章 [《痞子衡语音处理助手-开发环境搭建》](https://www.cnblogs.com/henjay724/p/9542690.html) 安装所有非Python相关的开发工具, 然后按照如下步骤继续安装Python环境: 13 | ```text 14 | 1. 安装Python2.7.15 x86 version 15 | 2. 确认系统路径包含"\Python27\" 和 "\Python27\Scripts\" 16 | 3. 双击"\pzh-py-speech\env\do_setup_by_pip.bat"脚本安装所有依赖的第三方Python库 17 | 4. 双击"\pzh-py-speech\env\do_pack_by_pyinstaller.bat"脚本重新生成pzh-speech.exe 18 | 5. 双击"\pzh-py-speech\bin\pzh-speech.exe"运行 19 | ``` 20 | 21 | ### 2. 软件功能 : 22 | ******************** 23 | * 支持查看所选的.wav文件波形图 24 | * 支持从麦克风录制声音进.wav文件 (\conv\rec) 25 | * 支持播放所选的.wav文件 26 | * ASR: 支持识别.wav文件里的内容并保存到文本文件 (\conv\asr) 27 | * TTS: 支持将输入的文本内容转换成语音播放 28 | * TTW: 支持将输入的文本内容转换成.wav文件 (\conv\tts) 29 | * 支持两种语言(中英)的上述ASR,TTS,TTW处理 30 | * 软件设计细节详见: [《痞子衡语音处理助手诞生记(全七篇)》](https://www.cnblogs.com/henjay724/p/9541867.html) 31 | 32 | 已知问题: 33 | * 在录制声音时,如果BitDepth设为8bits,录制的音频数据全是0x00 (应该是PyAudio库的问题) 34 | * 在处理TTS时,如果语言设置为中文,有时候软件会停止执行 (可能是MSSpeech_TTS_xxx language包的安装问题) 35 | 36 | ### 3. 
License : 37 | ******************** 38 |   软件采用BSD three-clause license, 更多许可证细节详见LICENSE.txt。 39 | 40 | Copyright © 2017-2018 Jay Heng. -------------------------------------------------------------------------------- /bin/_temp.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JayHeng/pzh-py-speech/5eefe3f6720e12af4e3bcdc3144063a3e6c1331d/bin/_temp.txt -------------------------------------------------------------------------------- /conv/asr/audio-to-text files.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JayHeng/pzh-py-speech/5eefe3f6720e12af4e3bcdc3144063a3e6c1331d/conv/asr/audio-to-text files.txt -------------------------------------------------------------------------------- /conv/rec/recorded wav files.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JayHeng/pzh-py-speech/5eefe3f6720e12af4e3bcdc3144063a3e6c1331d/conv/rec/recorded wav files.txt -------------------------------------------------------------------------------- /conv/rec/winxp_shutdown.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JayHeng/pzh-py-speech/5eefe3f6720e12af4e3bcdc3144063a3e6c1331d/conv/rec/winxp_shutdown.wav -------------------------------------------------------------------------------- /conv/rec/winxp_startup.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JayHeng/pzh-py-speech/5eefe3f6720e12af4e3bcdc3144063a3e6c1331d/conv/rec/winxp_startup.wav -------------------------------------------------------------------------------- /conv/tts/text-to-speech files.txt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/JayHeng/pzh-py-speech/5eefe3f6720e12af4e3bcdc3144063a3e6c1331d/conv/tts/text-to-speech files.txt -------------------------------------------------------------------------------- /env/do_pack_by_pyinstaller.bat: -------------------------------------------------------------------------------- 1 | pyinstaller.exe pyinstaller_pack_fw.spec 2 | copy .\dist\pzh-speech.exe ..\bin 3 | rd /q /s .\build 4 | rd /q /s .\dist -------------------------------------------------------------------------------- /env/do_setup_by_pip.bat: -------------------------------------------------------------------------------- 1 | pip.exe install wxPython==4.0.3 2 | pip.exe install PyAudio==0.2.11 3 | pip.exe install matplotlib==2.2.3 4 | pip.exe install numpy==1.15.0 5 | pip.exe install SpeechRecognition==3.8.1 6 | pip.exe install pocketsphinx==0.1.15 7 | pip.exe install pyttsx3==2.7 8 | 9 | pip.exe install PyInstaller -------------------------------------------------------------------------------- /env/pyinstaller_pack_fw.spec: -------------------------------------------------------------------------------- 1 | # -*- mode: python -*- 2 | 3 | block_cipher = None 4 | 5 | 6 | a = Analysis(['..\\src\\main.py', 7 | '..\\src\\win.py'], 8 | binaries=[], 9 | datas=[], 10 | hiddenimports=[], 11 | hookspath=[], 12 | runtime_hooks=[], 13 | excludes=[], 14 | win_no_prefer_redirects=False, 15 | win_private_assemblies=False, 16 | cipher=block_cipher) 17 | pyz = PYZ(a.pure, a.zipped_data, 18 | cipher=block_cipher) 19 | exe = EXE(pyz, 20 | a.scripts, 21 | a.binaries, 22 | a.zipfiles, 23 | a.datas, 24 | name='pzh-speech', 25 | debug=False, 26 | strip=False, 27 | upx=True, 28 | runtime_tmpdir=None, 29 | console=False , icon='..\\img\\pzh-speech.ico') 30 | -------------------------------------------------------------------------------- /img/ico logo source.txt: -------------------------------------------------------------------------------- 1 | Get logo from 
# -*- coding: utf-8 -*-
"""pzh-speech main window: wav waveform viewing, record/play, ASR and TTS/TTW.

Targets Python 2.7 (reload/setdefaultencoding) with wxPython 4, PyAudio,
matplotlib, SpeechRecognition (PocketSphinx) and pyttsx3 (SAPI5) / eSpeak.
"""
import wx
import sys, os
reload(sys)
sys.setdefaultencoding('utf-8')
import subprocess
import win
import numpy
import matplotlib
from matplotlib.backends.backend_wxagg import FigureCanvasWxAgg as FigureCanvas
from matplotlib.figure import Figure
import matplotlib.tri as Tri

import wave
import pyaudio
import speech_recognition
import pyttsx3

MAX_AUDIO_CHANNEL = 8
# unit: pixel (divided by dpi to get figure size in inch)
PLOT_PANEL_WIDTH = 720
PLOT_PANEL_HEIGHT = 360
# unit: percent of figure size
PLOT_AXES_WIDTH_TITLE = 0.05
PLOT_AXES_HEIGHT_LABEL = 0.075
AUDIO_CHUNK_SIZE = 1024

# Playback state machine: Start -> Play -> Pause -> Resume -> End
AUDIO_PLAY_STATE_START = 0
AUDIO_PLAY_STATE_PLAY = 1
AUDIO_PLAY_STATE_PAUSE = 2
AUDIO_PLAY_STATE_RESUME = 3
AUDIO_PLAY_STATE_END = 4

class wavCursor(object):
    """Crosshair cursor that tracks the mouse over one waveform axes."""

    def __init__(self, ax, x, y):
        self.ax = ax
        self.vline = ax.axvline(color='blue', alpha=1)
        self.hline = ax.axhline(color='blue', alpha=1)
        self.marker, = ax.plot([0],[0], marker="o", color="crimson", zorder=3)
        self.x = x
        self.y = y
        # Clamp cursor to the last sample on the x axis
        self.xlim = self.x[len(self.x)-1]
        self.text = ax.text(0.7, 0.9, '', bbox=dict(facecolor='yellow', alpha=0.5))

    def moveMouse(self, event):
        """matplotlib motion_notify_event handler: snap crosshair to nearest sample."""
        if not event.inaxes:
            return
        x, y = event.xdata, event.ydata
        if x > self.xlim:
            x = self.xlim
        index = numpy.searchsorted(self.x, [x])[0]
        x = self.x[index]
        y = self.y[index]
        self.vline.set_xdata(x)
        self.hline.set_ydata(y)
        self.marker.set_data([x],[y])
        self.text.set_text('x=%1.2f, y=%1.2f' % (x, y))
        self.text.set_position((x,y))
        self.ax.figure.canvas.draw_idle()

class wavCanvasPanel(wx.Panel):
    """wx panel embedding a matplotlib canvas that shows per-channel waveforms."""

    def __init__(self, parent):
        wx.Panel.__init__(self, parent)
        dpi = 60
        width = PLOT_PANEL_WIDTH / dpi
        height = PLOT_PANEL_HEIGHT / dpi
        self.wavFigure = Figure(figsize=[width,height], dpi=dpi, facecolor='#404040')
        self.wavCanvas = FigureCanvas(self, -1, self.wavFigure)
        self.wavSizer = wx.BoxSizer(wx.VERTICAL)
        self.wavSizer.Add(self.wavCanvas, 1, wx.EXPAND|wx.ALL)
        self.SetSizerAndFit(self.wavSizer)
        # One axes/cursor slot per supported channel
        self.wavAxes = [None] * MAX_AUDIO_CHANNEL
        self.wavCursor = [None] * MAX_AUDIO_CHANNEL

    def fromstring(self, wavData, alignedByte):
        """Decode packed little-endian samples of `alignedByte` bytes (e.g. int24)
        into an int64 numpy array; returns (ok, data)."""
        if alignedByte <= 8:
            src = numpy.ndarray(len(wavData), numpy.dtype('>i1'), wavData)
            dest = numpy.zeros(len(wavData) // alignedByte, numpy.dtype('>i8'))
            for i in range(alignedByte):
                dest.view(dtype='>i1')[alignedByte-1-i::8] = src.view(dtype='>i1')[i::alignedByte]
            return True, dest
        else:
            return False, wavData

    def readWave(self, wavPath, wavInfo):
        """Read a .wav file; return (channels, data, time) or (0, 0, 0) on failure.

        Also reports the wave header parameters on `wavInfo` (a status bar).
        """
        if os.path.isfile(wavPath):
            # Open the wav file to get wave data and parameters
            wavFile = wave.open(wavPath, "rb")
            wavParams = wavFile.getparams()
            wavChannels = wavParams[0]
            wavSampwidth = wavParams[1]
            wavFramerate = wavParams[2]
            wavFrames = wavParams[3]
            # NOTE: wave framerate is in Hz (previously mislabeled kHz)
            wavInfo.SetStatusText('Opened Audio Info = ' +
                                  'Channels:' + str(wavChannels) +
                                  ', SampWidth:' + str(wavSampwidth) + 'Byte' +
                                  ', SampRate:' + str(wavFramerate) + 'Hz' +
                                  ', FormatTag:' + wavParams[4])
            wavData = wavFile.readframes(wavFrames)
            wavFile.close()
            # Map sample width to numpy dtype; int24 has no native dtype
            if wavSampwidth == 1:
                dtype = numpy.int8
            elif wavSampwidth == 2:
                dtype = numpy.int16
            elif wavSampwidth == 3:
                dtype = None
            elif wavSampwidth == 4:
                dtype = numpy.float32
            else:
                return 0, 0, 0
            if dtype is not None:
                retData = numpy.fromstring(wavData, dtype = dtype)
            else:
                # Implement int24 manually
                status, retData = self.fromstring(wavData, 3)
                if not status:
                    return 0, 0, 0
            # Transpose the wav data if wave has multiple channels
            if wavChannels != 1:
                retData.shape = -1, wavChannels
                retData = retData.T
            # Calculate and arange wave time
            retTime = numpy.arange(0, wavFrames) * (1.0 / wavFramerate)
            retChannels = wavChannels
            return retChannels, retData, retTime
        else:
            return 0, 0, 0

    def showWave(self, wavPath, wavDomain, wavInfo):
        """Plot the selected wave in 'Time' or 'Frequency' domain, one axes per channel."""
        self.wavFigure.clear()
        waveChannels, waveData, waveTime = self.readWave(wavPath, wavInfo)
        if waveChannels != 0:
            # Note: only show max supported channel if actual channel > max supported channel
            if waveChannels > MAX_AUDIO_CHANNEL:
                waveChannels = MAX_AUDIO_CHANNEL
            # Plot the waveform of each channel in sequence
            for i in range(waveChannels):
                left = PLOT_AXES_HEIGHT_LABEL
                bottom = (1.0 / waveChannels) * (waveChannels - 1 - i) + PLOT_AXES_HEIGHT_LABEL
                height = 1.0 / waveChannels - (PLOT_AXES_WIDTH_TITLE + PLOT_AXES_HEIGHT_LABEL)
                width = 1 - left - 0.05
                self.wavAxes[i] = self.wavFigure.add_axes([left, bottom, width, height], facecolor='k')
                if waveChannels == 1:
                    waveAmplitude = waveData
                else:
                    waveAmplitude = waveData[i]
                if wavDomain == 'Time':
                    self.wavAxes[i].set_prop_cycle(color='#00F279', lw=[1])
                    self.wavAxes[i].set_xlabel('time (s)', color='w')
                    self.wavAxes[i].plot(waveTime, waveAmplitude)
                    self.wavCursor[i] = wavCursor(self.wavAxes[i], waveTime, waveAmplitude)
                elif wavDomain == 'Frequency':
                    self.wavAxes[i].set_prop_cycle(color='red', lw=[1])
                    self.wavAxes[i].set_xlabel('Frequency (Hz)', color='w')
                    waveMagnitude = numpy.absolute(numpy.fft.rfft(waveAmplitude))
                    waveFreq = numpy.fft.rfftfreq(len(waveTime), numpy.diff(waveTime)[0])
                    self.wavAxes[i].plot(waveFreq, waveMagnitude)
                    self.wavCursor[i] = wavCursor(self.wavAxes[i], waveFreq, waveMagnitude)
                self.wavAxes[i].set_ylabel('value', color='w')
                self.wavAxes[i].grid()
                self.wavAxes[i].tick_params(labelcolor='w')
                self.wavAxes[i].set_title('Audio Channel ' + str(i), color='w')
                self.wavCanvas.mpl_connect('motion_notify_event', self.wavCursor[i].moveMouse)
        # Note!!!: draw() must be called if figure has been cleared once
        self.wavCanvas.draw()

    def showWelcome (self):
        """Draw the tripcolor welcome splash (matplotlib gallery demo)."""
        self.wavFigure.clear()
        # Get Code from below link
        # https://matplotlib.org/gallery/images_contours_and_fields/tripcolor_demo.html#sphx-glr-gallery-images-contours-and-fields-tripcolor-demo-py
        n_angles = 36
        n_radii = 8
        min_radius = 0.25
        radii = numpy.linspace(min_radius, 0.95, n_radii)
        angles = numpy.linspace(0, 2 * numpy.pi, n_angles, endpoint=False)
        angles = numpy.repeat(angles[..., numpy.newaxis], n_radii, axis=1)
        angles[:, 1::2] += numpy.pi / n_angles
        x = (radii * numpy.cos(angles)).flatten()
        y = (radii * numpy.sin(angles)).flatten()
        z = (numpy.cos(radii) * numpy.cos(3 * angles)).flatten()
        triang = Tri.Triangulation(x, y)
        triang.set_mask(numpy.hypot(x[triang.triangles].mean(axis=1),
                                    y[triang.triangles].mean(axis=1))
                        < min_radius)
        welcomeAxes = self.wavFigure.add_axes([0.13,0.2,0.7,0.7], facecolor='#404040')
        #welcomeAxes.set_aspect('equal')
        welcomeAxes.tripcolor(triang, z, shading='flat')
        # Set some properties
        welcomeAxes.set_title('Welcome to use pzh-speech', color='w')
        welcomeAxes.set_xticks([])
        welcomeAxes.set_yticks([])
        welcomeAxes.spines['top'].set_visible(False)
        welcomeAxes.spines['right'].set_visible(False)
        welcomeAxes.spines['bottom'].set_visible(False)
        welcomeAxes.spines['left'].set_visible(False)
        self.wavCanvas.draw()

class mainWin(win.speech_win):
    """Application frame: wires the generated GUI (win.speech_win) to the audio logic."""

    def __init__(self, parent):
        win.speech_win.__init__(self, parent)
        icon = wx.Icon()
        icon.CopyFromBitmap(wx.Bitmap( u"../img/pzh-speech.ico", wx.BITMAP_TYPE_ANY))
        self.SetIcon(icon)
        self.wavPanel = wavCanvasPanel(self.m_panel_plot)
        self.m_genericDirCtrl_audioDir.SetFilter("Audio files (*.wav)|*.wav")
        self.isRecording = False
        # Start -> Play -> Pause -> Resume -> End
        self.playState = AUDIO_PLAY_STATE_START
        self.statusBar.SetFieldsCount(1)
        self.wavPanel.showWelcome()
        self.ttsObj = None
        # Fix: initialize so Play/ASR before any selection does not raise AttributeError
        self.wavPath = ''

    def viewAudio( self, event ):
        """Dir-ctrl selection handler: plot the chosen file and reset playback."""
        self.wavPath = self.m_genericDirCtrl_audioDir.GetFilePath()
        wavDomain = self.m_choice_domain.GetString(self.m_choice_domain.GetSelection())
        self.wavPanel.showWave(self.wavPath, wavDomain, self.statusBar)
        if self.playState != AUDIO_PLAY_STATE_START:
            self.playState = AUDIO_PLAY_STATE_END
            self.m_button_play.SetLabel('Play Start')

    def recordAudioCallback(self, in_data, frame_count, time_info, status):
        """PyAudio input-stream callback: accumulate frames while recording."""
        if not self.isRecording:
            status = pyaudio.paComplete
        else:
            self.wavFrames.append(in_data)
            status = pyaudio.paContinue
        return (in_data, status)

    def recordAudio( self, event ):
        """Toggle recording; on stop, save collected frames to the target .wav file."""
        if not self.isRecording:
            self.isRecording = True
            self.m_button_record.SetLabel('Record Stop')
            # Get the wave parameter from user settings
            fileName = self.m_textCtrl_recFileName.GetLineText(0)
            if fileName == '':
                fileName = 'rec_untitled1.wav'
            self.wavPath = os.path.join(os.path.dirname(os.path.abspath(os.path.dirname(__file__))), 'conv', 'rec', fileName)
            self.wavSampRate = int(self.m_choice_sampRate.GetString(self.m_choice_sampRate.GetSelection()))
            channels = self.m_choice_channels.GetString(self.m_choice_channels.GetSelection())
            if channels == 'Mono':
                self.wavChannels = 1
            else: #if channels == 'Stereo':
                self.wavChannels = 2
            bitDepth = int(self.m_choice_bitDepth.GetString(self.m_choice_bitDepth.GetSelection()))
            if bitDepth == 8:
                self.wavBitFormat = pyaudio.paInt8
            elif bitDepth == 24:
                self.wavBitFormat = pyaudio.paInt24
            elif bitDepth == 32:
                self.wavBitFormat = pyaudio.paFloat32
            else:
                self.wavBitFormat = pyaudio.paInt16
            # Record audio according to wave parameters
            self.wavFrames = []
            self.wavPyaudio = pyaudio.PyAudio()
            self.wavStream = self.wavPyaudio.open(format=self.wavBitFormat,
                                                  channels=self.wavChannels,
                                                  rate=self.wavSampRate,
                                                  input=True,
                                                  frames_per_buffer=AUDIO_CHUNK_SIZE,
                                                  stream_callback=self.recordAudioCallback)
            self.wavStream.start_stream()
        else:
            self.isRecording = False
            self.m_button_record.SetLabel('Record Start')
            self.wavStream.stop_stream()
            self.wavStream.close()
            self.wavPyaudio.terminate()
            # Save the wave data into file
            wavFile = wave.open(self.wavPath, 'wb')
            wavFile.setnchannels(self.wavChannels)
            wavFile.setsampwidth(self.wavPyaudio.get_sample_size(self.wavBitFormat))
            wavFile.setframerate(self.wavSampRate)
            wavFile.writeframes(b''.join(self.wavFrames))
            wavFile.close()

    def playAudioCallback(self, in_data, frame_count, time_info, status):
        """PyAudio output-stream callback: feed file frames, or silence while paused."""
        if self.playState == AUDIO_PLAY_STATE_PLAY or self.playState == AUDIO_PLAY_STATE_RESUME:
            data = self.wavFile.readframes(frame_count)
            if self.wavFile.getnframes() == self.wavFile.tell():
                status = pyaudio.paComplete
                self.playState = AUDIO_PLAY_STATE_END
                self.m_button_play.SetLabel('Play Start')
            else:
                status = pyaudio.paContinue
            return (data, status)
        else:
            # Fix: emit a correctly sized silence buffer while paused
            # (frame_count frames * channels * bytes-per-sample zero bytes)
            data = b'\x00' * (frame_count * self.wavFile.getnchannels() * self.wavFile.getsampwidth())
            return (data, pyaudio.paContinue)

    def playAudio( self, event ):
        """Play button handler: drive the Start/Play/Pause/Resume/End state machine."""
        if os.path.isfile(self.wavPath):
            if self.playState == AUDIO_PLAY_STATE_END:
                self.playState = AUDIO_PLAY_STATE_START
                self.wavStream.stop_stream()
                self.wavStream.close()
                self.wavPyaudio.terminate()
                self.wavFile.close()
            if self.playState == AUDIO_PLAY_STATE_START:
                self.playState = AUDIO_PLAY_STATE_PLAY
                self.m_button_play.SetLabel('Play Pause')
                self.wavFile = wave.open(self.wavPath, "rb")
                self.wavPyaudio = pyaudio.PyAudio()
                self.wavStream = self.wavPyaudio.open(format=self.wavPyaudio.get_format_from_width(self.wavFile.getsampwidth()),
                                                      channels=self.wavFile.getnchannels(),
                                                      rate=self.wavFile.getframerate(),
                                                      output=True,
                                                      stream_callback=self.playAudioCallback)
                self.wavStream.start_stream()
            elif self.playState == AUDIO_PLAY_STATE_PLAY or self.playState == AUDIO_PLAY_STATE_RESUME:
                self.playState = AUDIO_PLAY_STATE_PAUSE
                self.m_button_play.SetLabel('Play Resume')
            elif self.playState == AUDIO_PLAY_STATE_PAUSE:
                self.playState = AUDIO_PLAY_STATE_RESUME
                self.m_button_play.SetLabel('Play Pause')
            else:
                pass

    def getLanguageSelection(self):
        """Return (BCP-47 language tag, display name) for the language choice box."""
        languageType = self.m_choice_lang.GetString(self.m_choice_lang.GetSelection())
        if languageType == 'Mandarin Chinese':
            languageType = 'zh-CN'
            languageName = 'Chinese'
        else: # languageType == 'US English':
            languageType = 'en-US'
            languageName = 'English'
        return languageType, languageName

    def audioSpeechRecognition( self, event ):
        """ASR button handler: recognize the selected wav (Sphinx) and save the text."""
        if os.path.isfile(self.wavPath):
            asrObj = speech_recognition.Recognizer()
            with speech_recognition.AudioFile(self.wavPath) as source:
                # Read the entire audio file
                speechAudio = asrObj.record(source)
            self.m_textCtrl_asrttsText.Clear()
            # Get language type
            languageType, languageName = self.getLanguageSelection()
            engineType = self.m_choice_asrEngine.GetString(self.m_choice_asrEngine.GetSelection())
            if engineType == 'CMU Sphinx':
                # Recognize speech using Sphinx
                try:
                    speechText = asrObj.recognize_sphinx(speechAudio, language=languageType)
                    self.m_textCtrl_asrttsText.write(speechText)
                    self.statusBar.SetStatusText("ASR Conversation Info: Successfully")
                    fileName = self.m_textCtrl_asrFileName.GetLineText(0)
                    if fileName == '':
                        fileName = 'asr_untitled1.txt'
                    asrFilePath = os.path.join(os.path.dirname(os.path.abspath(os.path.dirname(__file__))), 'conv', 'asr', fileName)
                    asrFileObj = open(asrFilePath, 'wb')
                    asrFileObj.write(speechText)
                    asrFileObj.close()
                except speech_recognition.UnknownValueError:
                    self.statusBar.SetStatusText("ASR Conversation Info: Sphinx could not understand audio")
                except speech_recognition.RequestError as e:
                    self.statusBar.SetStatusText("ASR Conversation Info: Sphinx error; {0}".format(e))
            else:
                self.statusBar.SetStatusText("ASR Conversation Info: Unavailable ASR Engine")

    def refreshVoice( self, event ):
        """Fill the voice choice box with installed voices matching the selected language."""
        languageType, languageName = self.getLanguageSelection()
        engineType = self.m_choice_ttsEngine.GetString(self.m_choice_ttsEngine.GetSelection())
        if engineType == 'pyttsx3 - SAPI5':
            if self.ttsObj == None:
                self.ttsObj = pyttsx3.init()
            voices = self.ttsObj.getProperty('voices')
            voiceItems = [None] * len(voices)
            itemIndex = 0
            for voice in voices:
                voiceId = voice.id.lower()
                voiceName = voice.name.lower()
                if (voiceId.find(languageType.lower()) != -1) or (voiceName.find(languageName.lower()) != -1):
                    voiceItems[itemIndex] = voice.name
                    itemIndex += 1
            voiceItems = voiceItems[0:itemIndex]
            self.m_choice_voice.Clear()
            self.m_choice_voice.SetItems(voiceItems)
        else:
            voiceItem = ['N/A']
            self.m_choice_voice.Clear()
            self.m_choice_voice.SetItems(voiceItem)

    def textToWav(self, text, language):
        """TTW: synthesize `text` into the target .wav via the external eSpeak CLI."""
        fileName = self.m_textCtrl_ttsFileName.GetLineText(0)
        if fileName == '':
            fileName = 'tts_untitled1.wav'
        ttsFilePath = os.path.join(os.path.dirname(os.path.abspath(os.path.dirname(__file__))), 'conv', 'tts', fileName)
        ttwEngineType = self.m_choice_ttwEngine.GetString(self.m_choice_ttwEngine.GetSelection())
        if ttwEngineType == 'eSpeak TTS':
            # eSpeak reads the text from a temp file (-f) to avoid CLI quoting issues
            ttsTextFile = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'ttsTextTemp.txt')
            ttsTextFileObj = open(ttsTextFile, 'wb')
            ttsTextFileObj.write(text)
            ttsTextFileObj.close()
            try:
                #espeak_path = "C:/tools_mcu/eSpeak/command_line/espeak.exe"
                #subprocess.call([espeak_path, "-v"+languageType[0:2], text])
                gender = self.m_choice_gender.GetString(self.m_choice_gender.GetSelection())
                gender = gender.lower()[0] + '3'
                subprocess.call(["espeak", "-v"+language[0:2]+'+'+gender, "-f"+ttsTextFile, "-w"+ttsFilePath])
            except:
                self.statusBar.SetStatusText("TTW Conversation Info: eSpeak is not installed or its path is not added into system environment")
            os.remove(ttsTextFile)
        else:
            self.statusBar.SetStatusText("TTW Conversation Info: Unavailable TTW Engine")

    def textToSpeech( self, event ):
        """TTS button handler: speak the text box content, then also convert it to .wav."""
        languageType, languageName = self.getLanguageSelection()
        # Get text from m_textCtrl_asrttsText
        lines = self.m_textCtrl_asrttsText.GetNumberOfLines()
        if lines != 0:
            data = ''
            for i in range(0, lines):
                data += self.m_textCtrl_asrttsText.GetLineText(i)
        else:
            return
        ttsEngineType = self.m_choice_ttsEngine.GetString(self.m_choice_ttsEngine.GetSelection())
        if ttsEngineType == 'pyttsx3 - SAPI5':
            if self.ttsObj == None:
                self.ttsObj = pyttsx3.init()
            hasVoice = False
            voices = self.ttsObj.getProperty('voices')
            voiceSel = self.m_choice_voice.GetString(self.m_choice_voice.GetSelection())
            for voice in voices:
                #print ('id = {} \nname = {} \nlanguages = {} \n'.format(voice.id, voice.name, voice.languages))
                voiceId = voice.id.lower()
                voiceName = voice.name.lower()
                if (voiceId.find(languageType.lower()) != -1) or (voiceName.find(languageName.lower()) != -1):
                    if (voiceSel == '') or (voiceSel == voice.name):
                        hasVoice = True
                        break
            if hasVoice:
                self.ttsObj.setProperty('voice', voice.id)
                self.ttsObj.say(data)
                self.statusBar.SetStatusText("TTS Conversation Info: Run and Wait")
                self.ttsObj.runAndWait()
                self.statusBar.SetStatusText("TTS Conversation Info: Successfully")
            else:
                self.statusBar.SetStatusText("TTS Conversation Info: Language is not supported by current PC")
            self.textToWav(data, languageType)
        else:
            self.statusBar.SetStatusText("TTS Conversation Info: Unavailable TTS Engine")

    def clearAsrTtsText( self, event ):
        """Clear the shared ASR/TTS text box."""
        self.m_textCtrl_asrttsText.Clear()

    def showHomepageInfo( self, event ):
        """Help menu: show repository and documentation links."""
        messageText = (('Code: \n https://github.com/JayHeng/pzh-py-speech.git \n') +
                       ('Doc: \n https://www.cnblogs.com/henjay724/p/9541867.html \n'))
        wx.MessageBox(messageText, "Homepage", wx.OK | wx.ICON_INFORMATION)

    def showAboutInfo( self, event ):
        """Help menu: show author contact info."""
        messageText = (('Author: Jay Heng \n') +
                       ('Email: hengjie1989@foxmail.com \n'))
        wx.MessageBox(messageText, "About", wx.OK | wx.ICON_INFORMATION)

if __name__ == '__main__':
    app = wx.App()

    main_win = mainWin(None)
    main_win.SetTitle(u"pzh-speech v1.1.0")
    main_win.Show()

    app.MainLoop()
wx.WRAPSIZER_DEFAULT_FLAGS ) 38 | 39 | edit_win = wx.BoxSizer( wx.VERTICAL ) 40 | 41 | dir_win = wx.WrapSizer( wx.HORIZONTAL, wx.WRAPSIZER_DEFAULT_FLAGS ) 42 | 43 | self.m_staticText_selectAudioFile = wx.StaticText( self, wx.ID_ANY, u"Select Audio File:", wx.DefaultPosition, wx.DefaultSize, 0 ) 44 | self.m_staticText_selectAudioFile.Wrap( -1 ) 45 | 46 | self.m_staticText_selectAudioFile.SetForegroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_APPWORKSPACE ) ) 47 | 48 | dir_win.Add( self.m_staticText_selectAudioFile, 0, wx.ALL, 5 ) 49 | 50 | self.m_genericDirCtrl_audioDir = wx.GenericDirCtrl( self, wx.ID_ANY, wx.EmptyString, wx.DefaultPosition, wx.Size( 180,250 ), wx.DIRCTRL_SHOW_FILTERS|wx.HSCROLL, wx.EmptyString, 0 ) 51 | 52 | self.m_genericDirCtrl_audioDir.ShowHidden( False ) 53 | self.m_genericDirCtrl_audioDir.SetForegroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_WINDOW ) ) 54 | self.m_genericDirCtrl_audioDir.SetBackgroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_WINDOW ) ) 55 | 56 | dir_win.Add( self.m_genericDirCtrl_audioDir, 1, wx.EXPAND |wx.ALL, 5 ) 57 | 58 | self.m_staticText_null0 = wx.StaticText( self, wx.ID_ANY, wx.EmptyString, wx.DefaultPosition, wx.Size( 30,-1 ), 0 ) 59 | self.m_staticText_null0.Wrap( -1 ) 60 | 61 | dir_win.Add( self.m_staticText_null0, 0, wx.ALL, 5 ) 62 | 63 | self.m_button_play = wx.Button( self, wx.ID_ANY, u"Play Start", wx.DefaultPosition, wx.Size( 110,-1 ), 0 ) 64 | dir_win.Add( self.m_button_play, 0, wx.ALL, 5 ) 65 | 66 | 67 | edit_win.Add( dir_win, 1, wx.EXPAND, 5 ) 68 | 69 | rec_win = wx.WrapSizer( wx.HORIZONTAL, wx.WRAPSIZER_DEFAULT_FLAGS ) 70 | 71 | self.m_staticText_null1 = wx.StaticText( self, wx.ID_ANY, wx.EmptyString, wx.DefaultPosition, wx.DefaultSize, 0 ) 72 | self.m_staticText_null1.Wrap( -1 ) 73 | 74 | rec_win.Add( self.m_staticText_null1, 0, wx.ALL, 5 ) 75 | 76 | self.m_staticText_recordAudioFile = wx.StaticText( self, wx.ID_ANY, u"Record Audio File:", wx.DefaultPosition, wx.Size( 180,-1 ), 
0 ) 77 | self.m_staticText_recordAudioFile.Wrap( -1 ) 78 | 79 | self.m_staticText_recordAudioFile.SetForegroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_APPWORKSPACE ) ) 80 | 81 | rec_win.Add( self.m_staticText_recordAudioFile, 0, wx.ALL, 5 ) 82 | 83 | self.m_staticText_channels = wx.StaticText( self, wx.ID_ANY, u"Channels:", wx.DefaultPosition, wx.Size( 60,-1 ), 0 ) 84 | self.m_staticText_channels.Wrap( -1 ) 85 | 86 | self.m_staticText_channels.SetForegroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_APPWORKSPACE ) ) 87 | 88 | rec_win.Add( self.m_staticText_channels, 0, wx.ALL, 5 ) 89 | 90 | m_choice_channelsChoices = [ u"Mono", u"Stereo" ] 91 | self.m_choice_channels = wx.Choice( self, wx.ID_ANY, wx.DefaultPosition, wx.Size( 80,-1 ), m_choice_channelsChoices, 0 ) 92 | self.m_choice_channels.SetSelection( 1 ) 93 | rec_win.Add( self.m_choice_channels, 0, wx.ALL, 5 ) 94 | 95 | self.m_staticText_sampRate = wx.StaticText( self, wx.ID_ANY, u"Samp Rate:", wx.DefaultPosition, wx.Size( 60,-1 ), 0 ) 96 | self.m_staticText_sampRate.Wrap( -1 ) 97 | 98 | self.m_staticText_sampRate.SetForegroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_APPWORKSPACE ) ) 99 | 100 | rec_win.Add( self.m_staticText_sampRate, 0, wx.ALL, 5 ) 101 | 102 | m_choice_sampRateChoices = [ u"44100", u"22050", u"16000", u"11025" ] 103 | self.m_choice_sampRate = wx.Choice( self, wx.ID_ANY, wx.DefaultPosition, wx.Size( 80,-1 ), m_choice_sampRateChoices, 0 ) 104 | self.m_choice_sampRate.SetSelection( 0 ) 105 | rec_win.Add( self.m_choice_sampRate, 0, wx.ALL, 5 ) 106 | 107 | self.m_staticText_hz = wx.StaticText( self, wx.ID_ANY, u"Hz", wx.DefaultPosition, wx.Size( 20,-1 ), 0 ) 108 | self.m_staticText_hz.Wrap( -1 ) 109 | 110 | self.m_staticText_hz.SetForegroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_APPWORKSPACE ) ) 111 | 112 | rec_win.Add( self.m_staticText_hz, 0, wx.ALL, 5 ) 113 | 114 | self.m_staticText_bitDepth = wx.StaticText( self, wx.ID_ANY, u"Bit Depth:", 
wx.DefaultPosition, wx.Size( 60,-1 ), 0 ) 115 | self.m_staticText_bitDepth.Wrap( -1 ) 116 | 117 | self.m_staticText_bitDepth.SetForegroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_APPWORKSPACE ) ) 118 | 119 | rec_win.Add( self.m_staticText_bitDepth, 0, wx.ALL, 5 ) 120 | 121 | m_choice_bitDepthChoices = [ u"8", u"16", u"24", u"32" ] 122 | self.m_choice_bitDepth = wx.Choice( self, wx.ID_ANY, wx.DefaultPosition, wx.Size( 80,-1 ), m_choice_bitDepthChoices, 0 ) 123 | self.m_choice_bitDepth.SetSelection( 1 ) 124 | rec_win.Add( self.m_choice_bitDepth, 0, wx.ALL, 5 ) 125 | 126 | self.m_staticText_bits = wx.StaticText( self, wx.ID_ANY, u"bits", wx.DefaultPosition, wx.Size( 20,-1 ), 0 ) 127 | self.m_staticText_bits.Wrap( -1 ) 128 | 129 | self.m_staticText_bits.SetForegroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_APPWORKSPACE ) ) 130 | 131 | rec_win.Add( self.m_staticText_bits, 0, wx.ALL, 5 ) 132 | 133 | self.m_staticText_recFileName = wx.StaticText( self, wx.ID_ANY, u"File:", wx.DefaultPosition, wx.Size( 30,-1 ), 0 ) 134 | self.m_staticText_recFileName.Wrap( -1 ) 135 | 136 | self.m_staticText_recFileName.SetForegroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_APPWORKSPACE ) ) 137 | 138 | rec_win.Add( self.m_staticText_recFileName, 0, wx.ALL, 5 ) 139 | 140 | self.m_textCtrl_recFileName = wx.TextCtrl( self, wx.ID_ANY, u"rec_untitled1.wav", wx.DefaultPosition, wx.Size( 110,-1 ), 0 ) 141 | rec_win.Add( self.m_textCtrl_recFileName, 0, wx.ALL, 5 ) 142 | 143 | self.m_staticText_null2 = wx.StaticText( self, wx.ID_ANY, wx.EmptyString, wx.DefaultPosition, wx.Size( 30,-1 ), 0 ) 144 | self.m_staticText_null2.Wrap( -1 ) 145 | 146 | rec_win.Add( self.m_staticText_null2, 0, wx.ALL, 5 ) 147 | 148 | self.m_button_record = wx.Button( self, wx.ID_ANY, u"Record Start", wx.DefaultPosition, wx.Size( 110,-1 ), 0 ) 149 | rec_win.Add( self.m_button_record, 0, wx.ALL, 5 ) 150 | 151 | 152 | edit_win.Add( rec_win, 1, wx.EXPAND, 5 ) 153 | 154 | 155 | win_sizer.Add( edit_win, 
1, wx.EXPAND, 5 ) 156 | 157 | show_win = wx.BoxSizer( wx.VERTICAL ) 158 | 159 | plot_win = wx.WrapSizer( wx.HORIZONTAL, wx.WRAPSIZER_DEFAULT_FLAGS ) 160 | 161 | self.m_staticText_showAudioFile = wx.StaticText( self, wx.ID_ANY, u"Show Audio File:", wx.DefaultPosition, wx.Size( 215,-1 ), 0 ) 162 | self.m_staticText_showAudioFile.Wrap( -1 ) 163 | 164 | self.m_staticText_showAudioFile.SetForegroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_APPWORKSPACE ) ) 165 | 166 | plot_win.Add( self.m_staticText_showAudioFile, 0, wx.ALL, 5 ) 167 | 168 | self.m_staticText_domain = wx.StaticText( self, wx.ID_ANY, u"Domain:", wx.DefaultPosition, wx.Size( 50,-1 ), 0 ) 169 | self.m_staticText_domain.Wrap( -1 ) 170 | 171 | self.m_staticText_domain.SetForegroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_APPWORKSPACE ) ) 172 | 173 | plot_win.Add( self.m_staticText_domain, 0, wx.ALL, 5 ) 174 | 175 | m_choice_domainChoices = [ u"Time", u"Frequency" ] 176 | self.m_choice_domain = wx.Choice( self, wx.ID_ANY, wx.DefaultPosition, wx.DefaultSize, m_choice_domainChoices, 0 ) 177 | self.m_choice_domain.SetSelection( 0 ) 178 | plot_win.Add( self.m_choice_domain, 0, wx.ALL, 5 ) 179 | 180 | self.m_panel_plot = wx.Panel( self, wx.ID_ANY, wx.DefaultPosition, wx.Size( 720,360 ), wx.TAB_TRAVERSAL ) 181 | self.m_panel_plot.SetFont( wx.Font( 9, wx.FONTFAMILY_DEFAULT, wx.FONTSTYLE_NORMAL, wx.FONTWEIGHT_NORMAL, False, "宋体" ) ) 182 | self.m_panel_plot.SetForegroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_WINDOW ) ) 183 | self.m_panel_plot.SetBackgroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_WINDOW ) ) 184 | 185 | plot_win.Add( self.m_panel_plot, 1, wx.EXPAND |wx.ALL, 5 ) 186 | 187 | 188 | show_win.Add( plot_win, 1, wx.EXPAND, 5 ) 189 | 190 | ctrl_win = wx.WrapSizer( wx.HORIZONTAL, wx.WRAPSIZER_DEFAULT_FLAGS ) 191 | 192 | self.m_staticText_conv = wx.StaticText( self, wx.ID_ANY, u"Audio/Text Conversation Configuration and Display:", wx.DefaultPosition, wx.Size( 285,-1 ), 0 ) 
193 | self.m_staticText_conv.Wrap( -1 ) 194 | 195 | self.m_staticText_conv.SetForegroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_APPWORKSPACE ) ) 196 | 197 | ctrl_win.Add( self.m_staticText_conv, 0, wx.ALL, 5 ) 198 | 199 | self.m_button_asrttsTextClear = wx.Button( self, wx.ID_ANY, u"Clear Text", wx.DefaultPosition, wx.Size( 80,-1 ), 0 ) 200 | ctrl_win.Add( self.m_button_asrttsTextClear, 0, wx.ALL, 5 ) 201 | 202 | self.m_staticText_null3 = wx.StaticText( self, wx.ID_ANY, wx.EmptyString, wx.DefaultPosition, wx.Size( 100,-1 ), 0 ) 203 | self.m_staticText_null3.Wrap( -1 ) 204 | 205 | ctrl_win.Add( self.m_staticText_null3, 0, wx.ALL, 5 ) 206 | 207 | self.m_staticText_lang = wx.StaticText( self, wx.ID_ANY, u"Language:", wx.Point( -1,-1 ), wx.Size( -1,-1 ), 0 ) 208 | self.m_staticText_lang.Wrap( -1 ) 209 | 210 | self.m_staticText_lang.SetForegroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_APPWORKSPACE ) ) 211 | 212 | ctrl_win.Add( self.m_staticText_lang, 0, wx.ALL, 5 ) 213 | 214 | m_choice_langChoices = [ u"US English", u"Mandarin Chinese" ] 215 | self.m_choice_lang = wx.Choice( self, wx.ID_ANY, wx.DefaultPosition, wx.DefaultSize, m_choice_langChoices, 0 ) 216 | self.m_choice_lang.SetSelection( 0 ) 217 | ctrl_win.Add( self.m_choice_lang, 0, wx.ALL, 5 ) 218 | 219 | self.m_staticText_asrEngine = wx.StaticText( self, wx.ID_ANY, u"ASR Engine:", wx.DefaultPosition, wx.Size( 70,-1 ), 0 ) 220 | self.m_staticText_asrEngine.Wrap( -1 ) 221 | 222 | self.m_staticText_asrEngine.SetForegroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_APPWORKSPACE ) ) 223 | 224 | ctrl_win.Add( self.m_staticText_asrEngine, 0, wx.ALL, 5 ) 225 | 226 | m_choice_asrEngineChoices = [ u"CMU Sphinx", u"Google Speech Recognition", u"Google Cloud Speech API", u"Wit.ai", u"Microsoft Bing Voice Recognition", u"Houndify API", u"IBM Speech to Text", u"Snowboy Hotword Detection" ] 227 | self.m_choice_asrEngine = wx.Choice( self, wx.ID_ANY, wx.DefaultPosition, wx.Size( 155,-1 ), 
m_choice_asrEngineChoices, 0 ) 228 | self.m_choice_asrEngine.SetSelection( 0 ) 229 | ctrl_win.Add( self.m_choice_asrEngine, 0, wx.ALL, 5 ) 230 | 231 | self.m_staticText_asrId = wx.StaticText( self, wx.ID_ANY, u"Id:", wx.DefaultPosition, wx.Size( 40,-1 ), 0 ) 232 | self.m_staticText_asrId.Wrap( -1 ) 233 | 234 | self.m_staticText_asrId.SetForegroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_APPWORKSPACE ) ) 235 | 236 | ctrl_win.Add( self.m_staticText_asrId, 0, wx.ALL, 5 ) 237 | 238 | self.m_textCtrl_asrId = wx.TextCtrl( self, wx.ID_ANY, u"N/A", wx.DefaultPosition, wx.Size( 150,-1 ), 0 ) 239 | ctrl_win.Add( self.m_textCtrl_asrId, 0, wx.ALL, 5 ) 240 | 241 | self.m_staticText_asrKey = wx.StaticText( self, wx.ID_ANY, u"Key:", wx.DefaultPosition, wx.DefaultSize, 0 ) 242 | self.m_staticText_asrKey.Wrap( -1 ) 243 | 244 | self.m_staticText_asrKey.SetForegroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_APPWORKSPACE ) ) 245 | 246 | ctrl_win.Add( self.m_staticText_asrKey, 0, wx.ALL, 5 ) 247 | 248 | self.m_textCtrl_asrKey = wx.TextCtrl( self, wx.ID_ANY, u"N/A", wx.DefaultPosition, wx.Size( 232,-1 ), 0 ) 249 | ctrl_win.Add( self.m_textCtrl_asrKey, 0, wx.ALL, 5 ) 250 | 251 | self.m_staticText_ttsEngine = wx.StaticText( self, wx.ID_ANY, u"TTS Engine:", wx.DefaultPosition, wx.Size( 70,-1 ), 0 ) 252 | self.m_staticText_ttsEngine.Wrap( -1 ) 253 | 254 | self.m_staticText_ttsEngine.SetForegroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_APPWORKSPACE ) ) 255 | 256 | ctrl_win.Add( self.m_staticText_ttsEngine, 0, wx.ALL, 5 ) 257 | 258 | m_choice_ttsEngineChoices = [ u"pyttsx3 - SAPI5", u"gTTS" ] 259 | self.m_choice_ttsEngine = wx.Choice( self, wx.ID_ANY, wx.DefaultPosition, wx.Size( 155,-1 ), m_choice_ttsEngineChoices, 0 ) 260 | self.m_choice_ttsEngine.SetSelection( 0 ) 261 | ctrl_win.Add( self.m_choice_ttsEngine, 0, wx.ALL, 5 ) 262 | 263 | self.m_staticText_voice = wx.StaticText( self, wx.ID_ANY, u"Voice:", wx.DefaultPosition, wx.Size( 40,-1 ), 0 ) 264 | 
self.m_staticText_voice.Wrap( -1 ) 265 | 266 | self.m_staticText_voice.SetForegroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_APPWORKSPACE ) ) 267 | 268 | ctrl_win.Add( self.m_staticText_voice, 0, wx.ALL, 5 ) 269 | 270 | m_choice_voiceChoices = [] 271 | self.m_choice_voice = wx.Choice( self, wx.ID_ANY, wx.DefaultPosition, wx.Size( 310,-1 ), m_choice_voiceChoices, 0 ) 272 | self.m_choice_voice.SetSelection( 0 ) 273 | ctrl_win.Add( self.m_choice_voice, 0, wx.ALL, 5 ) 274 | 275 | self.m_staticText_null4 = wx.StaticText( self, wx.ID_ANY, wx.EmptyString, wx.DefaultPosition, wx.Size( 100,-1 ), 0 ) 276 | self.m_staticText_null4.Wrap( -1 ) 277 | 278 | ctrl_win.Add( self.m_staticText_null4, 0, wx.ALL, 5 ) 279 | 280 | self.m_staticText_ttwEngine = wx.StaticText( self, wx.ID_ANY, u"TTW Engine: ", wx.DefaultPosition, wx.Size( 70,-1 ), 0 ) 281 | self.m_staticText_ttwEngine.Wrap( -1 ) 282 | 283 | self.m_staticText_ttwEngine.SetForegroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_APPWORKSPACE ) ) 284 | 285 | ctrl_win.Add( self.m_staticText_ttwEngine, 0, wx.ALL, 5 ) 286 | 287 | m_choice_ttwEngineChoices = [ u"eSpeak TTS", u"Festival SSS" ] 288 | self.m_choice_ttwEngine = wx.Choice( self, wx.ID_ANY, wx.DefaultPosition, wx.Size( 155,-1 ), m_choice_ttwEngineChoices, 0 ) 289 | self.m_choice_ttwEngine.SetSelection( 0 ) 290 | ctrl_win.Add( self.m_choice_ttwEngine, 0, wx.ALL, 5 ) 291 | 292 | self.m_staticText_gender = wx.StaticText( self, wx.ID_ANY, u"Gender:", wx.DefaultPosition, wx.Size( 40,-1 ), 0 ) 293 | self.m_staticText_gender.Wrap( -1 ) 294 | 295 | self.m_staticText_gender.SetForegroundColour( wx.SystemSettings.GetColour( wx.SYS_COLOUR_APPWORKSPACE ) ) 296 | 297 | ctrl_win.Add( self.m_staticText_gender, 0, wx.ALL, 5 ) 298 | 299 | m_choice_genderChoices = [ u"Male", u"Female" ] 300 | self.m_choice_gender = wx.Choice( self, wx.ID_ANY, wx.DefaultPosition, wx.Size( 70,-1 ), m_choice_genderChoices, 0 ) 301 | self.m_choice_gender.SetSelection( 0 ) 302 | ctrl_win.Add( 
self.m_choice_gender, 0, wx.ALL, 5 ) 303 | 304 | 305 | show_win.Add( ctrl_win, 1, wx.EXPAND, 5 ) 306 | 307 | conv_win = wx.WrapSizer( wx.HORIZONTAL, wx.WRAPSIZER_DEFAULT_FLAGS ) 308 | 309 | text_win = wx.WrapSizer( wx.HORIZONTAL, wx.WRAPSIZER_DEFAULT_FLAGS ) 310 | 311 | self.m_textCtrl_asrttsText = wx.TextCtrl( self, wx.ID_ANY, wx.EmptyString, wx.DefaultPosition, wx.Size( 480,120 ), wx.TE_MULTILINE ) 312 | text_win.Add( self.m_textCtrl_asrttsText, 0, wx.ALL, 5 ) 313 | 314 | 315 | conv_win.Add( text_win, 1, wx.EXPAND, 5 ) 316 | 317 | asrtts_win = wx.WrapSizer( wx.HORIZONTAL, wx.WRAPSIZER_DEFAULT_FLAGS ) 318 | 319 | self.m_button_asr = wx.Button( self, wx.ID_ANY, u"ASR", wx.DefaultPosition, wx.Size( 40,-1 ), 0 ) 320 | asrtts_win.Add( self.m_button_asr, 0, wx.ALL, 5 ) 321 | 322 | self.m_textCtrl_asrFileName = wx.TextCtrl( self, wx.ID_ANY, u"asr_untitled1.txt", wx.DefaultPosition, wx.Size( 180,-1 ), 0 ) 323 | asrtts_win.Add( self.m_textCtrl_asrFileName, 0, wx.ALL, 5 ) 324 | 325 | self.m_button_tts = wx.Button( self, wx.ID_ANY, u"TTS", wx.DefaultPosition, wx.Size( 40,-1 ), 0 ) 326 | asrtts_win.Add( self.m_button_tts, 0, wx.ALL, 5 ) 327 | 328 | self.m_textCtrl_ttsFileName = wx.TextCtrl( self, wx.ID_ANY, u"tts_untitled1.wav", wx.DefaultPosition, wx.Size( 180,-1 ), 0 ) 329 | asrtts_win.Add( self.m_textCtrl_ttsFileName, 0, wx.ALL, 5 ) 330 | 331 | 332 | conv_win.Add( asrtts_win, 1, wx.EXPAND, 5 ) 333 | 334 | 335 | show_win.Add( conv_win, 1, wx.EXPAND, 5 ) 336 | 337 | 338 | win_sizer.Add( show_win, 1, wx.EXPAND, 5 ) 339 | 340 | 341 | self.SetSizer( win_sizer ) 342 | self.Layout() 343 | self.statusBar = self.CreateStatusBar( 1, wx.STB_SIZEGRIP, wx.ID_ANY ) 344 | 345 | self.Centre( wx.BOTH ) 346 | 347 | # Connect Events 348 | self.Bind( wx.EVT_MENU, self.showHomepageInfo, id = self.m_menuItem_homePage.GetId() ) 349 | self.Bind( wx.EVT_MENU, self.showAboutInfo, id = self.m_menuItem_about.GetId() ) 350 | self.m_genericDirCtrl_audioDir.Bind( wx.EVT_TREE_SEL_CHANGED, self.viewAudio 
    def __del__( self ):
        # Generated destructor stub: wxPython tears the widget tree down
        # itself, so there is nothing to release here.
        pass


    # Virtual event handlers, override them in your derived class.
    # Each stub calls event.Skip() so that, until a subclass (mainWin in
    # src/main.py) provides a real implementation, the event keeps
    # propagating to other handlers instead of being swallowed.
    def showHomepageInfo( self, event ):
        event.Skip()

    def showAboutInfo( self, event ):
        event.Skip()

    def viewAudio( self, event ):
        event.Skip()

    def playAudio( self, event ):
        event.Skip()

    def recordAudio( self, event ):
        event.Skip()

    def clearAsrTtsText( self, event ):
        event.Skip()

    def refreshVoice( self, event ):
        event.Skip()

    def audioSpeechRecognition( self, event ):
        event.Skip()

    def textToSpeech( self, event ):
        event.Skip()