├── LICENSE
├── README.md
├── binary
├── Win32.Debug
│ ├── CrystalTest.bat
│ └── config.xml
└── Win32.Release
│ ├── CrystalTest.bat
│ └── config.xml
├── data
└── putonghua
│ ├── hts
│ ├── hts.cfg
│ └── voice
│ │ ├── _readme.txt
│ │ ├── dur.pdf
│ │ ├── lf0.pdf
│ │ ├── lf0.win1
│ │ ├── lf0.win2
│ │ ├── lf0.win3
│ │ ├── lpf.pdf
│ │ ├── lpf.win1
│ │ ├── mgc.pdf
│ │ ├── mgc.win1
│ │ ├── mgc.win2
│ │ ├── mgc.win3
│ │ ├── tree-dur.inf
│ │ ├── tree-lf0.inf
│ │ ├── tree-lpf.inf
│ │ ├── tree-mgc.inf
│ │ └── unvoiced.prop
│ └── text
│ ├── _readme.txt
│ ├── gbk.bin
│ ├── sttable.dat
│ └── symbol.lex
├── demo
└── CrystalTest
│ ├── ReadMe.txt
│ └── main.cpp
├── document
├── docs
│ ├── 2007 TTS Framework.ppt
│ ├── Development Guide.doc
│ ├── Module Division and SSML Specification for Crystal.doc
│ ├── Readme.doc
│ ├── Support of SSML 1.1.doc
│ ├── XML & SSML Introduction.ppt
│ ├── architecture.png
│ ├── architecture.svg
│ ├── dynamic_module_loading.png
│ ├── interface.png
│ └── ssml_interface.png
├── doxygen.tpl
└── readme.txt
├── engine
├── csttools
│ ├── cmn
│ │ ├── cmn_error.h
│ │ ├── cmn_file.cpp
│ │ ├── cmn_file.h
│ │ ├── cmn_hashmap.h
│ │ ├── cmn_stack.h
│ │ ├── cmn_string.cpp
│ │ ├── cmn_string.h
│ │ ├── cmn_textfile.cpp
│ │ ├── cmn_textfile.h
│ │ ├── cmn_type.h
│ │ └── cmn_vector.h
│ ├── dsp
│ │ ├── dsp_databuffer.cpp
│ │ ├── dsp_databuffer.h
│ │ ├── dsp_dspalgorithm.cpp
│ │ ├── dsp_dspalgorithm.h
│ │ ├── dsp_wavedata.cpp
│ │ ├── dsp_wavedata.h
│ │ ├── dsp_wavedevice.cpp
│ │ ├── dsp_wavedevice.h
│ │ ├── dsp_wavefile.cpp
│ │ ├── dsp_wavefile.h
│ │ ├── dsp_waveplay.cpp
│ │ └── dsp_waveplay.h
│ ├── utils
│ │ ├── utl_bigram.cpp
│ │ ├── utl_bigram.h
│ │ ├── utl_chineseconv.cpp
│ │ ├── utl_chineseconv.h
│ │ ├── utl_pinyin.cpp
│ │ ├── utl_pinyin.h
│ │ ├── utl_regexp.cpp
│ │ ├── utl_regexp.h
│ │ ├── utl_string.cpp
│ │ ├── utl_string.h
│ │ ├── utl_viterbi.cpp
│ │ ├── utl_viterbi.h
│ │ ├── utl_wagontree.cpp
│ │ └── utl_wagontree.h
│ └── xml
│ │ ├── ssml_document.cpp
│ │ ├── ssml_document.h
│ │ ├── ssml_helper.cpp
│ │ ├── ssml_helper.h
│ │ ├── ssml_traversal.cpp
│ │ ├── ssml_traversal.h
│ │ ├── xml_dom.cpp
│ │ ├── xml_dom.h
│ │ ├── xml_sax.cpp
│ │ └── xml_sax.h
├── htslib
│ ├── HTS_audio.c
│ ├── HTS_engine.c
│ ├── HTS_engine.h
│ ├── HTS_gstream.c
│ ├── HTS_hidden.h
│ ├── HTS_label.c
│ ├── HTS_misc.c
│ ├── HTS_model.c
│ ├── HTS_pstream.c
│ ├── HTS_sstream.c
│ ├── HTS_vocoder.c
│ ├── Makefile.am
│ ├── Makefile.mak
│ ├── bin
│ │ ├── hts_engine.c
│ │ ├── hts_engine_o.c
│ │ └── readme.txt
│ ├── hts_synthesizer.cpp
│ └── hts_synthesizer.h
├── ttsbase
│ ├── datamanage
│ │ ├── base_module.h
│ │ ├── base_moduleapi.h
│ │ ├── base_ttsdocument.h
│ │ ├── data_datamanager.cpp
│ │ ├── data_datamanager.h
│ │ ├── data_drivendata.cpp
│ │ ├── data_drivendata.h
│ │ ├── module_manager.cpp
│ │ └── module_manager.h
│ ├── datatext
│ │ ├── data_textdata.cpp
│ │ ├── data_textdata.h
│ │ ├── lexicon_lexicon.cpp
│ │ └── lexicon_lexicon.h
│ ├── datavoice
│ │ ├── data_voicedata.cpp
│ │ ├── data_voicedata.h
│ │ └── wav_synthesizer.h
│ ├── preprocess
│ │ ├── dsa_docstruct.cpp
│ │ ├── dsa_docstruct.h
│ │ ├── dsa_langdetect.cpp
│ │ ├── dsa_langdetect.h
│ │ ├── dsa_symboldetect.cpp
│ │ ├── dsa_symboldetect.h
│ │ ├── dsa_textsegment.cpp
│ │ ├── dsa_textsegment.h
│ │ ├── lang_langconvert.cpp
│ │ ├── lang_langconvert.h
│ │ ├── prep_preprocess.cpp
│ │ └── prep_preprocess.h
│ ├── synth.concat
│ │ ├── splib_phonemeset.h
│ │ ├── splib_speechlib.h
│ │ ├── synth_concatenate.cpp
│ │ └── synth_concatenate.h
│ ├── synthesize
│ │ ├── psp_prosodypredict.cpp
│ │ ├── psp_prosodypredict.h
│ │ ├── synth_synthesize.cpp
│ │ ├── synth_synthesize.h
│ │ ├── unitseg_unitsegment.cpp
│ │ └── unitseg_unitsegment.h
│ ├── textparse
│ │ ├── gtp_grapheme2phoneme.cpp
│ │ ├── gtp_grapheme2phoneme.h
│ │ ├── norm_textnormalize.cpp
│ │ ├── norm_textnormalize.h
│ │ ├── psg_prosstructgen.cpp
│ │ ├── psg_prosstructgen.h
│ │ ├── wdseg_wordsegment.cpp
│ │ └── wdseg_wordsegment.h
│ ├── tts.synth
│ │ ├── tts_synthesizer.cpp
│ │ └── tts_synthesizer.h
│ └── tts.text
│ │ ├── tts_textparser.cpp
│ │ └── tts_textparser.h
└── ttschinese
│ ├── synth.hts
│ ├── data_voicedata.cpp
│ ├── data_voicedata.h
│ ├── hts_ssml2lab.cpp
│ ├── hts_ssml2lab.h
│ ├── hts_synthesize.cpp
│ ├── hts_synthesize.h
│ ├── hts_synthesizer.cpp
│ ├── hts_synthesizer.h
│ ├── lab format
│ │ ├── lab_format_Chinese_2017 (full_lab格式说明).docx
│ │ └── lab_format_English_2015.pdf
│ ├── psp_prosodypredict.cpp
│ ├── psp_prosodypredict.h
│ ├── tts_synthesizer.cpp
│ ├── tts_synthesizer.h
│ ├── unitseg_unitsegment.cpp
│ └── unitseg_unitsegment.h
│ ├── textparse
│ ├── data_textdata.cpp
│ ├── data_textdata.h
│ ├── lexicon_data.cpp
│ ├── lexicon_data.h
│ ├── tts_textparser.cpp
│ └── tts_textparser.h
│ └── utility
│ ├── utl_pinyin.cpp
│ └── utl_pinyin.h
└── project
└── Windows
├── CSTTools
├── CSTTools.vcxproj
├── CSTTools.vcxproj.filters
└── ReadMe.txt
├── CrystalSpeechToolkit.sln
├── CrystalTest
├── CrystalTest.vcxproj
└── CrystalTest.vcxproj.filters
├── HTSLib
├── HTSLib.vcxproj
├── HTSLib.vcxproj.filters
└── ReadMe.txt
├── TTSBase.synth.concat
├── ReadMe.txt
├── TTSBase.synth.concat.vcxproj
└── TTSBase.synth.concat.vcxproj.filters
├── TTSBase.synth
├── ReadMe.txt
├── TTSBase.synth.vcxproj
└── TTSBase.synth.vcxproj.filters
├── TTSBase.text
├── ReadMe.txt
├── TTSBase.text.vcxproj
└── TTSBase.text.vcxproj.filters
├── TTSChinese.synth.hts
├── ReadMe.txt
├── TTSChinese.synth.hts.vcxproj
└── TTSChinese.synth.hts.vcxproj.filters
└── TTSChinese.text
├── ReadMe.txt
├── TTSChinese.text.vcxproj
└── TTSChinese.text.vcxproj.filters
/README.md:
--------------------------------------------------------------------------------
1 | # Crystal Text-to-Speech (TTS) Engine
2 |
3 | C++ implementation of Crystal Text-to-Speech (TTS) engine.
4 |
5 | The Crystal TTS engine provides an implementation of Crystal, a unified framework for multilingual TTS synthesis engines. The unified framework defines the common TTS modules for different languages and/or dialects. The interfaces between consecutive modules conform to the Speech Synthesis Markup Language (SSML) specification for standardization, interoperability, multilinguality, and extensibility.
6 |
7 | ### Architecture
8 |
9 |
10 |
11 |
12 |
13 | ### Reference
14 | For the motivation and design of the framework, you can refer to [the following paper](http://www1.se.cuhk.edu.hk/~hccl/publications/pub/2035_Unified%20Framework.pdf). Please also cite this paper when referring to this project:
15 |
16 | - Zhiyong WU, Guangqi CAO, Helen MENG, Lianhong CAI, "[A Unified Framework for Multilingual Text-to-Speech Synthesis with SSML Specification as Interface](http://www1.se.cuhk.edu.hk/~hccl/publications/pub/2035_Unified%20Framework.pdf)," *Tsinghua Science and Technology*, vol. 14, no. 5, pp. 623-630, October 2009.
17 |
18 | ### Native Support of SSML
19 |
20 | The framework uses Speech Synthesis Markup Language (SSML) specification as interface between different modules. Hence, the framework provides native support of SSML tags.
21 |
22 | Meanwhile, the framework provides **cst::xml::CSSMLTraversal** (*xml/ssml_traversal*) to convert the SSML document into an internal data structure for convenient processing. This means you do not need to take care of the complex parsing procedures of the SSML document when implementing your own algorithms. All you need to do is implement your algorithms by overriding the functions that operate on the internal data structures for the modules in **cst::tts::base::***.
23 |
24 |
25 |
26 |
27 |
28 | ### Support of Dynamic Module Loading & Cross-platform
29 |
30 | The framework provides the support of dynamic module loading on different platforms.
31 |
32 | You can implement different algorithms for each module and compile as a new dynamic library (.dll on Windows, .so on Linux platform). The backbone of the framework **cst::tts::base::CTextParser** (*ttsbase/tts.text/tts_textparser*) and **cst::tts::base::CSynthesizer** (*ttsbase/tts.synth/tts_synthesizer*) will automatically load the modules specified by an XML based configuration file. In this way, the framework provides the flexibility in switching between different TTS engines or algorithms.
33 |
34 |
35 |
36 |
37 |
38 | For example, the above left figure shows Concatenative Putonghua TTS engine running by specifying the "cmn.xml" as configuration input; while the above right figure shows HMM-based Chinese TTS engine running by specifying the "zh.xml" as configuration input.
39 |
40 |
41 | ### Support of Multilingual TTS Engine
42 |
43 | You can implement different TTS engines for different languages by overriding the TTSBase modules in **cst::tts::base::***. The following figure depicts the multilingual support of the architecture.
44 |
45 |
46 |
47 |
48 |
49 |
50 | ### About the Project
51 |
52 | Copyright (c) Tsinghua-CUHK Joint Research Center for Media Sciences, Technologies and Systems. All rights reserved.
53 |
54 | http://mjrc.sz.tsinghua.edu.cn
55 |
56 | Tsinghua-CUHK Joint Research Center has the rights to create, modify, copy, compile, remove, rename, explain and deliver the source codes.
57 |
--------------------------------------------------------------------------------
/binary/Win32.Debug/CrystalTest.bat:
--------------------------------------------------------------------------------
1 | CrystalTest.exe config.xml
--------------------------------------------------------------------------------
/binary/Win32.Debug/config.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/binary/Win32.Release/CrystalTest.bat:
--------------------------------------------------------------------------------
1 | CrystalTest.exe config.xml
--------------------------------------------------------------------------------
/binary/Win32.Release/config.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/data/putonghua/hts/hts.cfg:
--------------------------------------------------------------------------------
1 | -td voice/tree-dur.inf
2 | -tm voice/tree-mgc.inf
3 | -tf voice/tree-lf0.inf
4 | -tl voice/tree-lpf.inf
5 | -md voice/dur.pdf
6 | -mm voice/mgc.pdf
7 | -mf voice/lf0.pdf
8 | -ml voice/lpf.pdf
9 | -mu voice/unvoiced.prop
10 | -dm voice/mgc.win1
11 | -dm voice/mgc.win2
12 | -dm voice/mgc.win3
13 | -df voice/lf0.win1
14 | -df voice/lf0.win2
15 | -df voice/lf0.win3
16 | -dl voice/lpf.win1
17 | -s 16000
18 | -p 80
19 | -a 0.42
20 | -g 0
21 | -b 0.4
22 | -l
23 | -r 1
24 | -vp
25 |
--------------------------------------------------------------------------------
/data/putonghua/hts/voice/_readme.txt:
--------------------------------------------------------------------------------
1 | These are the voice files for the HTS engine for Chinese Mandarin and English from HCSI.
2 |
3 | (with initial-final format for Chinese Mandarin)
4 | (with phonemes for English)
5 |
--------------------------------------------------------------------------------
/data/putonghua/hts/voice/dur.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thuhcsi/Crystal/d129e5400360e7c696d071c1576803917aaf9286/data/putonghua/hts/voice/dur.pdf
--------------------------------------------------------------------------------
/data/putonghua/hts/voice/lf0.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thuhcsi/Crystal/d129e5400360e7c696d071c1576803917aaf9286/data/putonghua/hts/voice/lf0.pdf
--------------------------------------------------------------------------------
/data/putonghua/hts/voice/lf0.win1:
--------------------------------------------------------------------------------
1 | 1 1.0
2 |
--------------------------------------------------------------------------------
/data/putonghua/hts/voice/lf0.win2:
--------------------------------------------------------------------------------
1 | 3 -0.5 0.0 0.5
2 |
--------------------------------------------------------------------------------
/data/putonghua/hts/voice/lf0.win3:
--------------------------------------------------------------------------------
1 | 3 1.0 -2.0 1.0
2 |
--------------------------------------------------------------------------------
/data/putonghua/hts/voice/lpf.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thuhcsi/Crystal/d129e5400360e7c696d071c1576803917aaf9286/data/putonghua/hts/voice/lpf.pdf
--------------------------------------------------------------------------------
/data/putonghua/hts/voice/lpf.win1:
--------------------------------------------------------------------------------
1 | 1 1.0
2 |
--------------------------------------------------------------------------------
/data/putonghua/hts/voice/mgc.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thuhcsi/Crystal/d129e5400360e7c696d071c1576803917aaf9286/data/putonghua/hts/voice/mgc.pdf
--------------------------------------------------------------------------------
/data/putonghua/hts/voice/mgc.win1:
--------------------------------------------------------------------------------
1 | 1 1.0
2 |
--------------------------------------------------------------------------------
/data/putonghua/hts/voice/mgc.win2:
--------------------------------------------------------------------------------
1 | 3 -0.5 0.0 0.5
2 |
--------------------------------------------------------------------------------
/data/putonghua/hts/voice/mgc.win3:
--------------------------------------------------------------------------------
1 | 3 1.0 -2.0 1.0
2 |
--------------------------------------------------------------------------------
/data/putonghua/hts/voice/tree-lpf.inf:
--------------------------------------------------------------------------------
1 | {*}[2]
2 | "lpf_s2_1"
3 | {*}[3]
4 | "lpf_s3_1"
5 | {*}[4]
6 | "lpf_s4_1"
7 | {*}[5]
8 | "lpf_s5_1"
9 | {*}[6]
10 | "lpf_s6_1"
11 |
--------------------------------------------------------------------------------
/data/putonghua/hts/voice/unvoiced.prop:
--------------------------------------------------------------------------------
1 | 364
2 | -EAA+ 0
3 | -EAA0+ 0
4 | -EAA1+ 0
5 | -EAA2+ 0
6 | -EAE+ 0
7 | -EAE0+ 0
8 | -EAE1+ 0
9 | -EAE2+ 0
10 | -EAH+ 0
11 | -EAH0+ 0
12 | -EAH1+ 0
13 | -EAH2+ 0
14 | -EAO+ 0
15 | -EAO0+ 0
16 | -EAO1+ 0
17 | -EAO2+ 0
18 | -EAW+ 0
19 | -EAW0+ 0
20 | -EAW1+ 0
21 | -EAW2+ 0
22 | -EAX+ 0
23 | -EAX0+ 0
24 | -EAX1+ 0
25 | -EAX2+ 0
26 | -EAY+ 0
27 | -EAY0+ 0
28 | -EAY1+ 0
29 | -EAY2+ 0
30 | -EB+ 0
31 | -ECH+ 1
32 | -ED+ 0
33 | -EDH+ 1
34 | -EEH+ 0
35 | -EEH0+ 0
36 | -EEH1+ 0
37 | -EEH2+ 0
38 | -EER+ 0
39 | -EER0+ 0
40 | -EER1+ 0
41 | -EER2+ 0
42 | -EEY+ 0
43 | -EEY0+ 0
44 | -EEY1+ 0
45 | -EEY2+ 0
46 | -EF+ 1
47 | -EG+ 1
48 | -EHH+ 1
49 | -EIH+ 0
50 | -EIH0+ 0
51 | -EIH1+ 0
52 | -EIH2+ 0
53 | -EIY+ 0
54 | -EIY0+ 0
55 | -EIY1+ 0
56 | -EIY2+ 0
57 | -EJH+ 1
58 | -EK+ 1
59 | -EL+ 0
60 | -EM+ 0
61 | -EN+ 0
62 | -ENG+ 0
63 | -EOW+ 0
64 | -EOW0+ 0
65 | -EOW1+ 0
66 | -EOW2+ 0
67 | -EOY+ 0
68 | -EOY0+ 0
69 | -EOY1+ 0
70 | -EOY2+ 0
71 | -EP+ 1
72 | -ER+ 0
73 | -ER0+ 0
74 | -ER1+ 0
75 | -ER2+ 0
76 | -ES+ 1
77 | -ESH+ 1
78 | -ET+ 1
79 | -ETH+ 1
80 | -EUH+ 0
81 | -EUH0+ 0
82 | -EUH1+ 0
83 | -EUH2+ 0
84 | -EUW+ 0
85 | -EUW0+ 0
86 | -EUW1+ 0
87 | -EUW2+ 0
88 | -EV+ 0
89 | -EW+ 0
90 | -EY+ 0
91 | -EY0+ 0
92 | -EY1+ 0
93 | -EY2+ 0
94 | -EZ+ 1
95 | -EZH+ 1
96 | -a1+ 0
97 | -a2+ 0
98 | -a3+ 0
99 | -a4+ 0
100 | -a5+ 0
101 | -a6+ 0
102 | -ai1+ 0
103 | -ai2+ 0
104 | -ai3+ 0
105 | -ai4+ 0
106 | -ai5+ 0
107 | -ai6+ 0
108 | -an1+ 0
109 | -an2+ 0
110 | -an3+ 0
111 | -an4+ 0
112 | -an5+ 0
113 | -an6+ 0
114 | -ang1+ 0
115 | -ang2+ 0
116 | -ang3+ 0
117 | -ang4+ 0
118 | -ang5+ 0
119 | -ang6+ 0
120 | -ao1+ 0
121 | -ao2+ 0
122 | -ao3+ 0
123 | -ao4+ 0
124 | -ao5+ 0
125 | -ao6+ 0
126 | -b+ 0
127 | -c+ 1
128 | -ch+ 1
129 | -d+ 0
130 | -e1+ 0
131 | -e2+ 0
132 | -e3+ 0
133 | -e4+ 0
134 | -e5+ 0
135 | -e6+ 0
136 | -ei1+ 0
137 | -ei2+ 0
138 | -ei3+ 0
139 | -ei4+ 0
140 | -ei5+ 0
141 | -ei6+ 0
142 | -en1+ 0
143 | -en2+ 0
144 | -en3+ 0
145 | -en4+ 0
146 | -en5+ 0
147 | -en6+ 0
148 | -eng1+ 0
149 | -eng2+ 0
150 | -eng3+ 0
151 | -eng4+ 0
152 | -eng5+ 0
153 | -eng6+ 0
154 | -er1+ 0
155 | -er2+ 0
156 | -er3+ 0
157 | -er4+ 0
158 | -er5+ 0
159 | -er6+ 0
160 | -f+ 1
161 | -g+ 1
162 | -h+ 1
163 | -i1+ 0
164 | -i2+ 0
165 | -i3+ 0
166 | -i4+ 0
167 | -i5+ 0
168 | -i6+ 0
169 | -ia1+ 0
170 | -ia2+ 0
171 | -ia3+ 0
172 | -ia4+ 0
173 | -ia5+ 0
174 | -ia6+ 0
175 | -ian1+ 0
176 | -ian2+ 0
177 | -ian3+ 0
178 | -ian4+ 0
179 | -ian5+ 0
180 | -ian6+ 0
181 | -iang1+ 0
182 | -iang2+ 0
183 | -iang3+ 0
184 | -iang4+ 0
185 | -iang5+ 0
186 | -iang6+ 0
187 | -iao1+ 0
188 | -iao2+ 0
189 | -iao3+ 0
190 | -iao4+ 0
191 | -iao5+ 0
192 | -iao6+ 0
193 | -ie1+ 0
194 | -ie2+ 0
195 | -ie3+ 0
196 | -ie4+ 0
197 | -ie5+ 0
198 | -ie6+ 0
199 | -in1+ 0
200 | -in2+ 0
201 | -in3+ 0
202 | -in4+ 0
203 | -in5+ 0
204 | -in6+ 0
205 | -ing1+ 0
206 | -ing2+ 0
207 | -ing3+ 0
208 | -ing4+ 0
209 | -ing5+ 0
210 | -ing6+ 0
211 | -io1+ 0
212 | -io2+ 0
213 | -io3+ 0
214 | -io4+ 0
215 | -io5+ 0
216 | -io6+ 0
217 | -iong1+ 0
218 | -iong2+ 0
219 | -iong3+ 0
220 | -iong4+ 0
221 | -iong5+ 0
222 | -iong6+ 0
223 | -iou1+ 0
224 | -iou2+ 0
225 | -iou3+ 0
226 | -iou4+ 0
227 | -iou5+ 0
228 | -iou6+ 0
229 | -ix1+ 0
230 | -ix2+ 0
231 | -ix3+ 0
232 | -ix4+ 0
233 | -ix5+ 0
234 | -ix6+ 0
235 | -iy1+ 0
236 | -iy2+ 0
237 | -iy3+ 0
238 | -iy4+ 0
239 | -iy5+ 0
240 | -iy6+ 0
241 | -j+ 1
242 | -k+ 1
243 | -l+ 0
244 | -lp+ 1
245 | -m+ 0
246 | -m1+ 0
247 | -m2+ 0
248 | -m3+ 0
249 | -m4+ 0
250 | -m5+ 0
251 | -m6+ 0
252 | -n+ 0
253 | -ng1+ 0
254 | -ng2+ 0
255 | -ng3+ 0
256 | -ng4+ 0
257 | -ng5+ 0
258 | -ng6+ 0
259 | -o1+ 0
260 | -o2+ 0
261 | -o3+ 0
262 | -o4+ 0
263 | -o5+ 0
264 | -o6+ 0
265 | -ong1+ 0
266 | -ong2+ 0
267 | -ong3+ 0
268 | -ong4+ 0
269 | -ong5+ 0
270 | -ong6+ 0
271 | -ou1+ 0
272 | -ou2+ 0
273 | -ou3+ 0
274 | -ou4+ 0
275 | -ou5+ 0
276 | -ou6+ 0
277 | -p+ 1
278 | -q+ 1
279 | -r+ 0
280 | -s+ 1
281 | -sh+ 1
282 | -sil+ 1
283 | -sp+ 1
284 | -t+ 1
285 | -u1+ 0
286 | -u2+ 0
287 | -u3+ 0
288 | -u4+ 0
289 | -u5+ 0
290 | -u6+ 0
291 | -ua1+ 0
292 | -ua2+ 0
293 | -ua3+ 0
294 | -ua4+ 0
295 | -ua5+ 0
296 | -ua6+ 0
297 | -uai1+ 0
298 | -uai2+ 0
299 | -uai3+ 0
300 | -uai4+ 0
301 | -uai5+ 0
302 | -uai6+ 0
303 | -uan1+ 0
304 | -uan2+ 0
305 | -uan3+ 0
306 | -uan4+ 0
307 | -uan5+ 0
308 | -uan6+ 0
309 | -uang1+ 0
310 | -uang2+ 0
311 | -uang3+ 0
312 | -uang4+ 0
313 | -uang5+ 0
314 | -uang6+ 0
315 | -uei1+ 0
316 | -uei2+ 0
317 | -uei3+ 0
318 | -uei4+ 0
319 | -uei5+ 0
320 | -uei6+ 0
321 | -uen1+ 0
322 | -uen2+ 0
323 | -uen3+ 0
324 | -uen4+ 0
325 | -uen5+ 0
326 | -uen6+ 0
327 | -ueng1+ 0
328 | -ueng2+ 0
329 | -ueng3+ 0
330 | -ueng4+ 0
331 | -ueng5+ 0
332 | -ueng6+ 0
333 | -uo1+ 0
334 | -uo2+ 0
335 | -uo3+ 0
336 | -uo4+ 0
337 | -uo5+ 0
338 | -uo6+ 0
339 | -v1+ 0
340 | -v2+ 0
341 | -v3+ 0
342 | -v4+ 0
343 | -v5+ 0
344 | -v6+ 0
345 | -van1+ 0
346 | -van2+ 0
347 | -van3+ 0
348 | -van4+ 0
349 | -van5+ 0
350 | -van6+ 0
351 | -ve1+ 0
352 | -ve2+ 0
353 | -ve3+ 0
354 | -ve4+ 0
355 | -ve5+ 0
356 | -ve6+ 0
357 | -vn1+ 0
358 | -vn2+ 0
359 | -vn3+ 0
360 | -vn4+ 0
361 | -vn5+ 0
362 | -vn6+ 0
363 | -x+ 1
364 | -z+ 1
365 | -zh+ 1
366 |
--------------------------------------------------------------------------------
/data/putonghua/text/_readme.txt:
--------------------------------------------------------------------------------
1 |
2 | These are the lexicon files from HCSI.
3 |
4 | 1) gbk.bin includes the pronunciations of Chinese characters, in CSV (comma-separated values) format: "Char,Pronunciation,POS,Frequency".
5 |
6 | gbk.bin 包括中文汉字的发音读法,使用CSV格式,每行包括:“汉字,读音,词性,词频”。
7 |
8 | 2) symbol.lex includes the pronunciations of English/Greek letters and words, also in CSV format. The pronunciation follows the CMU dictionary specification.
9 |
10 | symbol.lex 包括英文、希腊字母的读法,使用了CMU dictionary 的标注方法。
11 |
12 | 3) sttable.dat contains the mapping table for simple Traditional-Simplified Chinese and fullwidth-halfwidth form character conversion.
13 |
14 | sttable.dat 包括一个简单的映射表,用来对繁体中文-简体中文之间、半角-全角字符之间进行转换。
15 |
16 |
--------------------------------------------------------------------------------
/data/putonghua/text/sttable.dat:
--------------------------------------------------------------------------------
1 | !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~万与丑专业丛东丝丢两严丧个丰临为丽举么义乌乐乔习乡书买乱争亏云亚产亩亲亿仅从仑仓仪们价众优伙会伞伟传伤伦伪体佣侠侣侥侦侧侨侩俩俭债倾偿储儿兑党兰关兴兹养兽内冈册写军农冯冲决况冻净凄凉凌减凑凛几凤凭凯击凿划刘则刚创删别刽剂剐剑剥剧劝办务动励劲劳势勋匀区医华协单卖卜卢卤卧卫却厂厅历厉压厌厕厢厦厨厩县叁参双发变叙叠叶号叹吁后吓吕吗吨听启吴呕员呛呜咏咙咸响哑哗哟唤啮啸喷嘘嘱嚣团园囱围国图圆圣场坏块坚坛坝坞坟坠垄垒垦垫堑堕墙壮声壳壶处备复够头夸夹夺奋奖奥妆妇妈娄娇娱婴婶孙学孪宁宝实宠审宪宫宽宾寝对寻导寿将尔尘尝尧尸尽层屉届属屡屿岁岂岗岛岭岳岿峡峦崭巩币帅师帐帘帜带帧帮幂干并广庄庆庐库应庙庞废开异弃张弥弯弹强归当录彝彦彻径御忆忧怀态怂怜总恋恳恶恼悦悬悯惊惧惨惩惫惭惮惯愤愿慑懒戏战户扎扑扦执扩扫扬扰抚抠抡抢护报抬担拟拢拣拥拦拧拨择挚挛挝挞挟挠挡挣挤挥捞损捡换捣据捻掳掷掸掺揽搀搁搂搅携摄摆摇摈摊撑撵擞攒敌敛数斋斗斩断无旧时旷昼显晋晒晓晕暂札术朴机杀杂权条来杨杰极构枢枣枪枫柜柠栅标栈栋栏树栖样档桥桨桩梦检椭楼槛横樱橱欢欧歼残殴毁毕毙毡气氢汇汉污汤汹沉沟没沤沥沦沧沪泞泪泻泼泽洁洒洼浅浆浇浊测济浑浓涂涌涛涝涟涡涣涤润涧涨涩淀渊渍渐渔渗温游湾湿溃溅滚滞满滤滥滦滨滩潍潜澜濒灭灯灵灶灾灿炉点炼炽烁烂烃烛烟烦烧烩烫烬热焕爱爷牵牺犊状犹狈狞独狭狮狰狱猎猪猫献獭玛环现珐琐琼瑶瓮电画畅畴疗疟疡疮疯痈痉痒痪痴瘪瘫癣皋皑皱盏盐监盖盗盘着睁瞒瞩矫矾矿码砖砚砾础硅硕确硷碍碱礼祷祸禄离秃秆种积称秸秽税稳穷窃窍窑窜窝窥竖竞笋笔笺笼筑筛筹签简箩篓篮篱类粤粪粮紧纠红纤约级纪纫纬纯纱纲纳纵纶纷纸纹纺纽线练组绅细织终绊绍绎经绑绒结绕绘给绚络绝绞统绢绣绥绦继绩绪续绰绳维绵绷绸综绽绿缀缄缅缆缉缎缓缔缕编缘缚缝缠缨缩缮缴网罗罚罢羡翘翱耸耻聂聋职联聪肃肠肤肾肿胀胁胆胜胶脉脏脐脑脓脚脱脸腊腻腾舆舰舱艰艳艺节芜芦苇苍苏苹范茎茧荆荐荚荡荣荤荧荫药莱莲获莹萝萤营萧萨葱蒋蓝蓟蔷蔼蕴虏虑虚虫虽虾蚀蚁蚂蚕蛊蛮蛰蜕蜗蜡蝇蝉蝎衅衔补衬袄袜袭装裤见观规觅视览觉触誉誊计订讣认讥讨让讫训议讯记讲讳讶许讹论讼讽设访诀证评诅识诈诉诊诌词译试诗诚诛话诞诡询诣该详诧诫诬语误诱诲说诵请诸诺读诽课谁调谅谆谈谊谋谍谎谐谓谗谚谜谢谣谤谦谨谩谬谭谰谱谴贝贞负贡财责贤败账货质贩贪贫贬购贮贯贰贱贴贵贷贸费贺贼贾贿赁赂赃资赊赋赌赎赏赐赔赖赘赚赛赞赠赡赢赣赵赶趋跃践踊踌踪蹿躯车轧轨轩转轮软轰轴轻载轿较辅辆辈辉辊辐辑输辕辖辗辙辞辩辫边辽达迁过迈运还这进远违连迟迭适选逊递逻遗遥邓邮邹邻郑郧郸酝酱酿释里鉴针钉钎钒钓钙钝钞钟钠钡钢钥钦钧钨钩钮钱钳钵钻钾铀铁铂铃铅铆铜铝铡铣铬铭铰铱铲银铸铺链销锁锄锅锈锋锌锐锑锗错锚锡锣锤锥锦锨锭键锯锰锹锻镀镁镇镊镍镐镑镜镣镭镰镶长门闪闭问闯闰闲间闷闸闹闺闻闽阀阁阂阅阉阎阐阑阔队阳阴阵阶际陆陇陈陕陨险随隐隶难雏雾霉静韦韧韩韵页顶顷项顺须顽顾顿颁颂预颅领颇颈颊颐频颓颖颗题颜额颠颤颧风飘飞饥饭饮饯饰饱饲饵饶饺饼饿馁馅馆馈馋馏馒马驭驮驯驰驱驳驴驶驹驻驼驾骂骄骆骇骋验骏骑骗骚骡骤鱼鲁鲍鲜鲤鲸鳃鳖鳞鸟鸡鸣鸥鸦鸭鸯鸳鸵鸽鸿鹃鹅鹊鹏鹤鹰麦黄齐齿龄龋龙龚龟﹫
2 | !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~萬與醜專業叢東絲丟兩嚴喪個豐臨爲麗舉麽義烏樂喬習鄉書買亂爭虧雲亞産畝親億僅從侖倉儀們價衆優夥會傘偉傳傷倫僞體傭俠侶僥偵側僑儈倆儉債傾償儲兒兌黨蘭關興茲養獸內岡冊寫軍農馮沖決況凍淨淒涼淩減湊凜幾鳳憑凱擊鑿劃劉則剛創刪別劊劑剮劍剝劇勸辦務動勵勁勞勢勳勻區醫華協單賣蔔盧鹵臥衛卻廠廳曆厲壓厭廁廂廈廚廄縣三參雙發變敘疊葉號歎籲後嚇呂嗎噸聽啓吳嘔員嗆嗚詠嚨鹹響啞嘩喲喚齧嘯噴噓囑囂團園囪圍國圖圓聖場壞塊堅壇壩塢墳墜壟壘墾墊塹墮牆壯聲殼壺處備複夠頭誇夾奪奮獎奧妝婦媽婁嬌娛嬰嬸孫學孿甯寶實寵審憲宮寬賓寢對尋導壽將爾塵嘗堯屍盡層屜屆屬屢嶼歲豈崗島嶺嶽巋峽巒嶄鞏幣帥師帳簾幟帶幀幫冪幹並廣莊慶廬庫應廟龐廢開異棄張彌彎彈強歸當錄彜彥徹徑禦憶憂懷態慫憐總戀懇惡惱悅懸憫驚懼慘懲憊慚憚慣憤願懾懶戲戰戶紮撲扡執擴掃揚擾撫摳掄搶護報擡擔擬攏揀擁攔擰撥擇摯攣撾撻挾撓擋掙擠揮撈損撿換搗據撚擄擲撣摻攬攙擱摟攪攜攝擺搖擯攤撐攆擻攢敵斂數齋鬥斬斷無舊時曠晝顯晉曬曉暈暫劄術樸機殺雜權條來楊傑極構樞棗槍楓櫃檸柵標棧棟欄樹棲樣檔橋槳樁夢檢橢樓檻橫櫻櫥歡歐殲殘毆毀畢斃氈氣氫彙漢汙湯洶沈溝沒漚瀝淪滄滬濘淚瀉潑澤潔灑窪淺漿澆濁測濟渾濃塗湧濤澇漣渦渙滌潤澗漲澀澱淵漬漸漁滲溫遊灣濕潰濺滾滯滿濾濫灤濱灘濰潛瀾瀕滅燈靈竈災燦爐點煉熾爍爛烴燭煙煩燒燴燙燼熱煥愛爺牽犧犢狀猶狽獰獨狹獅猙獄獵豬貓獻獺瑪環現琺瑣瓊瑤甕電畫暢疇療瘧瘍瘡瘋癰痙癢瘓癡癟癱癬臯皚皺盞鹽監蓋盜盤著睜瞞矚矯礬礦碼磚硯礫礎矽碩確鹼礙堿禮禱禍祿離禿稈種積稱稭穢稅穩窮竊竅窯竄窩窺豎競筍筆箋籠築篩籌簽簡籮簍籃籬類粵糞糧緊糾紅纖約級紀紉緯純紗綱納縱綸紛紙紋紡紐線練組紳細織終絆紹繹經綁絨結繞繪給絢絡絕絞統絹繡綏縧繼績緒續綽繩維綿繃綢綜綻綠綴緘緬纜緝緞緩締縷編緣縛縫纏纓縮繕繳網羅罰罷羨翹翺聳恥聶聾職聯聰肅腸膚腎腫脹脅膽勝膠脈髒臍腦膿腳脫臉臘膩騰輿艦艙艱豔藝節蕪蘆葦蒼蘇蘋範莖繭荊薦莢蕩榮葷熒蔭藥萊蓮獲瑩蘿螢營蕭薩蔥蔣藍薊薔藹蘊虜慮虛蟲雖蝦蝕蟻螞蠶蠱蠻蟄蛻蝸蠟蠅蟬蠍釁銜補襯襖襪襲裝褲見觀規覓視覽覺觸譽謄計訂訃認譏討讓訖訓議訊記講諱訝許訛論訟諷設訪訣證評詛識詐訴診謅詞譯試詩誠誅話誕詭詢詣該詳詫誡誣語誤誘誨說誦請諸諾讀誹課誰調諒諄談誼謀諜謊諧謂讒諺謎謝謠謗謙謹謾謬譚讕譜譴貝貞負貢財責賢敗賬貨質販貪貧貶購貯貫貳賤貼貴貸貿費賀賊賈賄賃賂贓資賒賦賭贖賞賜賠賴贅賺賽贊贈贍贏贛趙趕趨躍踐踴躊蹤躥軀車軋軌軒轉輪軟轟軸輕載轎較輔輛輩輝輥輻輯輸轅轄輾轍辭辯辮邊遼達遷過邁運還這進遠違連遲叠適選遜遞邏遺遙鄧郵鄒鄰鄭鄖鄲醞醬釀釋裏鑒針釘釺釩釣鈣鈍鈔鍾鈉鋇鋼鑰欽鈞鎢鈎鈕錢鉗缽鑽鉀鈾鐵鉑鈴鉛鉚銅鋁鍘銑鉻銘鉸銥鏟銀鑄鋪鏈銷鎖鋤鍋鏽鋒鋅銳銻鍺錯錨錫鑼錘錐錦鍁錠鍵鋸錳鍬鍛鍍鎂鎮鑷鎳鎬鎊鏡鐐鐳鐮鑲長門閃閉問闖閏閑間悶閘鬧閨聞閩閥閣閡閱閹閻闡闌闊隊陽陰陣階際陸隴陳陝隕險隨隱隸難雛霧黴靜韋韌韓韻頁頂頃項順須頑顧頓頒頌預顱領頗頸頰頤頻頹穎顆題顔額顛顫顴風飄飛饑飯飲餞飾飽飼餌饒餃餅餓餒餡館饋饞餾饅馬馭馱馴馳驅駁驢駛駒駐駝駕罵驕駱駭騁驗駿騎騙騷騾驟魚魯鮑鮮鯉鯨鰓鼈鱗鳥雞鳴鷗鴉鴨鴦鴛鴕鴿鴻鵑鵝鵲鵬鶴鷹麥黃齊齒鼀齲龍龔龜@
3 | !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~丟並亂亞來侖侶俠倆倉個們倫偉側偵傑傘備傭傳債傷傾僅僑僞僥價儀億儈儉償優儲兌兒內兩冊冪凍凜凱別刪則剛剝剮創劃劄劇劉劊劍劑勁動務勝勞勢勳勵勸勻區協卻厭厲參叢吳呂員問啓啞喚喪喬單喲嗆嗎嗚嘔嘗嘩嘯噓噴噸嚇嚨嚴囂囑囪國圍園圓圖團執堅堯報場堿塊塗塢塵塹墊墜墮墳墾壇壓壘壞壟壩壯壺壽夠夢夾奧奪奮妝娛婁婦媽嬌嬰嬸孫學孿宮寢實審寫寬寵寶將專尋對導屆屍屜屢層屬岡島峽崗嶄嶺嶼嶽巋巒帥師帳帶幀幟幣幫幹幾庫廁廂廄廈廚廟廠廢廣廬廳張強彈彌彎彙彜彥後徑從徹恥悅悶惡惱愛態慘慚慣慫慮慶憂憊憐憑憚憤憫憲憶懇應懲懶懷懸懼懾戀戰戲戶扡挾掃掄掙揀揚換揮損搖搗搶摟摯摳摻撈撐撓撚撣撥撫撲撻撾撿擁擄擇擊擋擔據擠擡擬擯擰擱擲擴擺擻擾攆攏攔攙攜攝攢攣攤攪攬敗敘敵數斂斃斬斷時晉晝暈暢暫曆曉曠曬書會東柵條棄棗棟棧棲楊楓業極榮構槍槳樁樂樓標樞樣樸樹橋機橢橫檔檢檸檻櫃櫥櫻欄權欽歎歐歡歲歸殘殲殺殼毀毆氈氣氫汙決沒沖況洶涼淒淚淨淩淪淵淺渙減渦測渾湊湧湯溝溫滄滅滌滬滯滲滾滿漁漚漢漣漬漲漸漿潑潔潛潤潰澀澆澇澗澤澱濁濃濕濘濟濤濫濰濱濺濾瀉瀕瀝瀾灑灘灣灤災烏烴無煉煙煥煩熒熱熾燈燒燙營燦燭燴燼爍爐爛爭爲爺爾牆牽犢犧狀狹狽猙猶獄獅獎獨獰獲獵獸獺獻現琺瑣瑤瑩瑪環瓊甕産甯畝畢畫異當疇疊痙瘋瘍瘓瘡瘧療癟癡癢癬癰癱發皚皺盜盞盡監盤盧睜瞞矚矯硯碩確碼磚礎礙礦礫礬祿禍禦禮禱禿稅稈稭種稱積穎穢穩窩窪窮窯窺竄竅竈竊競筆筍箋節範築篩簍簡簽簾籃籌籠籬籮籲粵糞糧糾紀約紅紉紋納紐純紗紙級紛紡紮細紳紹終組絆結絕絞絡絢給絨統絲絹綁綏經綜綠綢維綱網綴綸綻綽綿緊緒緘線緝緞締緣編緩緬緯練縛縣縧縫縮縱縷總績繃織繕繞繡繩繪繭繳繹繼續纏纓纖纜缽罰罵罷羅羨義習翹翺聖聞聯聰聲聳聶職聽聾肅脅脈脫脹腎腦腫腳腸膚膠膩膽膿臉臍臘臥臨臯與興舉舊艙艦艱茲荊莊莖莢華萊萬葉葦葷蒼蓋蓮蔔蔣蔥蔭蕩蕪蕭薊薔薦薩藍藝藥藹蘆蘇蘊蘋蘭蘿處虛虜號虧蛻蝕蝦蝸螞螢蟄蟬蟲蟻蠅蠍蠟蠱蠶蠻衆術衛裏補裝複褲襖襪襯襲見規覓視親覺覽觀觸訂訃計訊討訓訖記訛訝訟訣訪設許訴診詐評詛詞詠詢詣試詩詫詭話該詳誅誇認誕誘語誠誡誣誤誦誨說誰課誹誼調諄談請諒論諜諧諱諷諸諺諾謀謂謄謅謊謎謗謙講謝謠謬謹謾證譏識譚譜譯議譴護譽讀變讒讓讕豈豎豐豔豬貓貝貞負財貢貧貨販貪貫責貯貳貴貶買貸費貼貿賀賂賃賄資賈賊賒賓賜賞賠賢賣賤賦質賬賭賴賺購賽贅贈贊贍贏贓贖贛趕趙趨踐踴蹤躊躍躥軀車軋軌軍軒軟軸較載輔輕輛輝輥輩輪輯輸輻輾輿轄轅轉轍轎轟辦辭辮辯農這連進遊運過達違遙遜遞遠適遲遷選遺遼邁還邊邏郵鄉鄒鄖鄧鄭鄰鄲醜醞醫醬釀釁釋釘針釣釩釺鈉鈍鈎鈔鈕鈞鈣鈴鈾鉀鉑鉗鉚鉛鉸鉻銀銅銑銘銜銥銳銷銻鋁鋅鋇鋒鋤鋪鋸鋼錄錐錘錠錢錦錨錫錯錳鍁鍋鍍鍘鍛鍬鍵鍺鍾鎂鎊鎖鎢鎬鎮鎳鏈鏟鏡鏽鐐鐮鐳鐵鑄鑒鑰鑲鑷鑼鑽鑿長門閃閉開閏閑間閘閡閣閥閨閩閱閹閻闊闌闖關闡陝陣陰陳陸陽隊階隕際隨險隱隴隸雖雙雛雜雞離難雲電霧靈靜鞏韋韌韓韻響頁頂頃項順須頌預頑頒頓頗領頤頭頰頸頹頻顆題額顔願顛類顧顫顯顱顴風飄飛飯飲飼飽飾餃餅養餌餒餓餞餡館餾饅饋饑饒饞馬馭馮馱馳馴駁駐駒駕駛駝駭駱駿騁騎騙騰騷騾驅驕驗驚驟驢髒體鬥鬧魚魯鮑鮮鯉鯨鰓鱗鳥鳳鳴鴉鴕鴛鴦鴨鴻鴿鵑鵝鵬鵲鶴鷗鷹鹵鹹鹼鹽麗麥黃點黨黴鼀鼈齊齋齒齧齲龍龐龔龜﹫
4 | !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~丢并乱亚来仑侣侠俩仓个们伦伟侧侦杰伞备佣传债伤倾仅侨伪侥价仪亿侩俭偿优储兑儿内两册幂冻凛凯别删则刚剥剐创划札剧刘刽剑剂劲动务胜劳势勋励劝匀区协却厌厉参丛吴吕员问启哑唤丧乔单哟呛吗呜呕尝哗啸嘘喷吨吓咙严嚣嘱囱国围园圆图团执坚尧报场碱块涂坞尘堑垫坠堕坟垦坛压垒坏垄坝壮壶寿够梦夹奥夺奋妆娱娄妇妈娇婴婶孙学孪宫寝实审写宽宠宝将专寻对导届尸屉屡层属冈岛峡岗崭岭屿岳岿峦帅师帐带帧帜币帮干几库厕厢厩厦厨庙厂废广庐厅张强弹弥弯汇彝彦后径从彻耻悦闷恶恼爱态惨惭惯怂虑庆忧惫怜凭惮愤悯宪忆恳应惩懒怀悬惧慑恋战戏户扦挟扫抡挣拣扬换挥损摇捣抢搂挚抠掺捞撑挠捻掸拨抚扑挞挝捡拥掳择击挡担据挤抬拟摈拧搁掷扩摆擞扰撵拢拦搀携摄攒挛摊搅揽败叙敌数敛毙斩断时晋昼晕畅暂历晓旷晒书会东栅条弃枣栋栈栖杨枫业极荣构枪桨桩乐楼标枢样朴树桥机椭横档检柠槛柜橱樱栏权钦叹欧欢岁归残歼杀壳毁殴毡气氢污决没冲况汹凉凄泪净凌沦渊浅涣减涡测浑凑涌汤沟温沧灭涤沪滞渗滚满渔沤汉涟渍涨渐浆泼洁潜润溃涩浇涝涧泽淀浊浓湿泞济涛滥潍滨溅滤泻濒沥澜洒滩湾滦灾乌烃无炼烟焕烦荧热炽灯烧烫营灿烛烩烬烁炉烂争为爷尔墙牵犊牺状狭狈狰犹狱狮奖独狞获猎兽獭献现珐琐瑶莹玛环琼瓮产宁亩毕画异当畴叠痉疯疡痪疮疟疗瘪痴痒癣痈瘫发皑皱盗盏尽监盘卢睁瞒瞩矫砚硕确码砖础碍矿砾矾禄祸御礼祷秃税秆秸种称积颖秽稳窝洼穷窑窥窜窍灶窃竞笔笋笺节范筑筛篓简签帘篮筹笼篱箩吁粤粪粮纠纪约红纫纹纳纽纯纱纸级纷纺扎细绅绍终组绊结绝绞络绚给绒统丝绢绑绥经综绿绸维纲网缀纶绽绰绵紧绪缄线缉缎缔缘编缓缅纬练缚县绦缝缩纵缕总绩绷织缮绕绣绳绘茧缴绎继续缠缨纤缆钵罚骂罢罗羡义习翘翱圣闻联聪声耸聂职听聋肃胁脉脱胀肾脑肿脚肠肤胶腻胆脓脸脐腊卧临皋与兴举旧舱舰艰兹荆庄茎荚华莱万叶苇荤苍盖莲卜蒋葱荫荡芜萧蓟蔷荐萨蓝艺药蔼芦苏蕴苹兰萝处虚虏号亏蜕蚀虾蜗蚂萤蛰蝉虫蚁蝇蝎蜡蛊蚕蛮众术卫里补装复裤袄袜衬袭见规觅视亲觉览观触订讣计讯讨训讫记讹讶讼诀访设许诉诊诈评诅词咏询诣试诗诧诡话该详诛夸认诞诱语诚诫诬误诵诲说谁课诽谊调谆谈请谅论谍谐讳讽诸谚诺谋谓誊诌谎谜谤谦讲谢谣谬谨谩证讥识谭谱译议谴护誉读变谗让谰岂竖丰艳猪猫贝贞负财贡贫货贩贪贯责贮贰贵贬买贷费贴贸贺赂赁贿资贾贼赊宾赐赏赔贤卖贱赋质账赌赖赚购赛赘赠赞赡赢赃赎赣赶赵趋践踊踪踌跃蹿躯车轧轨军轩软轴较载辅轻辆辉辊辈轮辑输辐辗舆辖辕转辙轿轰办辞辫辩农这连进游运过达违遥逊递远适迟迁选遗辽迈还边逻邮乡邹郧邓郑邻郸丑酝医酱酿衅释钉针钓钒钎钠钝钩钞钮钧钙铃铀钾铂钳铆铅铰铬银铜铣铭衔铱锐销锑铝锌钡锋锄铺锯钢录锥锤锭钱锦锚锡错锰锨锅镀铡锻锹键锗钟镁镑锁钨镐镇镍链铲镜锈镣镰镭铁铸鉴钥镶镊锣钻凿长门闪闭开闰闲间闸阂阁阀闺闽阅阉阎阔阑闯关阐陕阵阴陈陆阳队阶陨际随险隐陇隶虽双雏杂鸡离难云电雾灵静巩韦韧韩韵响页顶顷项顺须颂预顽颁顿颇领颐头颊颈颓频颗题额颜愿颠类顾颤显颅颧风飘飞饭饮饲饱饰饺饼养饵馁饿饯馅馆馏馒馈饥饶馋马驭冯驮驰驯驳驻驹驾驶驼骇骆骏骋骑骗腾骚骡驱骄验惊骤驴脏体斗闹鱼鲁鲍鲜鲤鲸鳃鳞鸟凤鸣鸦鸵鸳鸯鸭鸿鸽鹃鹅鹏鹊鹤鸥鹰卤咸硷盐丽麦黄点党霉龄鳖齐斋齿啮龋龙庞龚龟@
5 |
--------------------------------------------------------------------------------
/data/putonghua/text/symbol.lex:
--------------------------------------------------------------------------------
1 | zh-cmn,x-pinyin
2 | @,_AE1/T,x,1
3 | A,_EY1,x,1
4 | B,_B/IY1,x,1
5 | C,_S/IY1,x,1
6 | D,_D/IY1,x,1
7 | E,_IY1,x,1
8 | F,_EH1/F,x,1
9 | G,_JH/IY1,x,1
10 | H,_EY1/CH,x,1
11 | I,_AY1,x,1
12 | J,_JH/EY1,x,1
13 | K,_K/EY1,x,1
14 | L,_EH1/L,x,1
15 | M,_EH1/M,x,1
16 | N,_EH1/N,x,1
17 | O,_OW1,x,1
18 | P,_P/IY1,x,1
19 | Q,_K/Y/UW1,x,1
20 | R,_AA1/R,x,1
21 | S,_EH1/S,x,1
22 | T,_T/IY1,x,1
23 | U,_Y/UW1,x,1
24 | V,_V/IY1,x,1
25 | W,_D/AH1_B/AH0/L_Y/UW0,x,1
26 | X,_EH1/K/S,x,1
27 | Y,_W/AY1,x,1
28 | Z,_Z/IY1,x,1
29 | a,_EY1,x,1
30 | b,_B/IY1,x,1
31 | c,_S/IY1,x,1
32 | d,_D/IY1,x,1
33 | e,_IY1,x,1
34 | f,_EH1/F,x,1
35 | g,_JH/IY1,x,1
36 | h,_EY1/CH,x,1
37 | i,_AY1,x,1
38 | j,_JH/EY1,x,1
39 | k,_K/EY1,x,1
40 | l,_EH1/L,x,1
41 | m,_EH1/M,x,1
42 | n,_EH1/N,x,1
43 | o,_OW1,x,1
44 | p,_P/IY1,x,1
45 | q,_K/Y/UW1,x,1
46 | r,_AA1/R,x,1
47 | s,_EH1/S,x,1
48 | t,_T/IY1,x,1
49 | u,_Y/UW1,x,1
50 | v,_V/IY1,x,1
51 | w,_D/AH1_B/AH0/L_Y/UW0,x,1
52 | x,_EH1/K/S,x,1
53 | y,_W/AY1,x,1
54 | z,_Z/IY1,x,1
55 | Α,_AA1_L_F/AH0,x,1
56 | Β,_B/EY1_T/AH0,x,1
57 | Γ,_G/AA1_M/AA0,x,1
58 | Δ,_D/EH0_L_T/AH0,x,1
59 | Ε,_EH1/P_S/AH0_L/AA2/N,x,1
60 | Ζ,_Z/EY1_T/AH0,x,1
61 | Η,_IY1_T/AH0,x,1
62 | Θ,_TH/EY1_T/AH0,x,1
63 | Ι,_AY0_OW1_T/AH0,x,1
64 | Κ,_K/AE1_P/AH0,x,1
65 | Λ,_L/AE1/M_D/AH0,x,1
66 | Μ,_M/Y/UW1,x,1
67 | Ν,_N/Y/UW1,x,1
68 | Ξ,_Z/AY1,x,1
69 | Ο,_OW0_M/AY1_K/R/AA2/N,x,1
70 | Π,_P/AY1,x,1
71 | Ρ,_R/OW1,x,1
72 | Σ,_S/IH1/G_M/AH0,x,1
73 | ς,_S/IH1/G_M/AH0,x,1
74 | Τ,_T/AW1,x,1
75 | Υ,_AH1/P_S/AY1_L/AA2/N,x,1
76 | Φ,_F/AY1,x,1
77 | Χ,_K/AY1,x,1
78 | Ψ,_P/S/AY1,x,1
79 | Ω,_OW0_M/IH1_G/AH0,x,1
80 | α,_AA1_L_F/AH0,x,1
81 | β,_B/EY1_T/AH0,x,1
82 | γ,_G/AA1_M/AA0,x,1
83 | δ,_D/EH0_L_T/AH0,x,1
84 | ε,_EH1/P_S/AH0_L/AA2/N,x,1
85 | ζ,_Z/EY1_T/AH0,x,1
86 | η,_IY1_T/AH0,x,1
87 | θ,_TH/EY1_T/AH0,x,1
88 | ι,_AY0_OW1_T/AH0,x,1
89 | κ,_K/AE1_P/AH0,x,1
90 | λ,_L/AE1/M_D/AH0,x,1
91 | μ,_M/Y/UW1,x,1
92 | ν,_N/Y/UW1,x,1
93 | ξ,_Z/AY1,x,1
94 | ο,_OW0_M/AY1_K/R/AA2/N,x,1
95 | π,_P/AY1,x,1
96 | ρ,_R/OW1,x,1
97 | σ,_S/IH1/G_M/AH0,x,1
98 | τ,_T/AW1,x,1
99 | υ,_AH1/P_S/AY1_L/AA2/N,x,1
100 | φ,_F/AY1,x,1
101 | χ,_K/AY1,x,1
102 | ψ,_P/S/AY1,x,1
103 | ω,_OW0_M/IH1_G/AH0,x,1
104 | china,_CH/AY1_N/AH0,x,1
105 | hello,_HH/EH0_L/OW1,x,1
106 | hi,_HH/AY1,x,1
107 |
--------------------------------------------------------------------------------
/demo/CrystalTest/ReadMe.txt:
--------------------------------------------------------------------------------
1 | CrystalTest:
2 |
3 | Test project illustrating the detailed processing procedures of the Crystal Text-to-Speech (TTS) engine.
4 |
--------------------------------------------------------------------------------
/demo/CrystalTest/main.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thuhcsi/Crystal/d129e5400360e7c696d071c1576803917aaf9286/demo/CrystalTest/main.cpp
--------------------------------------------------------------------------------
/document/docs/2007 TTS Framework.ppt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thuhcsi/Crystal/d129e5400360e7c696d071c1576803917aaf9286/document/docs/2007 TTS Framework.ppt
--------------------------------------------------------------------------------
/document/docs/Development Guide.doc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thuhcsi/Crystal/d129e5400360e7c696d071c1576803917aaf9286/document/docs/Development Guide.doc
--------------------------------------------------------------------------------
/document/docs/Module Division and SSML Specification for Crystal.doc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thuhcsi/Crystal/d129e5400360e7c696d071c1576803917aaf9286/document/docs/Module Division and SSML Specification for Crystal.doc
--------------------------------------------------------------------------------
/document/docs/Readme.doc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thuhcsi/Crystal/d129e5400360e7c696d071c1576803917aaf9286/document/docs/Readme.doc
--------------------------------------------------------------------------------
/document/docs/Support of SSML 1.1.doc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thuhcsi/Crystal/d129e5400360e7c696d071c1576803917aaf9286/document/docs/Support of SSML 1.1.doc
--------------------------------------------------------------------------------
/document/docs/XML & SSML Introduction.ppt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thuhcsi/Crystal/d129e5400360e7c696d071c1576803917aaf9286/document/docs/XML & SSML Introduction.ppt
--------------------------------------------------------------------------------
/document/docs/architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thuhcsi/Crystal/d129e5400360e7c696d071c1576803917aaf9286/document/docs/architecture.png
--------------------------------------------------------------------------------
/document/docs/dynamic_module_loading.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thuhcsi/Crystal/d129e5400360e7c696d071c1576803917aaf9286/document/docs/dynamic_module_loading.png
--------------------------------------------------------------------------------
/document/docs/interface.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thuhcsi/Crystal/d129e5400360e7c696d071c1576803917aaf9286/document/docs/interface.png
--------------------------------------------------------------------------------
/document/docs/ssml_interface.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thuhcsi/Crystal/d129e5400360e7c696d071c1576803917aaf9286/document/docs/ssml_interface.png
--------------------------------------------------------------------------------
/document/readme.txt:
--------------------------------------------------------------------------------
1 |
2 | Please read "docs/Readme.doc" carefully first.
3 |
--------------------------------------------------------------------------------
/engine/csttools/cmn/cmn_error.h:
--------------------------------------------------------------------------------
1 | //
2 | // Crystal Text-to-Speech Engine
3 | //
4 | // Copyright (c) 2007 THU-CUHK Joint Research Center for
5 | // Media Sciences, Technologies and Systems. All rights reserved.
6 | //
7 | // http://mjrc.sz.tsinghua.edu.cn
8 | //
9 | // Redistribution and use in source and binary forms, with or without
10 | // modification, is not allowed, unless a valid written license is
11 | // granted by THU-CUHK Joint Research Center.
12 | //
13 | // THU-CUHK Joint Research Center has the rights to create, modify,
14 | // copy, compile, remove, rename, explain and deliver the source codes.
15 | //
16 |
17 |
18 | ///
19 | /// @file
20 | ///
21 | /// @brief
22 | ///
23 | /// @version 0.1.0
24 | /// @date 2007/06/05
25 | ///
26 | /// History:
27 | /// - Version: 0.1.0
28 | /// Author: Jackie (jackiecao@gmail.com)
29 | /// Date: 2007/06/05
30 | /// Changed: Created
31 | ///
32 |
33 |
34 | #ifndef _CST_TTS_BASE_CMN_ERROR_H_
35 | #define _CST_TTS_BASE_CMN_ERROR_H_
36 |
37 | #include
38 |
namespace cst
{

// NOTE: every code below is a preprocessor macro, so the surrounding
// namespace does not scope it; the names are visible globally.

// ---------------------------------------------------------------------
// Codes taken over from WinError.h
// ---------------------------------------------------------------------

/// ERROR_SUCCESS: The operation completed successfully.
#define ERROR_SUCCESS 0L

/// ERROR_BAD_FORMAT: An attempt was made to load a program with an
/// incorrect format.
#define ERROR_BAD_FORMAT 11L

/// ERROR_INVALID_PARAMETER: The parameter is incorrect.
#define ERROR_INVALID_PARAMETER 87L

/// ERROR_OPEN_FAILED: The system cannot open the device or file specified.
#define ERROR_OPEN_FAILED 110L

/// ERROR_OUTOFMEMORY: Not enough storage is available to complete this
/// operation.
#define ERROR_OUTOFMEMORY 14L

/// Base value for the engine specific codes defined below; each of them is
/// expressed as an offset from this number.
#define ERROR_MAX_EXIST_ERROR_NO 13884L

// ---------------------------------------------------------------------
// User defined error codes
// ---------------------------------------------------------------------
#define ERROR_ENGINE_TYPE_INCORRECT ( ERROR_MAX_EXIST_ERROR_NO + 1 )
#define ERROR_INVALID_SSML_DOCUMENT ( ERROR_MAX_EXIST_ERROR_NO + 2 )
#define ERROR_NOT_INITIALIZED ( ERROR_MAX_EXIST_ERROR_NO + 3 )
#define ERROR_UNSUPPORTED_LANGUAGE ( ERROR_MAX_EXIST_ERROR_NO + 4 )
#define ERROR_DATA_READ_FAULT ( ERROR_MAX_EXIST_ERROR_NO + 5 )
#define ERROR_NOT_IMPLEMENTED ( ERROR_MAX_EXIST_ERROR_NO + 6 )

} // End of namespace cst
102 |
103 | #endif // End of _CST_TTS_BASE_CMN_ERROR_H_
--------------------------------------------------------------------------------
/engine/csttools/cmn/cmn_file.cpp:
--------------------------------------------------------------------------------
1 | //
2 | // Crystal Text-to-Speech Engine
3 | //
4 | // Copyright (c) 2007 THU-CUHK Joint Research Center for
5 | // Media Sciences, Technologies and Systems. All rights reserved.
6 | //
7 | // http://mjrc.sz.tsinghua.edu.cn
8 | //
9 | // Redistribution and use in source and binary forms, with or without
10 | // modification, is not allowed, unless a valid written license is
11 | // granted by THU-CUHK Joint Research Center.
12 | //
13 | // THU-CUHK Joint Research Center has the rights to create, modify,
14 | // copy, compile, remove, rename, explain and deliver the source codes.
15 | //
16 |
17 |
18 | ///
19 | /// @file
20 | ///
21 | /// @brief Implementation file of encapsulated file manipulation.
22 | ///
23 | /// History:
24 | /// - Version: 0.1.0
25 | /// Author: John (john.zywu@gmail.com)
26 | /// Date: 2007/06/14
27 | /// Changed: Created
28 | ///
29 |
30 | #include "cmn_file.h"
31 | #include
32 |
33 | namespace cst
34 | {
35 | namespace cmn
36 | {
37 | FILE* wfopen(const wchar_t *filename, const wchar_t *mode)
38 | {
39 | #if defined(WIN32) || defined(WINCE)
40 | // call _wfopen directly
41 | FILE* fp = NULL;
42 | _wfopen_s(&fp, filename, mode);
43 | return fp;
44 | #elif defined(__GNUC__)
45 | // convert from UTF-16 to UTF-8, and
46 | // call fopen
47 | std::string mbsName = str::wcstombs(filename);
48 | std::string mbsMode = str::wcstombs(mode);
49 | return fopen(mbsName.c_str(), mbsMode.c_str());
50 | #else
51 | return NULL;
52 | #endif
53 | }
54 |
55 | bool CFile::open(const wchar_t *filename, const wchar_t *mode)
56 | {
57 | if (m_pFile!=NULL)
58 | close();
59 | m_pFile = wfopen(filename, mode);
60 | return (m_pFile!=NULL);
61 | }
62 |
63 | bool CFile::close()
64 | {
65 | if (m_pFile==NULL || fclose(m_pFile)==0)
66 | {
67 | m_pFile = NULL;
68 | return true;
69 | }
70 | return false;
71 | }
72 |
73 | size_t CFile::read(void *buffer, size_t size, size_t count) const
74 | {
75 | if (m_pFile==NULL)
76 | return 0;
77 | return fread(buffer, size, count, m_pFile);
78 | }
79 |
80 | size_t CFile::write(const void *buffer, size_t size, size_t count) const
81 | {
82 | if (m_pFile==NULL || buffer==NULL)
83 | return 0;
84 | return fwrite(buffer, size, count, m_pFile);
85 | }
86 |
87 | bool CFile::seek(long long offset, int origin) const
88 | {
89 | if (m_pFile==NULL)
90 | return false;
91 | #if defined(WIN32) || defined(WINCE)
92 | if (_fseeki64(m_pFile, offset, origin)!=0)
93 | return false;
94 | #else
95 | if (fseek(m_pFile, offset, origin)!=0)
96 | return false;
97 | #endif
98 | return true;
99 | }
100 |
101 | long long CFile::tell() const
102 | {
103 | if (m_pFile==NULL)
104 | return -1;
105 | #if defined(WIN32) || defined(WINCE)
106 | return _ftelli64(m_pFile);
107 | #else
108 | return ftell(m_pFile);
109 | #endif
110 | }
111 | }
112 | }
113 |
--------------------------------------------------------------------------------
/engine/csttools/cmn/cmn_file.h:
--------------------------------------------------------------------------------
1 | //
2 | // Crystal Text-to-Speech Engine
3 | //
4 | // Copyright (c) 2007 THU-CUHK Joint Research Center for
5 | // Media Sciences, Technologies and Systems. All rights reserved.
6 | //
7 | // http://mjrc.sz.tsinghua.edu.cn
8 | //
9 | // Redistribution and use in source and binary forms, with or without
10 | // modification, is not allowed, unless a valid written license is
11 | // granted by THU-CUHK Joint Research Center.
12 | //
13 | // THU-CUHK Joint Research Center has the rights to create, modify,
14 | // copy, compile, remove, rename, explain and deliver the source codes.
15 | //
16 |
17 |
18 | ///
19 | /// @file
20 | ///
21 | /// @brief Definition of encapsulated file manipulation.
22 | ///
23 | /// History:
24 | /// - Version: 0.1.0
25 | /// Author: John (john.zywu@gmail.com)
26 | /// Date: 2007/06/14
27 | /// Changed: Created
28 | ///
29 |
30 | #ifndef _CST_TOOLS_CMN_FILE_H_
31 | #define _CST_TOOLS_CMN_FILE_H_
32 |
33 | #include
34 |
35 | namespace cst
36 | {
37 | namespace cmn
38 | {
///
/// @brief Wide-character counterpart of fopen().
///        The returned handle can be released with fclose().
///
/// @param [in] filename Name of the file to be opened
/// @param [in] mode     Access mode string, passed on to the underlying open call
///
/// @return Handle to the opened file, or NULL if the file could not be opened.
///
/// @note Windows builds call _wfopen_s().
///       GCC builds convert both arguments to narrow strings first and then call fopen().
///
FILE* wfopen(const wchar_t *filename, const wchar_t *mode);
52 |
53 |
///
/// @brief Thin wrapper around a C FILE handle
///
/// All FILE related calls are kept behind this class so that the file API
/// underneath can be exchanged later without touching the callers.
///
/// @note The class declares no copy constructor or assignment operator,
///       while its destructor closes the handle. Avoid copying an instance
///       that has a file attached.
///
class CFile
{
public:
    ///
    /// @brief Create an object with no file attached
    ///
    CFile()
        : m_pFile(NULL)
    {
    }

    ///
    /// @brief Destroy the object; an attached file is closed
    ///
    virtual ~CFile()
    {
        close();
    }

    ///
    /// @brief Attach the object to a file
    ///
    /// @param [in] filename Name of the file to be opened
    /// @param [in] mode     Access mode string for the open call
    ///
    /// @return true if the file was opened, false otherwise
    ///
    virtual bool open(const wchar_t *filename, const wchar_t *mode);

    ///
    /// @brief Close the attached file, if there is one
    ///
    /// @return true if the file was closed (or none was attached), false otherwise
    ///
    virtual bool close();

    ///
    /// @brief Reposition the file pointer
    ///
    /// @param [in] offset Distance in bytes, measured from origin
    /// @param [in] origin Reference position: SEEK_CUR, SEEK_END or SEEK_SET
    ///
    /// @return true if the file pointer was moved, false otherwise
    ///
    virtual bool seek(long long offset, int origin) const;

    ///
    /// @brief Query the file pointer
    ///
    /// @return Current position of the file pointer
    ///
    virtual long long tell() const;

    ///
    /// @brief Fetch data from the file
    ///
    /// @param [out] buffer Destination for the data
    /// @param [in]  size   Size of one item in bytes
    /// @param [in]  count  Upper bound on the number of items to fetch
    ///
    /// @return Count of complete items that were read
    ///
    virtual size_t read(void *buffer, size_t size, size_t count) const;

    ///
    /// @brief Store data in the file
    ///
    /// @param [in] buffer Source of the data
    /// @param [in] size   Size of one item in bytes
    /// @param [in] count  Upper bound on the number of items to store
    ///
    /// @return Count of complete items that were written
    ///
    virtual size_t write(const void *buffer, size_t size, size_t count) const;

    ///
    /// @brief Tell whether a file is currently attached
    ///
    virtual bool opened() const
    {
        return (NULL != m_pFile);
    }

protected:
    FILE *m_pFile; ///< File handle, NULL while no file is attached
};
139 |
140 | }
141 | }
142 |
143 | #endif//_CST_TOOLS_CMN_FILE_H_
144 |
--------------------------------------------------------------------------------
/engine/csttools/cmn/cmn_stack.h:
--------------------------------------------------------------------------------
1 | //
2 | // Crystal Text-to-Speech Engine
3 | //
4 | // Copyright (c) 2007 THU-CUHK Joint Research Center for
5 | // Media Sciences, Technologies and Systems. All rights reserved.
6 | //
7 | // http://mjrc.sz.tsinghua.edu.cn
8 | //
9 | // Redistribution and use in source and binary forms, with or without
10 | // modification, is not allowed, unless a valid written license is
11 | // granted by THU-CUHK Joint Research Center.
12 | //
13 | // THU-CUHK Joint Research Center has the rights to create, modify,
14 | // copy, compile, remove, rename, explain and deliver the source codes.
15 | //
16 |
17 |
18 | ///
19 | /// @file
20 | ///
21 | /// @brief Definition of encapsulated stack manipulation.
22 | ///
23 | /// History:
24 | /// - Version: 0.1.0
25 | /// Author: Jackie (jackiecao@gmail.com)
26 | /// Date: 2007/05/21
27 | /// Changed: Created by adding an encapsulated stack class
28 | ///
29 |
30 | #ifndef _CST_TOOLS_CMN_STACK_H_
31 | #define _CST_TOOLS_CMN_STACK_H_
32 |
33 | #include
34 |
35 | namespace cst
36 | {
37 | namespace cmn
38 | {
///
/// @brief Encapsulated stack class
///
/// Wrapper that lets the rest of the code avoid naming the "std" class
/// directly, for later easy migration. It adds no members of its own.
///
/// @note The element type parameter and the base class template argument
///       were missing, which is not valid C++; they are restored here.
///
template <typename T>
class stack : public std::stack<T>
{
};
49 |
50 | }
51 | }
52 |
53 | #endif//_CST_TOOLS_CMN_STACK_H_
54 |
--------------------------------------------------------------------------------
/engine/csttools/cmn/cmn_string.cpp:
--------------------------------------------------------------------------------
1 | //
2 | // Crystal Text-to-Speech Engine
3 | //
4 | // Copyright (c) 2007 THU-CUHK Joint Research Center for
5 | // Media Sciences, Technologies and Systems. All rights reserved.
6 | //
7 | // http://mjrc.sz.tsinghua.edu.cn
8 | //
9 | // Redistribution and use in source and binary forms, with or without
10 | // modification, is not allowed, unless a valid written license is
11 | // granted by THU-CUHK Joint Research Center.
12 | //
13 | // THU-CUHK Joint Research Center has the rights to create, modify,
14 | // copy, compile, remove, rename, explain and deliver the source codes.
15 | //
16 |
17 |
18 | ///
19 | /// @file
20 | ///
21 | /// @brief Implementation of encapsulated string manipulation.
22 | ///
23 | /// History:
24 | /// - Version: 0.1.0
25 | /// Author: Jackie (jackiecao@gmail.com)
26 | /// Date: 2007/05/21
27 | /// Changed: Created by adding an encapsulated string class
28 | ///
29 |
30 | #include
31 | #include "cmn_string.h"
32 |
33 |
34 | namespace cst
35 | {
36 | namespace str
37 | {
///
/// @brief Write formatted data to a wide-character buffer.
///
/// Forwards to _vsnwprintf() on Windows and to vswprintf() elsewhere.
/// When the output does not fit (the underlying call reports an error or
/// fills the whole buffer), the last element of the buffer is set to the
/// terminator so that the caller never receives an unterminated string.
///
/// @param [out] buffer Destination buffer
/// @param [in]  count  Capacity of the buffer, in wide characters
/// @param [in]  format printf-style wide format string
///
/// @return The value returned by the underlying formatting call
///
int snwprintf(wchar_t *buffer, size_t count, const wchar_t *format, ...)
{
    // the warning pragmas are specific to the Microsoft compiler
#if defined(_MSC_VER)
#pragma warning( push )
#pragma warning( disable : 4996 )
#endif
    va_list arglist;
    va_start(arglist, format);
#if defined(WIN32) || defined(WINCE)
    int ret = _vsnwprintf(buffer, count, format, arglist);
#else
    int ret = vswprintf(buffer, count, format, arglist);
#endif
    va_end(arglist);

    // guarantee termination when the text was truncated
    if (buffer != NULL && count > 0 && (ret < 0 || static_cast<size_t>(ret) >= count))
    {
        buffer[count - 1] = L'\0';
    }
#if defined(_MSC_VER)
#pragma warning( pop )
#endif
    return ret;
}
53 |
///
/// @brief Compare at most count characters of two wide strings, ignoring case.
///
/// @return The result of the platform routine: _wcsnicmp() on Windows,
///         wcsncasecmp() elsewhere
///
int wcsnicmp(const wchar_t *string1, const wchar_t *string2, size_t count)
{
#if !defined(WIN32) && !defined(WINCE)
    const int order = wcsncasecmp(string1, string2, count);
#else
    const int order = _wcsnicmp(string1, string2, count);
#endif
    return order;
}
63 |
64 | }//namespace str
65 | }
66 |
--------------------------------------------------------------------------------
/engine/csttools/cmn/cmn_string.h:
--------------------------------------------------------------------------------
1 | //
2 | // Crystal Text-to-Speech Engine
3 | //
4 | // Copyright (c) 2007 THU-CUHK Joint Research Center for
5 | // Media Sciences, Technologies and Systems. All rights reserved.
6 | //
7 | // http://mjrc.sz.tsinghua.edu.cn
8 | //
9 | // Redistribution and use in source and binary forms, with or without
10 | // modification, is not allowed, unless a valid written license is
11 | // granted by THU-CUHK Joint Research Center.
12 | //
13 | // THU-CUHK Joint Research Center has the rights to create, modify,
14 | // copy, compile, remove, rename, explain and deliver the source codes.
15 | //
16 |
17 |
18 | ///
19 | /// @file
20 | ///
21 | /// @brief Definition of encapsulated string manipulation.
22 | ///
23 | /// History:
24 | /// - Version: 0.1.0
25 | /// Author: Jackie (jackiecao@gmail.com)
26 | /// Date: 2007/05/21
27 | /// Changed: Created by adding an encapsulated string class
28 | ///
29 |
30 |
31 | #ifndef _CST_TOOLS_CMN_STRING_H_
32 | #define _CST_TOOLS_CMN_STRING_H_
33 |
34 | #include
35 |
///
/// @brief Root namespace of the Crystal Speech Toolkit
///
namespace cst
{
    ///
    /// @brief Common utilities shared across the toolkit (string, vector, ...)
    ///
    namespace cmn
    {
        /// @brief Narrow string type of the toolkit, an alias of std::string
        typedef std::string string;

        /// @brief Wide string type of the toolkit, an alias of std::wstring
        typedef std::wstring wstring;

    }//namespace cmn

    ///
    /// @brief String helpers: encoding conversion, replace, trim, tokenize etc.
    ///
    namespace str
    {
        ///
        /// @brief Format data into a wide-character buffer
        ///
        /// @param [out] buffer Destination buffer
        /// @param [in]  count  Capacity of the buffer in wide characters
        /// @param [in]  format printf-style wide format string
        ///
        int snwprintf(wchar_t *buffer, size_t count, const wchar_t *format, ...);

        ///
        /// @brief Case-insensitive comparison of at most count characters
        ///        of two wide strings
        ///
        int wcsnicmp(const wchar_t *string1, const wchar_t *string2, size_t count);

    }//namespace str
}
75 |
76 | #endif//_CST_TOOLS_CMN_STRING_H_
77 |
--------------------------------------------------------------------------------
/engine/csttools/cmn/cmn_textfile.h:
--------------------------------------------------------------------------------
1 | //
2 | // Crystal Text-to-Speech Engine
3 | //
4 | // Copyright (c) 2007 THU-CUHK Joint Research Center for
5 | // Media Sciences, Technologies and Systems. All rights reserved.
6 | //
7 | // http://mjrc.sz.tsinghua.edu.cn
8 | //
9 | // Redistribution and use in source and binary forms, with or without
10 | // modification, is not allowed, unless a valid written license is
11 | // granted by THU-CUHK Joint Research Center.
12 | //
13 | // THU-CUHK Joint Research Center has the rights to create, modify,
14 | // copy, compile, remove, rename, explain and deliver the source codes.
15 | //
16 |
17 |
18 | ///
19 | /// @file
20 | ///
21 | /// @brief Definition of text file with different encodings manipulation.
22 | ///
23 | /// History:
24 | /// - Version: 0.1.0
25 | /// Author: John (john.zywu@gmail.com)
26 | /// Date: 2009/05/10
27 | /// Changed: Created
28 | ///
29 |
30 | #ifndef _CST_TTS_BASE_CMN_TEXTFILE_H_
31 | #define _CST_TTS_BASE_CMN_TEXTFILE_H_
32 |
33 | #include "cmn_file.h"
34 | #include "utils/utl_string.h"
35 |
36 | namespace cst
37 | {
38 | namespace cmn
39 | {
40 | ///
41 | /// @brief Standard input/output file manipulation with encoding
42 | ///
43 | class CTextFile : public CFile
44 | {
45 | public:
46 | ///
47 | /// @brief Default constructor
48 | ///
49 | CTextFile() : m_encoding(str::ENC_ANSI) {}
50 |
51 | ///
52 | /// @brief Open the file with specified mode and encoding
53 | ///
54 | /// @param [in] filename Name of the file to be opened
55 | /// @param [in] mode Type of access permitted, takes the values as defined in ""
56 | /// @param [in] encoding Encoding of the file text
57 | ///
58 | /// @return Whether filename is opened successfully or not
59 | ///
60 | virtual bool open(const wchar_t *filename, const wchar_t *mode, str::EEncoding encoding=str::ENC_AUTO);
61 |
62 | ///
63 | /// @brief Reads all text data into the string from the file
64 | ///
65 | /// @return false if end-of-file was reached without reading any data
66 | ///
67 | virtual bool readString(std::wstring &retstr);
68 |
69 | ///
70 | /// @brief Writes string text from a buffer to the file
71 | ///
72 | virtual bool writeString(const wchar_t *retstr);
73 |
74 | ///
75 | /// @brief Get the encoding of the text file
76 | ///
77 | const str::EEncoding &getEncoding() const {return m_encoding;}
78 |
79 | protected:
80 | /// Text encoding of the text file (default is ENC_ANSI)
81 | str::EEncoding m_encoding;
82 | /// Length of the Byte Order Mark (e.g. FF FE for UTF16LE, EF BB BF for UTF8)
83 | size_t m_BOMLen;
84 | };
85 |
86 | }
87 | }
88 |
89 | #endif//_CST_TTS_BASE_CMN_TEXTFILE_H_
90 |
--------------------------------------------------------------------------------
/engine/csttools/cmn/cmn_type.h:
--------------------------------------------------------------------------------
1 | //
2 | // Crystal Text-to-Speech Engine
3 | //
4 | // Copyright (c) 2007 THU-CUHK Joint Research Center for
5 | // Media Sciences, Technologies and Systems. All rights reserved.
6 | //
7 | // http://mjrc.sz.tsinghua.edu.cn
8 | //
9 | // Redistribution and use in source and binary forms, with or without
10 | // modification, is not allowed, unless a valid written license is
11 | // granted by THU-CUHK Joint Research Center.
12 | //
13 | // THU-CUHK Joint Research Center has the rights to create, modify,
14 | // copy, compile, remove, rename, explain and deliver the source codes.
15 | //
16 |
17 |
18 | ///
19 | /// @file
20 | ///
21 | /// @brief Definition of common types for the project
22 | ///
23 | /// @version 0.1.0
24 | /// @date 2007/06/15
25 | ///
26 | /// History:
27 | /// - Version: 0.1.0
28 | /// Author: John (john.zywu@gmail.com)
29 | /// Date: 2007/06/15
30 | /// Changed: Created
31 | ///
32 |
33 | #ifndef _CST_TTS_BASE_CMN_TYPE_H_
34 | #define _CST_TTS_BASE_CMN_TYPE_H_
35 |
36 | //namespace cst
37 | //{
38 |
///@todo keep only: byte, int, size_t, icode_t, handle
///@todo there are definitions in CrystalTTS.h

// Fallback for translation units where NULL has not been defined yet
#ifndef NULL
# define NULL 0
#endif

// Unsigned integer aliases named after their intended width in bits
typedef unsigned char uint8;
typedef unsigned short uint16;
typedef unsigned int uint32;
typedef unsigned long long uint64;

// Short names for the unsigned built-in types
typedef unsigned char byte;
typedef unsigned short ushort;
typedef unsigned int uint;
typedef unsigned long ulong; // used in CWaveData (needed?)

// Internal code type
typedef unsigned int icode_t;
//typedef unsigned short icode_t;

// Opaque handle
typedef void* handle;

// Sentinels: the all-ones value of the respective type
static const icode_t INVALID_ICODE = (icode_t)-1;
static const uint32 INVALID_UNITID = (uint32)-1;
60 | //}
61 |
///
/// @brief Cast helper that works with and without RTTI
///
/// When CST_NO_RTTI_SUPPORT is defined the value is converted with an
/// unchecked C-style cast; otherwise dynamic_cast is used.
///
/// @tparam TargetT Type to convert to; given explicitly by the caller
/// @tparam SourceT Type of the value; deduced from the argument
///
/// @param [in] value The value to be converted
///
/// @return The converted value. With RTTI, the result is whatever
///         dynamic_cast yields (a null pointer for a failed pointer cast).
///
/// @note The template parameters were named _T and _R. Identifiers that
///       start with an underscore followed by an upper-case letter are
///       reserved, and _T is a macro in <tchar.h>, hence the new names.
///
template <typename TargetT, typename SourceT>
TargetT cst_dynamic_cast(SourceT value)
{
#if defined(CST_NO_RTTI_SUPPORT)
    // RTTI (Run-time type information) is not supported
    // Force type conversion
    return (TargetT)(value);
#else
    // RTTI (Run-time type information) is supported
    return dynamic_cast<TargetT>(value);
#endif
}
74 |
75 | #endif//_CST_TTS_BASE_CMN_TYPE_H_
76 |
--------------------------------------------------------------------------------
/engine/csttools/cmn/cmn_vector.h:
--------------------------------------------------------------------------------
1 | //
2 | // Crystal Text-to-Speech Engine
3 | //
4 | // Copyright (c) 2007 THU-CUHK Joint Research Center for
5 | // Media Sciences, Technologies and Systems. All rights reserved.
6 | //
7 | // http://mjrc.sz.tsinghua.edu.cn
8 | //
9 | // Redistribution and use in source and binary forms, with or without
10 | // modification, is not allowed, unless a valid written license is
11 | // granted by THU-CUHK Joint Research Center.
12 | //
13 | // THU-CUHK Joint Research Center has the rights to create, modify,
14 | // copy, compile, remove, rename, explain and deliver the source codes.
15 | //
16 |
17 |
18 | ///
19 | /// @file
20 | ///
21 | /// @brief Definition of encapsulated vector manipulation.
22 | ///
23 | /// History:
24 | /// - Version: 0.1.0
25 | /// Author: Jackie (jackiecao@gmail.com)
26 | /// Date: 2007/05/21
27 | /// Changed: Created by adding an encapsulated vector class
28 | ///
29 |
30 | #ifndef _CST_TOOLS_CMN_VECTOR_H_
31 | #define _CST_TOOLS_CMN_VECTOR_H_
32 |
33 | #include
34 |
35 | namespace cst
36 | {
37 | namespace cmn
38 | {
///
/// @brief Encapsulated vector class
///
/// Wrapper that lets the rest of the code avoid naming the "std" class
/// directly, for later easy migration. It adds no members of its own.
///
/// @note The element type parameter and the base class template argument
///       were missing, which is not valid C++; they are restored here.
///
template <typename T>
class vector : public std::vector<T>
{
};
49 |
50 | }
51 | }
52 |
53 | #endif//_CST_TOOLS_CMN_VECTOR_H_
54 |
--------------------------------------------------------------------------------
/engine/csttools/dsp/dsp_databuffer.cpp:
--------------------------------------------------------------------------------
1 | //
2 | // Crystal Text-to-Speech Engine
3 | //
4 | // Copyright (c) 2007 THU-CUHK Joint Research Center for
5 | // Media Sciences, Technologies and Systems. All rights reserved.
6 | //
7 | // http://mjrc.sz.tsinghua.edu.cn
8 | //
9 | // Redistribution and use in source and binary forms, with or without
10 | // modification, is not allowed, unless a valid written license is
11 | // granted by THU-CUHK Joint Research Center.
12 | //
13 | // THU-CUHK Joint Research Center has the rights to create, modify,
14 | // copy, compile, remove, rename, explain and deliver the source codes.
15 | //
16 |
17 |
18 | ///
19 | /// @file
20 | ///
21 | /// @brief Implementation file for functions of data buffer manipulation
22 | ///
23 | /// History:
24 | /// - Version: 0.1.0
25 | /// Author: John (john.zywu@gmail.com)
26 | /// Date: 2007/07/13
27 | /// Changed: Created
28 | ///
29 |
30 |
31 | #include
32 | #include "dsp_databuffer.h"
33 |
34 | namespace cst
35 | {
36 | namespace dsp
37 | {
38 | CDataBuffer::CDataBuffer(const CDataBuffer &right)
39 | {
40 | this->clear();
41 | this->appendData(right.m_pData, right.m_nLength);
42 | }
43 |
44 | CDataBuffer &CDataBuffer::operator = (const CDataBuffer &right)
45 | {
46 | // handle self assignment
47 | if (this != &right)
48 | {
49 | this->clear();
50 | this->appendData(right.m_pData, right.m_nLength);
51 | }
52 | return (*this);
53 | }
54 |
55 | bool CDataBuffer::clear()
56 | {
57 | delete[] m_pData;
58 | m_nLength =0;
59 | m_pData = 0;
60 | return true;
61 | }
62 |
63 | bool CDataBuffer::resize(unsigned long nNewByteSize)
64 | {
65 | if (nNewByteSize <= m_nLength)
66 | {
67 | // truncate the old data
68 | m_nLength = nNewByteSize;
69 | return true;
70 | }
71 | else
72 | {
73 | // append the zero
74 | return appendData(0, nNewByteSize-m_nLength);
75 | }
76 | }
77 |
78 | bool CDataBuffer::appendData(const unsigned char *pData, unsigned long nByteLen)
79 | {
80 | // create buffer
81 | unsigned char *pTmpData = new unsigned char[m_nLength+nByteLen];
82 | if (pTmpData==0)
83 | {
84 | // memory overflow
85 | return false;
86 | }
87 | memcpy(pTmpData, m_pData, sizeof(unsigned char)*m_nLength);
88 | if (pData==0)
89 | {
90 | // append the zero
91 | memset(pTmpData+m_nLength, 0, sizeof(unsigned char)*nByteLen);
92 | }
93 | else
94 | {
95 | // append the data
96 | memcpy(pTmpData+m_nLength, pData, sizeof(unsigned char)*nByteLen);
97 | }
98 | delete[] m_pData;
99 | m_pData = pTmpData;
100 | m_nLength += nByteLen;
101 | return true;
102 | }
103 |
104 | unsigned long CDataBuffer::assignData(const unsigned char *pData, unsigned long nByteLen, unsigned long nByteOffset)
105 | {
106 | // detect the correct length to copy
107 | unsigned long toCopy = m_nLength - nByteOffset;
108 | if (m_nLength <= nByteOffset)
109 | {
110 | toCopy = 0;
111 | }
112 | else if (m_nLength-nByteOffset > nByteLen)
113 | {
114 | toCopy = nByteLen;
115 | }
116 |
117 | if (pData == 0)
118 | {
119 | // assign the zero
120 | memset(m_pData+nByteOffset, 0, sizeof(unsigned char)*toCopy);
121 | }
122 | else
123 | {
124 | // copy the data
125 | memcpy(m_pData+nByteOffset, pData, sizeof(unsigned char)*toCopy);
126 | }
127 | return toCopy;
128 | }
129 | }
130 | }
131 |
--------------------------------------------------------------------------------
/engine/csttools/dsp/dsp_databuffer.h:
--------------------------------------------------------------------------------
1 | //
2 | // Crystal Text-to-Speech Engine
3 | //
4 | // Copyright (c) 2007 THU-CUHK Joint Research Center for
5 | // Media Sciences, Technologies and Systems. All rights reserved.
6 | //
7 | // http://mjrc.sz.tsinghua.edu.cn
8 | //
9 | // Redistribution and use in source and binary forms, with or without
10 | // modification, is not allowed, unless a valid written license is
11 | // granted by THU-CUHK Joint Research Center.
12 | //
13 | // THU-CUHK Joint Research Center has the rights to create, modify,
14 | // copy, compile, remove, rename, explain and deliver the source codes.
15 | //
16 |
17 |
18 | ///
19 | /// @file
20 | ///
21 | /// @brief Head file for functions of data buffer manipulation
22 | ///
23 | /// History:
24 | /// - Version: 0.1.0
25 | /// Author: John (john.zywu@gmail.com)
26 | /// Date: 2007/07/13
27 | /// Changed: Created
28 | ///
29 |
30 |
31 | #ifndef _CST_TTS_BASE_DSP_DATABUFFER_H_
32 | #define _CST_TTS_BASE_DSP_DATABUFFER_H_
33 |
34 | namespace cst
35 | {
36 | namespace dsp
37 | {
///
/// @brief Byte buffer that owns and manages its storage
///
class CDataBuffer
{
public:
    ///
    /// @brief Create an empty buffer
    ///
    CDataBuffer ()
        : m_pData(0)
        , m_nLength(0)
    {
    }

    ///
    /// @brief Create a buffer as a copy of another one
    ///
    CDataBuffer(const CDataBuffer &right);

    ///
    /// @brief Destroy the buffer and release its data
    ///
    virtual ~CDataBuffer()
    {
        clear();
    }

    ///
    /// @brief Make this buffer a copy of the right operand
    ///
    CDataBuffer &operator = (const CDataBuffer &right);

public:
    //////////////////////////////////////////////////////////////////////////
    //
    // Data related procedures
    //
    //////////////////////////////////////////////////////////////////////////

    ///
    /// @brief Empty the buffer
    ///
    /// @return Whether the operation succeeded
    ///
    bool clear();

    ///
    /// @brief Set the buffer to a new size
    ///
    /// A smaller size cuts the data off at the new end.
    /// A larger size extends the data with zeros.
    ///
    /// @param [in] nNewByteSize Requested size of the buffer (in byte)
    ///
    /// @return Whether the operation succeeded
    ///
    /// @note With 16 bits per sample audio data, an odd size (in byte)
    ///       might leave the data mis-aligned. It is up to the caller to
    ///       avoid that.
    /// @see alignData
    ///
    bool resize(unsigned long nNewByteSize);

    ///
    /// @brief Overwrite the existing data, starting at an offset
    ///
    /// Only the space from the offset position to the end is filled.
    /// New data that is larger than that space is cut to fit.
    /// An empty (NULL) input writes zeros.
    /// An offset behind the end of the data writes nothing.
    ///
    /// @param [in] pData       Data to write, or NULL to write zeros
    /// @param [in] nByteLen    Size of the data to write (in byte)
    /// @param [in] nByteOffset Start position inside the existing data (in byte)
    ///
    /// @return Number of bytes that were written
    ///
    unsigned long assignData(const unsigned char *pData, unsigned long nByteLen, unsigned long nByteOffset=0);

    ///
    /// @brief Add data behind the current end of the buffer
    ///
    /// An empty (NULL) input adds the given number of zeros.
    ///
    /// @param [in] pData    Data to add, or NULL to add zeros
    /// @param [in] nByteLen Size of the data to add (in byte)
    ///
    /// @return Whether the operation succeeded
    ///
    bool appendData(const unsigned char *pData, unsigned long nByteLen);

    ///
    /// @brief Access the data for modification (as unsigned char)
    ///
    unsigned char *getData()
    {
        return m_pData;
    }

    ///
    /// @brief Access the data read-only (as unsigned char)
    ///
    const unsigned char *getData() const
    {
        return m_pData;
    }

    ///
    /// @brief Length of the data (in byte)
    ///
    unsigned long getLength() const
    {
        return m_nLength;
    }

protected:
    unsigned char *m_pData;  ///< The data buffer
    unsigned long m_nLength; ///< The length of the data buffer (unit: in byte)
};
141 | }
142 | }
143 |
144 | #endif//_CST_TTS_BASE_DSP_DATABUFFER_H_
145 |
--------------------------------------------------------------------------------
/engine/csttools/dsp/dsp_wavedata.cpp:
--------------------------------------------------------------------------------
1 | //
2 | // Crystal Text-to-Speech Engine
3 | //
4 | // Copyright (c) 2007 THU-CUHK Joint Research Center for
5 | // Media Sciences, Technologies and Systems. All rights reserved.
6 | //
7 | // http://mjrc.sz.tsinghua.edu.cn
8 | //
9 | // Redistribution and use in source and binary forms, with or without
10 | // modification, is not allowed, unless a valid written license is
11 | // granted by THU-CUHK Joint Research Center.
12 | //
13 | // THU-CUHK Joint Research Center has the rights to create, modify,
14 | // copy, compile, remove, rename, explain and deliver the source codes.
15 | //
16 |
17 |
18 | ///
19 | /// @file
20 | ///
21 | /// @brief Implementation file for functions of wave data manipulation
22 | ///
23 | /// History:
24 | /// - Version: 0.1.0
25 | /// Author: John (john.zywu@gmail.com)
26 | /// Date: 2007/07/13
27 | /// Changed: Created
28 | ///
29 |
30 |
31 | #include "dsp_wavedata.h"
32 |
33 | namespace cst
34 | {
35 | namespace dsp
36 | {
37 | CWaveData::CWaveData(const CWaveData &right)
38 | {
39 | this->clear();
40 | this->setFormat(right.m_nSamplesPerSec, right.m_wBitsPerSample, right.m_nChannels, right.m_wFormatTag);
41 | this->appendData(right.m_pData, right.m_nLength);
42 | }
43 |
44 | CWaveData &CWaveData::operator = (const CWaveData &right)
45 | {
46 | // handle self assignment
47 | if (this != &right)
48 | {
49 | this->clear();
50 | this->setFormat(right.m_nSamplesPerSec, right.m_wBitsPerSample, right.m_nChannels, right.m_wFormatTag);
51 | this->appendData(right.m_pData, right.m_nLength);
52 | }
53 | return (*this);
54 | }
55 |
56 | bool CWaveData::setFormat(unsigned long nSamplesPerSec, unsigned short wBitsPerSample, unsigned short nChannels, unsigned short wFormat)
57 | {
58 | if (wFormat != 1)
59 | {
60 | // currently, only WAVE_FORMAT_PCM(=1) is supported
61 | return false;
62 | }
63 | if (wBitsPerSample != 8 && wBitsPerSample != 16)
64 | {
65 | // only 8 or 16 is supported for WAVE_FORMAT_PCM
66 | return false;
67 | }
68 |
69 | // clear all the data
70 | clear();
71 |
72 | // set the format
73 | m_nSamplesPerSec = nSamplesPerSec;
74 | m_wBitsPerSample = wBitsPerSample;
75 | m_nChannels = nChannels;
76 | m_wFormatTag = wFormat;
77 | return true;
78 | }
79 |
80 | bool CWaveData::alignData()
81 | {
82 | if ((m_wBitsPerSample == 16) && (m_nLength & 1))
83 | {
84 | // the bits per sample is 16, the length is odd
85 | // append one zero byte
86 | appendData(0, 1);
87 | }
88 | return true;
89 | }
90 |
91 | int CWaveData::getSampleValue( unsigned long sampleIndex) const
92 | {
93 | int val = 0;
94 | if (getBitsPerSample() == 16)
95 | {
96 | val = *((const short*)getData() + sampleIndex);
97 | }
98 | else if (getBitsPerSample() == 8)
99 | {
100 | val = ((short)*(getData() + sampleIndex) - 128) << 8;
101 | }
102 | return val;
103 | }
104 |
105 | void CWaveData::setSampleValue( unsigned long sampleIndex, int sampleValue)
106 | {
107 | sampleValue = (sampleValue> 32767) ? 32767 : sampleValue;
108 | sampleValue = (sampleValue < -32768) ? -32768 : sampleValue;
109 |
110 | if (getBitsPerSample() == 16)
111 | {
112 | *((short*)getData() + sampleIndex) = sampleValue;
113 | }
114 | else if (getBitsPerSample() == 8)
115 | {
116 | *(getData() + sampleIndex) = (unsigned char)((sampleValue >> 8) + 128);
117 | }
118 | }
119 |
120 | }
121 | }
122 |
--------------------------------------------------------------------------------
/engine/csttools/dsp/dsp_wavedevice.h:
--------------------------------------------------------------------------------
1 | //
2 | // Crystal Text-to-Speech Engine
3 | //
4 | // Copyright (c) 2007 THU-CUHK Joint Research Center for
5 | // Media Sciences, Technologies and Systems. All rights reserved.
6 | //
7 | // http://mjrc.sz.tsinghua.edu.cn
8 | //
9 | // Redistribution and use in source and binary forms, with or without
10 | // modification, is not allowed, unless a valid written license is
11 | // granted by THU-CUHK Joint Research Center.
12 | //
13 | // THU-CUHK Joint Research Center has the rights to create, modify,
14 | // copy, compile, remove, rename, explain and deliver the source codes.
15 | //
16 |
17 |
18 | ///
19 | /// @file
20 | ///
21 | /// @brief Header file for wave device manipulation
22 | ///
23 | /// History:
24 | /// - Version: 0.1.0
25 | /// Author: John (john.zywu@gmail.com)
26 | /// Date: 2006/02/20
27 | /// Changed: Created
28 | ///
29 |
30 |
31 | #ifndef _CST_TOOLS_DSP_WAVEDEVICE_H_
32 | #define _CST_TOOLS_DSP_WAVEDEVICE_H_
33 |
34 | #ifndef WAVE_MAPPER
35 | #define WAVE_MAPPER ((unsigned int)-1) // device ID for wave device mapper
36 | #endif
37 |
38 | namespace cst
39 | {
40 | namespace dsp
41 | {
42 | ///
43 | /// @brief The class for wave out device manipulation (output waveform to device)
44 | ///
45 | class CWaveOut
46 | {
47 | public:
48 | ///
49 | /// @brief Constructor
50 | ///
51 | CWaveOut();
52 |
53 | ///
54 | /// @brief Destructor
55 | ///
56 | virtual ~CWaveOut();
57 |
58 | public:
59 | ///
60 | /// @brief Open the wave out device
61 | ///
62 | /// @param [in] devID Wave out device ID to be opened. WAVE_MAPPER is used to automatically select the device.
63 | /// @param [in] nSamplesPerSec Sampling rate of the waveform data to be output
64 | /// @param [in] nBitsPerSample Bits per sample of the waveform data to be output
65 | /// @param [in] nChannel Channel number of the waveform data to be output
66 | ///
67 | /// @return Whether operation is successful
68 | ///
69 | bool open(unsigned int devID=WAVE_MAPPER, int nSamplesPerSec=16000, int nBitsPerSample=16, int nChannels=1);
70 |
71 | ///
72 | /// @brief Close the wave out device
73 | ///
74 | bool close();
75 |
76 | ///
77 | /// @brief Output (play) the waveform data to the wave out device
78 | ///
79 | /// @param [in] pData The waveform data to be output to device
80 | /// @param [in] nByteLen Byte length of the waveform data
81 | ///
82 | bool write(const void* pData, unsigned int nByteLen);
83 |
84 | ///
85 | /// @brief Stop playing the wave data (stop the wave out device)
86 | ///
87 | bool stop();
88 |
89 | ///
90 | /// @brief Pause playing the wave data (pause the wave out device)
91 | ///
92 | bool pause();
93 |
94 | ///
95 | /// @brief Resume playing the wave data (resume the paused wave out device)
96 | ///
97 | bool resume();
98 |
99 | ///
100 | /// @brief Wait until wave data playing is finished (wait until the wave out device is idle and ready for next wave data output)
101 | ///
102 | bool wait();
103 |
104 | ///
105 | /// @brief Indicate whether wave out device is opened successfully
106 | ///
107 | bool isOpened();
108 |
109 | ///
110 | /// @brief Indicate whether it is still outputting the waveform data
111 | ///
112 | bool isWorking();
113 |
114 | protected:
115 | unsigned int m_nDevID; ///< ID of the wave device, usually WAVE_MAPPER is used
116 | void* m_hDevice; ///< Handle to the opened wave device
117 | void* m_pWaveHdr; ///< Wave header
118 | public:
119 | bool m_bWorking; ///< Indicating whether is still outputting wave data
120 |
121 | protected:
122 | ///
123 | /// @brief Callback function to access the working status of the wave out device
124 | ///
125 | //static void wavOutProc(void* hwo, unsigned int uMsg, unsigned long* dwInstance, unsigned long dwParam1, unsigned long dwParam2);
126 | };
127 |
128 | }
129 | }
130 |
131 | #endif//_CST_TOOLS_DSP_WAVEDEVICE_H_
132 |
--------------------------------------------------------------------------------
/engine/csttools/dsp/dsp_waveplay.cpp:
--------------------------------------------------------------------------------
1 | //
2 | // Crystal Text-to-Speech Engine
3 | //
4 | // Copyright (c) 2007 THU-CUHK Joint Research Center for
5 | // Media Sciences, Technologies and Systems. All rights reserved.
6 | //
7 | // http://mjrc.sz.tsinghua.edu.cn
8 | //
9 | // Redistribution and use in source and binary forms, with or without
10 | // modification, is not allowed, unless a valid written license is
11 | // granted by THU-CUHK Joint Research Center.
12 | //
13 | // THU-CUHK Joint Research Center has the rights to create, modify,
14 | // copy, compile, remove, rename, explain and deliver the source codes.
15 | //
16 |
17 |
18 | ///
19 | /// @file
20 | ///
21 | /// @brief Implementation file for wave play and device manipulation
22 | ///
23 | /// History:
24 | /// - Version: 0.1.0
25 | /// Author: John (john.zywu@gmail.com)
26 | /// Date: 2007/07/13
27 | /// Changed: Created
28 | ///
29 |
30 |
31 | #if defined(WIN32)
32 | # include
33 | # include
34 | # pragma comment(lib, "winmm.lib")
35 | # pragma message("WIN32: Under Microsoft Windows Operation System")
36 | # pragma message("WIN32: Linking with winmm.lib")
37 | #elif defined(WINCE)
38 | # include
39 | //# include
40 | //# pragma comment(lib, "winmm.lib")
41 | #elif defined(__GNUC__)
42 | #include
43 | #include // wcstombs()
44 | #include // ioctl()
45 | # include "utils/utl_string.h"
46 | # include "dsp/dsp_wavefile.h"
47 | #define AUDIO_DEVICE "/dev/dsp"
48 | //#include
49 | #else
50 | # error "Sorry for not support OS:>"
51 | #endif//WIN32
52 |
53 | #include "dsp_waveplay.h"
54 | #include "dsp_wavedevice.h"
55 | #include "dsp_wavedata.h"
56 |
57 |
58 | namespace cst
59 | {
60 | namespace dsp
61 | {
62 | bool CWavePlay::playWaveFile(const wchar_t *fileName, int playMode)
63 | {
64 | #if defined(WIN32)
65 | unsigned long mode = (playMode==ModeSync) ? (SND_FILENAME|SND_SYNC|SND_NODEFAULT) : (SND_FILENAME|SND_ASYNC|SND_NODEFAULT);
66 | if (PlaySoundW(fileName, NULL, mode))
67 | return true;
68 | else
69 | return false;
70 | #elif defined(WINCE)
71 | unsigned long mode = (playMode==ModeSync) ? SND_SYNC : SND_ASYNC;
72 | if (sndPlaySound(fileName, mode))
73 | return true;
74 | else
75 | return false;
76 | #elif defined(__GNUC__)
77 | /*
78 | // Handle for the PCM device
79 | snd_pcm_t *pcm_handle;
80 | // Playback stream
81 | snd_pcm_stream_t stream = SND_PCM_STREAM_PLAYBACK;
82 | // This structure contains information about the hardware and can be used to specify the configuration to be used for the PCM stream.
83 | snd_pcm_hw_params_t *hwparams;
84 | */
85 |
86 | // @todo binbinsh: better linux wav player
87 | char cCommand[1000];
88 | char cFileName[1000];
89 | wcstombs(cFileName, fileName, 1000);
90 | sprintf(cCommand, "mplayer %s 2>&1 > /dev/null", cFileName);
91 | if(system(cCommand))
92 | return true;
93 | else
94 | return false;
95 | #else
96 | return false;
97 | #endif
98 | }
99 |
100 | #if defined(WIN32)
101 | CWaveOut g_waveOutDev;
102 | CWaveData g_waveData;
103 | #endif
104 |
105 | bool CWavePlay::playWaveData(const CWaveData &waveData, int playMode)
106 | {
107 | #if defined(WIN32)
108 | if (playMode & ModeWait)
109 | {
110 | // wait until previous waveform play finishes
111 | g_waveOutDev.wait();
112 | }
113 | // wave data should be saved here
114 | // as the input waveData might be cleared in the caller function
115 | g_waveData = waveData;
116 | // open the new wave out device
117 | if (!g_waveOutDev.open(WAVE_MAPPER, g_waveData.getSamplesPerSec(), g_waveData.getBitsPerSample(), g_waveData.getChannels()))
118 | {
119 | return false;
120 | }
121 | // output waveform data
122 | if (!g_waveOutDev.write(g_waveData.getData(), g_waveData.getLength()))
123 | {
124 | g_waveOutDev.close();
125 | return false;
126 | }
127 | // play synchronously or not
128 | if (playMode & ModeSync)
129 | {
130 | // wait until current waveform play finishes
131 | g_waveOutDev.wait();
132 | }
133 | return true;
134 | #elif defined(__GNUC__)
135 | char tmp[20] = "wav.tmp.XXXXXX.wav";
136 | mkstemps(tmp, 4);
137 | std::wstring wavFile = str::mbstowcs(tmp);
138 | dsp::CWaveFile::save(wavFile.c_str(), waveData, dsp::CWaveFile::modeWrite);
139 | playWaveFile(wavFile.c_str(), ModeSync);
140 | remove(tmp);
141 | return true;
142 | #else
143 | return false;
144 | #endif
145 | }
146 | }
147 | }
148 |
--------------------------------------------------------------------------------
/engine/csttools/dsp/dsp_waveplay.h:
--------------------------------------------------------------------------------
1 | //
2 | // Crystal Text-to-Speech Engine
3 | //
4 | // Copyright (c) 2007 THU-CUHK Joint Research Center for
5 | // Media Sciences, Technologies and Systems. All rights reserved.
6 | //
7 | // http://mjrc.sz.tsinghua.edu.cn
8 | //
9 | // Redistribution and use in source and binary forms, with or without
10 | // modification, is not allowed, unless a valid written license is
11 | // granted by THU-CUHK Joint Research Center.
12 | //
13 | // THU-CUHK Joint Research Center has the rights to create, modify,
14 | // copy, compile, remove, rename, explain and deliver the source codes.
15 | //
16 |
17 |
18 | ///
19 | /// @file
20 | ///
21 | /// @brief Header file for wave play and device manipulation
22 | ///
23 | /// History:
24 | /// - Version: 0.1.0
25 | /// Author: John (john.zywu@gmail.com)
26 | /// Date: 2007/07/13
27 | /// Changed: Created
28 | ///
29 |
30 |
31 | #ifndef _CST_TTS_BASE_DSP_WAVEPLAY_H_
32 | #define _CST_TTS_BASE_DSP_WAVEPLAY_H_
33 |
34 | namespace cst
35 | {
36 | namespace dsp
37 | {
// forward declaration, only references to the class are used here
class CWaveData;

///
/// @brief The class which plays waveforms, either from a file or from a data buffer
///
class CWavePlay
{
public:
    ///
    /// @brief Flags describing how the wave data is played
    ///
    enum EPlayMode
    {
        ModeAsync   = 0x00, ///< Asynchronous play: the function returns immediately after the sound has begun
        ModeSync    = 0x01, ///< Synchronous play: the function does not return until the sound ends
        ModeWait    = 0x02, ///< Start playing the current data ONLY after the previous wave data has been played
    };

public:
    ///
    /// @brief Play the waveform stored in the file with the given name
    ///
    /// @param [in] fileName The name of the file containing the waveform. NULL to stop current playing waveform.
    /// @param [in] playMode The waveform play mode: synchronously or asynchronously
    ///
    static bool playWaveFile(const wchar_t *fileName, int playMode);

    ///
    /// @brief Play the waveform held in a data buffer
    ///
    /// @param [in] waveData The waveform data to be played
    /// @param [in] playMode The waveform play mode: synchronously or asynchronously, and whether to wait until the previous wave play has stopped
    ///
    static bool playWaveData(const CWaveData &waveData, int playMode);
};
74 | }
75 | }
76 |
77 | #endif//_CST_TTS_BASE_DSP_WAVEPLAY_H_
78 |
--------------------------------------------------------------------------------
/engine/csttools/utils/utl_bigram.h:
--------------------------------------------------------------------------------
1 | //
2 | // Crystal Text-to-Speech Engine
3 | //
4 | // Copyright (c) 2007 THU-CUHK Joint Research Center for
5 | // Media Sciences, Technologies and Systems. All rights reserved.
6 | //
7 | // http://mjrc.sz.tsinghua.edu.cn
8 | //
9 | // Redistribution and use in source and binary forms, with or without
10 | // modification, is not allowed, unless a valid written license is
11 | // granted by THU-CUHK Joint Research Center.
12 | //
13 | // THU-CUHK Joint Research Center has the rights to create, modify,
14 | // copy, compile, remove, rename, explain and deliver the source codes.
15 | //
16 |
17 |
18 | ///
19 | /// @version 0.1
20 | /// @author Yongxin Wang
21 | /// @date 2006/08/07
22 | ///
23 | /// History:
24 | /// - Version: 0.1 \n
25 | /// Author: Yongxin WANG \n
26 | /// Date: 2006/08/07 \n
27 | /// Changed: create
28 | ///
29 |
30 | #ifndef _BL_DATA_BIGRAM_H_INCLUDED_
31 | #define _BL_DATA_BIGRAM_H_INCLUDED_
32 |
33 | #include "cmn/cmn_type.h"
34 | #include
35 | #include
36 |
37 | namespace cst
38 | {
39 |
40 | namespace cmn
41 | {
42 | ///
43 | /// @brief NGM file-format, a binary file storing the unigram & bi-gram
44 | ///
45 | /// This file contains 3 section:
46 | /// 1. TNGMFileHeader
47 | /// 2. Unigram
48 | /// 3. Bigram
49 | /// An important problem is that unigram section contains a textual POS with arbitrary
50 | /// length, instead of using a fixed length structure to store them, we keep one line for
51 | /// each POS
52 | ///
53 | struct TNGMFileHeader
54 | {
55 | uint32 posCount;
56 | uint32 unigramBuffLength;
57 | };
58 |
///
/// @brief POS bigram model (unigram, back-off and bigram values), loadable from ARPA or NGM files
///
/// NOTE(review): the class holds a raw pointer (m_vecBigram) and defines no copy constructor
/// or assignment operator; if clear() frees that pointer, copying an instance would be unsafe.
/// Confirm against the implementation file.
///
class CBigram
{
public:
    /// @brief Constructor, creates an empty model
    CBigram() : m_vecBigram(NULL) {}

    /// @brief Destructor, clears the model
    virtual ~CBigram() {clear();}

public:
    ///
    /// @brief Initialize the bigram module with the specified ARPA file
    ///
    /// The method will first clear everything that is in the lexicon
    ///
    /// @param [in] strFile input file name
    ///
    /// @return whether the initialization is successful
    ///
    /// @warning Whether the method is successful or not, everything that is
    ///          already in the lexicon will be CLEARED!!
    ///
    bool loadBigramFromARPA(const std::wstring &strFile);

    ///
    /// @brief Initialize the bigram module with the specified NGM file
    ///
    /// The method will first clear everything that is in the lexicon
    ///
    /// @param [in] strFile input file name
    ///
    /// @return whether the initialization is successful
    ///
    /// @warning Whether the method is successful or not, everything that is
    ///          already in the lexicon will be CLEARED!!
    ///
    bool loadBigramFromNGM(const std::string &strFile);

public:
    ///
    /// @brief Save the bigram module to the specified NGM file
    ///
    /// @param [in] strFile output file name
    ///
    bool saveBigramToNGM(const std::string &strFile);

public:
    ///
    /// @brief Get the POS index in this specific module
    ///
    /// The function will look wstrPOS up in the POS vector and return
    /// the index. If it is not in the vector, the last character will be
    /// eliminated (to get the parent POS) and the search is done again.
    ///
    /// @param [in] wstrPOS the POS to be looked up
    ///
    /// @return the index value of the given POS (or its parent POS) in the POS
    ///         vector.
    /// @retval -1 Neither the POS nor any of its prefixes is in the POS vector.
    ///
    int getPOSIndex(const std::wstring &wstrPOS) const;

    ///
    /// @brief Get the bigram value of the given POS's
    ///
    /// @param [in] wstrPOS1 the first element of the bigram
    /// @param [in] wstrPOS2 the second element of the bigram
    ///
    /// @return The bigram value of the given pair of POS's, in logarithm.\n
    ///         The return will be -1e30 if not found.
    ///
    double getBigramValue(const std::wstring &wstrPOS1, const std::wstring &wstrPOS2) const;

    ///
    /// @brief Get the bigram value of the given POS's
    ///
    /// @param [in] idxPOS1 index of the first element of the bigram
    /// @param [in] idxPOS2 index of the second element of the bigram
    ///
    /// @return The bigram value of the given pair of POS's, in logarithm.\n
    ///         The return will be -1e30 if not found.
    ///
    double getBigramValue(int idxPOS1, int idxPOS2) const;

    ///
    /// @brief Clear the bigram model
    ///
    /// @return whether the model is cleared.
    ///
    bool clear();

private:
    std::vector<std::wstring> m_vecPOS;     ///< an array of all the POS in the bigram module
    std::vector<double> m_vecUnigram;       ///< unigram module
    std::vector<double> m_vecUnigramBackOff;///< back off value, used to calculate the bigram when
                                            ///< the bigram itself is missing
    // std::vector<double> m_vecBigram;     ///< bigram module
    double *m_vecBigram;                    ///< bigram module (raw buffer)
};
148 | }
149 |
150 | }
151 |
152 | #endif // _BL_DATA_BIGRAM_H_INCLUDED_
153 |
--------------------------------------------------------------------------------
/engine/csttools/utils/utl_chineseconv.cpp:
--------------------------------------------------------------------------------
1 | //
2 | // Crystal Text-to-Speech Engine
3 | //
4 | // Copyright (c) 2007 THU-CUHK Joint Research Center for
5 | // Media Sciences, Technologies and Systems. All rights reserved.
6 | //
7 | // http://mjrc.sz.tsinghua.edu.cn
8 | //
9 | // Redistribution and use in source and binary forms, with or without
10 | // modification, is not allowed, unless a valid written license is
11 | // granted by THU-CUHK Joint Research Center.
12 | //
13 | // THU-CUHK Joint Research Center has the rights to create, modify,
14 | // copy, compile, remove, rename, explain and deliver the source codes.
15 | //
16 |
17 | ///
18 | /// @file
19 | ///
20 | /// @brief Implementation file for the conversion between Simplified Chinese and Traditional Chinese
21 | ///
22 | /// @version 0.1.0
23 | /// @date 2007/08/03
24 | ///
25 | /// History:
26 | /// - Version: 0.1.0
27 | /// Author: Jackie (jackiecao@gmail.com)
28 | /// Date: 2007/08/03
29 | /// Changed: Created
30 | ///
31 |
32 | #include "cmn/cmn_error.h"
33 | #include "cmn/cmn_textfile.h"
34 |
35 | #include "utl_chineseconv.h"
36 |
37 | namespace cst
38 | {
39 | namespace cmn
40 | {
41 | void CChineseConvert::terminate()
42 | {
43 | m_S2T_STable = L"";
44 | m_S2T_TTable = L"";
45 | m_S2T_TTable = L"";
46 | m_S2T_STable = L"";
47 | m_bInitialized = false;
48 | }
49 |
50 | bool CChineseConvert::initialize(const wchar_t *wstrFileName)
51 | {
52 | if ( m_bInitialized )
53 | {
54 | terminate();
55 | }
56 |
57 | // load text
58 | std::wstring buf;
59 | CTextFile file;
60 | if ( !file.open(wstrFileName, L"rb") || !file.readString(buf) )
61 | {
62 | return false;
63 | }
64 |
65 | // get each conversion table
66 | std::wstring deliminators = L"\r\n";
67 | std::vector tokens;
68 | str::tokenize(buf, deliminators, tokens);
69 |
70 | if ( tokens.size() != 4
71 | || tokens[0].length() != tokens[1].length()
72 | || tokens[2].length() != tokens[3].length() )
73 | {
74 | return false;
75 | }
76 |
77 | m_S2T_STable = tokens[0];
78 | m_S2T_TTable = tokens[1];
79 | m_T2S_TTable = tokens[2];
80 | m_T2S_STable = tokens[3];
81 |
82 | m_bInitialized = true;
83 | return true;
84 | }
85 |
86 | int CChineseConvert::findInTable(const wstring &table, wchar_t chr) const
87 | {
88 | // the conversion table is sorted ascending
89 | const wchar_t* ptable = table.data();
90 | int idxFst = 0;
91 | int idxLst = (int)table.length()-1;
92 | int idxTmp = -1;
93 |
94 | if (chr < ptable[idxFst] || chr > ptable[idxLst])
95 | return -1;
96 | else if (chr == ptable[idxFst])
97 | return idxFst;
98 | else if (chr == ptable[idxLst])
99 | return idxLst;
100 |
101 | while (idxFst < idxLst)
102 | {
103 | idxTmp = (idxFst + idxLst) / 2;
104 | if (idxTmp == idxFst)
105 | return -1;
106 | if (chr == ptable[idxTmp])
107 | return idxTmp;
108 | else if (chr < ptable[idxTmp])
109 | idxLst = idxTmp;
110 | else
111 | idxFst = idxTmp;
112 | }
113 | return -1;
114 | }
115 |
116 | void CChineseConvert::toTraditional(wstring &str) const
117 | {
118 | if (!m_bInitialized)
119 | return;
120 |
121 | int iTable;
122 | for (size_t i=0; iHistory:
26 | /// - Version: 0.1.0
27 | /// Author: Jackie (jackiecao@gmail.com)
28 | /// Date: 2007/08/03
29 | /// Changed: Created
30 | /// - Version: 0.1.1
31 | /// Author: John (john.zywu@gmail.com)
32 | /// Date: 2007/08/03
33 | /// Changed: Modified the initialization module
34 | ///
35 |
36 | #ifndef _CST_TTS_BASE_CMN_CHN_CONV_H_
37 | #define _CST_TTS_BASE_CMN_CHN_CONV_H_
38 |
39 | #include "cmn/cmn_string.h"
40 |
41 | namespace cst
42 | {
43 | namespace cmn
44 | {
45 | ///
46 | /// @brief The class performs the conversion between Simplified Chinese and Traditional Chinese
47 | ///
48 | class CChineseConvert
49 | {
50 | public:
51 | ///
52 | /// @brief Constructor
53 | ///
54 | CChineseConvert() : m_bInitialized(false) {}
55 |
56 | ///
57 | /// @brief Destructor
58 | ///
59 | virtual ~CChineseConvert() {terminate();}
60 |
61 | ///
62 | /// @brief Initialize the Chinese conversion module
63 | ///
64 | /// @param [in] wstrFileName The file name where conversion table is stored
65 | ///
66 | /// @return Whether conversion module is initialized successfully
67 | ///
68 | virtual bool initialize(const wchar_t *wstrFileName);
69 |
70 | ///
71 | /// @brief Terminate the conversion module, free all data
72 | ///
73 | virtual void terminate();
74 |
75 | ///
76 | /// @brief Convert the input string to Traditional Chinese
77 | ///
78 | /// @param [in] str The input string to be converted
79 | /// @param [out] str Return the converted string in Traditional Chinese
80 | ///
81 | virtual void toTraditional(wstring &str) const;
82 |
83 | ///
84 | /// @brief Convert the input string to Simplified Chinese
85 | ///
86 | /// @param [in] str The input string to be converted
87 | /// @param [out] str Return the converted string in Simplified Chinese
88 | ///
89 | virtual void toSimplified(wstring &str) const;
90 |
91 | protected:
92 | ///
93 | /// @brief Find the specific char in the conversion table, return the index of the char
94 | ///
95 | /// @param [in] table The conversion table where the char is to be located
96 | /// @param [in] chr The char to be found in the table
97 | ///
98 | /// @return The index of the char in the conversion table, or -1 if not found.
99 | ///
100 | int findInTable(const wstring &table, wchar_t chr) const;
101 |
102 | protected:
103 | bool m_bInitialized; ///< Whether conversion module is initialized
104 | wstring m_S2T_STable; ///< The Simplified characters table for Simplified to Traditional conversion (sorted by Unicode)
105 | wstring m_S2T_TTable; ///< The Traditional characters table for Simplified to Traditional conversion
106 | wstring m_T2S_TTable; ///< The Traditional characters table for Traditional to Simplified conversion (sorted by Unicode)
107 | wstring m_T2S_STable; ///< The Simplified characters table for Traditional to Simplified conversion
108 | };
109 |
110 | } // End of namespace cmn
111 | } // End of namespace cst
112 |
113 | #endif // End of _CST_TTS_BASE_CMN_CHN_CONV_H_
--------------------------------------------------------------------------------
/engine/csttools/utils/utl_pinyin.cpp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thuhcsi/Crystal/d129e5400360e7c696d071c1576803917aaf9286/engine/csttools/utils/utl_pinyin.cpp
--------------------------------------------------------------------------------
/engine/csttools/utils/utl_pinyin.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thuhcsi/Crystal/d129e5400360e7c696d071c1576803917aaf9286/engine/csttools/utils/utl_pinyin.h
--------------------------------------------------------------------------------
/engine/csttools/utils/utl_string.h:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thuhcsi/Crystal/d129e5400360e7c696d071c1576803917aaf9286/engine/csttools/utils/utl_string.h
--------------------------------------------------------------------------------
/engine/csttools/xml/ssml_traversal.cpp:
--------------------------------------------------------------------------------
1 | //
2 | // Crystal Text-to-Speech Engine
3 | //
4 | // Copyright (c) 2007 THU-CUHK Joint Research Center for
5 | // Media Sciences, Technologies and Systems. All rights reserved.
6 | //
7 | // http://mjrc.sz.tsinghua.edu.cn
8 | //
9 | // Redistribution and use in source and binary forms, with or without
10 | // modification, is not allowed, unless a valid written license is
11 | // granted by THU-CUHK Joint Research Center.
12 | //
13 | // THU-CUHK Joint Research Center has the rights to create, modify,
14 | // copy, compile, remove, rename, explain and deliver the source codes.
15 | //
16 |
17 | ///
18 | /// @file
19 | ///
20 | /// @brief Implementation for traversal operation on SSML document.
21 | ///
22 | /// @version 0.1.0
23 | /// @date 2008/05/28
24 | ///
25 | /// History:
26 | /// - Version: 0.1.0
27 | /// Author: John (john.zywu@gmail.com)
28 | /// Date: 2008/05/28
29 | /// Changed: Created
30 | ///
31 |
32 |
33 | #include "ssml_document.h"
34 | #include "ssml_traversal.h"
35 | #include "cmn/cmn_error.h"
36 |
37 | namespace cst
38 | {
39 | namespace xml
40 | {
41 | int CSSMLTraversal::traverse(xml::CSSMLDocument *pSSMLDocument)
42 | {
43 | ///@todo Error processing of processNode and postProcessNode
44 |
45 | if (pSSMLDocument == NULL)
46 | {
47 | return ERROR_SUCCESS;
48 | }
49 | int retval = ERROR_SUCCESS;
50 |
51 | // use iteration (loop) instead of recursion,
52 | // because in embedded system, recursion might cause crash for using up "system stack"
53 |
54 | // traverse all the nodes using depth first method.
55 | xml::CXMLNode *pCurrentNode = pSSMLDocument->firstChild();
56 | while (pCurrentNode != NULL)
57 | {
58 | bool childProcessed = false;
59 | retval = processNode(pCurrentNode, childProcessed);
60 | if (retval != ERROR_SUCCESS)
61 | {
62 | // error occurred
63 | return retval;
64 | }
65 | if (pCurrentNode->firstChild() != NULL && !childProcessed)
66 | {
67 | // start the children processing
68 | pCurrentNode = pCurrentNode->firstChild();
69 | continue;
70 | }
71 |
72 | while (pCurrentNode != NULL)
73 | {
74 | // current-node processing is over, do post-processing
75 | retval = postProcessNode(pCurrentNode);
76 | if (retval != ERROR_SUCCESS)
77 | {
78 | // error occurred
79 | return retval;
80 | }
81 |
82 | if (pCurrentNode->nextSibling() != NULL)
83 | {
84 | // next sibling first
85 | pCurrentNode = pCurrentNode->nextSibling();
86 | break;
87 | }
88 | else
89 | {
90 | // back to parent
91 | pCurrentNode = pCurrentNode->parent();
92 | }
93 | }
94 | }
95 |
96 | return ERROR_SUCCESS;
97 | }
98 |
99 | int CSSMLTraversal::processNode(xml::CXMLNode *pNode, bool &childProcessed)
100 | {
101 | childProcessed = false;
102 | return ERROR_SUCCESS;
103 | }
104 |
105 | int CSSMLTraversal::postProcessNode(xml::CXMLNode *pNode)
106 | {
107 | return ERROR_SUCCESS;
108 | }
109 | }
110 | }
111 |
--------------------------------------------------------------------------------
/engine/csttools/xml/ssml_traversal.h:
--------------------------------------------------------------------------------
1 | //
2 | // Crystal Text-to-Speech Engine
3 | //
4 | // Copyright (c) 2007 THU-CUHK Joint Research Center for
5 | // Media Sciences, Technologies and Systems. All rights reserved.
6 | //
7 | // http://mjrc.sz.tsinghua.edu.cn
8 | //
9 | // Redistribution and use in source and binary forms, with or without
10 | // modification, is not allowed, unless a valid written license is
11 | // granted by THU-CUHK Joint Research Center.
12 | //
13 | // THU-CUHK Joint Research Center has the rights to create, modify,
14 | // copy, compile, remove, rename, explain and deliver the source codes.
15 | //
16 |
17 | ///
18 | /// @file
19 | ///
20 | /// @brief Definition for traversal operation on SSML document.
21 | ///
22 | /// @version 0.1.0
23 | /// @date 2008/05/28
24 | ///
25 | /// History:
26 | /// - Version: 0.1.0
27 | /// Author: John (john.zywu@gmail.com)
28 | /// Date: 2008/05/28
29 | /// Changed: Created
30 | ///
31 |
32 |
33 | #ifndef _CST_TTS_BASE_SSML_TRAVERSAL_H_
34 | #define _CST_TTS_BASE_SSML_TRAVERSAL_H_
35 |
36 | namespace cst
37 | {
38 | namespace xml
39 | {
40 | ///
41 | /// @brief Base class for traversing an SSML document
42 | ///
43 | class CSSMLTraversal
44 | {
45 | public:
46 | /// Constructor (nothing to initialize)
47 | CSSMLTraversal() {}
48 |
49 | /// Destructor (virtual, as the class is meant to be derived from)
50 | virtual ~CSSMLTraversal() {}
51 |
52 | public:
53 | ///
54 | /// @brief Traverse the whole SSML document
55 | ///
56 | /// The procedure visits all the nodes of the SSML document in depth-first order.
57 | ///
58 | /// When a node is first encountered, the procedure calls
59 | /// "processNode" to perform the specific processing of the node.\n
60 | /// When all the children of the node have been processed, the procedure calls
61 | /// "postProcessNode" to perform the post-processing of the node.
62 | ///
63 | /// @see processNode, postProcessNode
64 | ///
65 | /// @param [in] pSSMLDocument The SSML document to be traversed
66 | ///
67 | /// @return Whether the traversal of the SSML document is successful
68 | ///
69 | virtual int traverse(xml::CSSMLDocument *pSSMLDocument);
70 |
71 | protected:
72 | ///
73 | /// @brief Process a specific SSML document node.
74 | ///
75 | /// The base class only provides an empty implementation.
76 | /// Derived classes should override this function to perform their specific work.
77 | ///
78 | /// If the children of the current node are not processed here (childProcessed = false), the
79 | /// main traversal procedure goes on to traverse the subtree (children of this node).
80 | /// Otherwise (childProcessed = true), the subtree is not traversed.
81 | ///
82 | /// @param [in] pNode The input SSML node to be processed
83 | /// @param [out] childProcessed Returns whether the children of pNode have already been processed
84 | ///
85 | /// @return Whether the operation is successful
86 | /// @retval ERROR_SUCCESS The operation is successful
87 | ///
88 | virtual int processNode(xml::CXMLNode *pNode, bool &childProcessed);
89 |
90 | ///
91 | /// @brief Perform the post-processing of a specific SSML document node
92 | ///
93 | /// This procedure is called after all the child nodes have been processed.
94 | /// The base class only provides an empty implementation.
95 | ///
96 | /// @param [in] pNode The SSML node to be post-processed
97 | ///
98 | /// @return Whether the operation is successful
99 | /// @retval ERROR_SUCCESS The operation is successful
100 | ///
101 | virtual int postProcessNode(xml::CXMLNode *pNode);
102 |
103 | };// class CSSMLTraversal
104 | }// namespace xml
105 | }// namespace cst
106 |
107 | #endif//_CST_TTS_BASE_SSML_TRAVERSAL_H_
108 |
--------------------------------------------------------------------------------
/engine/htslib/Makefile.am:
--------------------------------------------------------------------------------
1 | # Automake input for the HTS engine static library (libHTSEngine.a).
2 | EXTRA_DIST = Makefile.mak
3 | # Header search path; AM_CPPFLAGS replaces the deprecated INCLUDES variable.
4 | AM_CPPFLAGS = -I$(top_srcdir)/include
5 |
6 | lib_LIBRARIES = libHTSEngine.a
7 |
8 | libHTSEngine_a_SOURCES = HTS_audio.c HTS_engine.c HTS_hidden.h HTS_misc.c \
9 | HTS_pstream.c HTS_sstream.c HTS_model.c HTS_vocoder.c \
10 | HTS_gstream.c HTS_label.c
11 |
12 | DISTCLEANFILES = *.log *.out *~
13 |
14 | MAINTAINERCLEANFILES = Makefile.in
15 |
--------------------------------------------------------------------------------
/engine/htslib/Makefile.mak:
--------------------------------------------------------------------------------
1 | # NMAKE makefile: builds hts_engine_API.lib from the HTS engine C sources.
2 | CC = cl
3 | # Compiler options (header directory is ..\include) and librarian options.
4 | CFLAGS = /O2 /Ob2 /Oi /Ot /Oy /GT /GL /TC /I ..\include
5 | LFLAGS = /LTCG
6 | # Object files that are archived into the library.
7 | CORES = HTS_audio.obj HTS_engine.obj HTS_gstream.obj HTS_label.obj HTS_misc.obj HTS_model.obj HTS_pstream.obj HTS_sstream.obj HTS_vocoder.obj
8 | # "all" is the first target in the file and only depends on the library.
9 | all: hts_engine_API.lib
10 | # Archive all the object files into the target library ($@) with the lib tool.
11 | hts_engine_API.lib: $(CORES)
12 | lib $(LFLAGS) /OUT:$@ $(CORES)
13 | # Inference rule: compile each .c source file into a .obj file.
14 | .c.obj:
15 | $(CC) $(CFLAGS) /c $<
16 | # Delete the library and object files in the current directory.
17 | clean:
18 | del *.lib
19 | del *.obj
20 |
--------------------------------------------------------------------------------
/engine/htslib/bin/readme.txt:
--------------------------------------------------------------------------------
1 | Sample code for hts_engine
2 |
3 | hts_engine_o.c is the original sample code from HTS engine 1.0.6.
4 |
5 | hts_engine.c is the code with voiced/unvoiced refinement for Chinese syllables.
6 |
7 | hts_synthesizer.h and hts_synthesizer.cpp are based on these files.
8 |
--------------------------------------------------------------------------------
/engine/ttsbase/datamanage/base_module.h:
--------------------------------------------------------------------------------
1 | //
2 | // Crystal Text-to-Speech Engine
3 | //
4 | // Copyright (c) 2007 THU-CUHK Joint Research Center for
5 | // Media Sciences, Technologies and Systems. All rights reserved.
6 | //
7 | // http://mjrc.sz.tsinghua.edu.cn
8 | //
9 | // Redistribution and use in source and binary forms, with or without
10 | // modification, is not allowed, unless a valid written license is
11 | // granted by THU-CUHK Joint Research Center.
12 | //
13 | // THU-CUHK Joint Research Center has the rights to create, modify,
14 | // copy, compile, remove, rename, explain and deliver the source codes.
15 | //
16 |
17 | ///
18 | /// @file
19 | ///
20 | /// @brief Header file for the base module class
21 | ///
22 | /// History:
23 | /// - Version: 0.1.0
24 | /// Author: John (john.zywu@gmail.com)
25 | /// Date: 2008/05/25
26 | /// Changed: Created
27 | ///
28 |
29 | #ifndef _CST_TTS_BASE_BASE_MODULE_H_
30 | #define _CST_TTS_BASE_BASE_MODULE_H_
31 |
32 | namespace cst
33 | {
34 | // Forward declaration of the SSML document class
35 | namespace xml {class CSSMLDocument;}
36 |
37 | namespace tts
38 | {
39 | namespace base
40 | {
41 | // Forward declaration of the data manager class
42 | class CDataManager;
43 |
44 | ///
45 | /// @brief The abstract base class for the TTS engine modules
46 | ///
47 | class CModule
48 | {
49 | public:
50 | /// Constructor: stores the data manager handle as-is (the destructor does not release it)
51 | CModule(const CDataManager *pDataManager) : m_pDataManager(pDataManager) {}
52 |
53 | /// Destructor
54 | virtual ~CModule() {}
55 |
56 | public:
57 | ///
58 | /// @brief The main processing procedure for the module operation
59 | ///
60 | /// @param [in,out] pSSMLDocument The SSML document to be processed;
61 | /// on return it is the SSML document which has been processed
62 | ///
63 | /// @return Whether the operation is successful
64 | /// @retval ERROR_SUCCESS The operation is successful
65 | ///
66 | virtual int process(xml::CSSMLDocument *pSSMLDocument) = 0;
67 |
68 | public:
69 | /// @brief Get the handle to the data manager, cast to the type specified by the caller
70 | /// @tparam _Ty The data manager type requested by the caller
71 | /// @return The stored data manager handle converted to const _Ty* through cst_dynamic_cast
72 | template <class _Ty>
73 | const _Ty *getDataManager() const
74 | {
75 | return cst_dynamic_cast<const _Ty*>(m_pDataManager);
76 | }
77 |
78 | private:
79 | /// Handle to the data manager which maintains the supporting data needed by the TTS engine
80 | const CDataManager *m_pDataManager;
81 | };
82 |
83 | }//namespace base
84 | }//namespace tts
85 | }//namespace cst
86 |
87 | #endif//_CST_TTS_BASE_BASE_MODULE_H_
88 |
--------------------------------------------------------------------------------
/engine/ttsbase/datamanage/base_moduleapi.h:
--------------------------------------------------------------------------------
1 | //
2 | // Crystal Text-to-Speech Engine
3 | //
4 | // Copyright (c) 2007 THU-CUHK Joint Research Center for
5 | // Media Sciences, Technologies and Systems. All rights reserved.
6 | //
7 | // http://mjrc.sz.tsinghua.edu.cn
8 | //
9 | // Redistribution and use in source and binary forms, with or without
10 | // modification, is not allowed, unless a valid written license is
11 | // granted by THU-CUHK Joint Research Center.
12 | //
13 | // THU-CUHK Joint Research Center has the rights to create, modify,
14 | // copy, compile, remove, rename, explain and deliver the source codes.
15 | //
16 |
17 | ///
18 | /// @file
19 | ///
20 | /// @brief Header file for the module API related definitions
21 | ///
22 | /// History:
23 | /// - Version: 0.1.0
24 | /// Author: John (john.zywu@gmail.com)
25 | /// Date: 2008/05/25
26 | /// Changed: Created
27 | ///
28 |
29 | #ifndef _CST_TTS_BASE_BASE_MODULEAPI_H_
30 | #define _CST_TTS_BASE_BASE_MODULEAPI_H_
31 |
32 | #include