├── .cvsignore
├── .gitignore
├── .project
├── .pydevproject
├── .travis.yml
├── MANIFEST.in
├── README.md
├── example
├── Taiwanese.htsvoice
├── __init__.py
├── example.py
└── full.lab
├── hts_engine_python.c
├── setup.py
└── src
├── .cvsignore
├── AUTHORS
├── COPYING
├── INSTALL
├── Makefile.am
├── Makefile.mak
├── NEWS
├── README
├── bin
├── .cvsignore
├── Makefile.am
├── Makefile.mak
└── hts_engine.c
├── config
└── .cvsignore
├── configure.ac
├── include
└── HTS_engine.h
└── lib
├── .cvsignore
├── HTS_audio.c
├── HTS_engine.c
├── HTS_gstream.c
├── HTS_hidden.h
├── HTS_label.c
├── HTS_misc.c
├── HTS_model.c
├── HTS_pstream.c
├── HTS_sstream.c
├── HTS_vocoder.c
├── Makefile.am
└── Makefile.mak
/.cvsignore:
--------------------------------------------------------------------------------
1 | memo.txt
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /build
2 | *.wav
3 | /dist/
4 | /venv/
5 | /MANIFEST
6 | /csv_data/
7 |
--------------------------------------------------------------------------------
/.project:
--------------------------------------------------------------------------------
1 |
2 |
3 | hts_engine_python
4 |
5 |
6 |
7 |
8 |
9 | org.python.pydev.PyDevBuilder
10 |
11 |
12 |
13 |
14 |
15 | org.python.pydev.pythonNature
16 |
17 |
18 |
--------------------------------------------------------------------------------
/.pydevproject:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Default
6 | python 3.0
7 |
8 |
9 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | sudo: false
2 | language: python
3 | python:
4 | - '3.3'
5 | - '3.4'
6 | - '3.5'
7 | env:
8 | global:
9 | matrix:
10 | install:
11 | - pip install .
12 | - pip install python-coveralls
13 | branches:
14 | only:
15 | - master
16 | - /\d+\.\d+\.\d+/
17 | script:
18 | - coverage run example/example.py
19 | matrix:
20 | after_success:
21 | - coverage report
22 | - coveralls
23 | deploy:
24 | provider: pypi
25 | user: sih4sing5hong5
26 | password:
27 | secure: ZMIErucnJcngkW5gK8B+/nykwDvyHZvgkgw9bki2xTVm2szEvbS4Tz1NgWpEchFj36irHS46c0gGGLjfNNnJbZHRq0R/uXw56gile+BYW70XYGb7aDbQjZFiOGkoAdXiqjSeC4NnTktx4v7UMMJgMwe2R9Hs0EVOPc9iFbxBZdV9t5luIRg0fDBo6lDdtRTWug/Eh0ivpjNTcLoOCfYgjCoFVZuCZkHHLuSLPo9mjIbtINqL29ZyFGdK3rzv2GeV1l4kaHAIpXgh6Noh0MB/4WbB4+7IzbDHVSazxFAdQZhrYYhIaIRr6HRHT48OyFOtZEAwwS6qGiK06aKgeFzTBH/Nn2Ap0gTzgbLBSEpwVe3mPXqCcjXS9A5oJMRwfFQktef66ggLS1N5dmxwaTfMCCYYTvSceBXoqO6Eyo9sC/lCSn6Et6ljwn+7i2lByBW49w52FUtvOaP1fnXq18+VXMo2rYyZKZ97kOcJwupt5HwtifBA9BJhpvJZu0ByZ4SNlwxX/6nNK9Yce+HBjSLA95+gvj8lJG/0pY1aTCOQYoHo/uBBCr1nygVL8rZhZm2kpdg13zqYUl7ifBV5QtcxVf7CjVBnlkisazyNj0NW2vecC2kR8B5m/oIvTXl7lGeUH5r9eJJw2wFc29m+JG2o8+OlFJDI10mjHTFqOXlRc5w=
28 | on:
29 | tags: true
30 | repo: sih4sing5hong5/hts_engine_python
31 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include src *.h *.c
2 | include README.md
3 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # HTS Engine Python Extension
2 |
3 | [Build status on Travis CI](https://travis-ci.org/sih4sing5hong5/hts_engine_python)
4 |
5 | This package is an extension for those who want to use hts_engine from Python 3.
6 |
7 | htsengine will be updated whenever hts_engine is updated.
8 |
9 | ## Installation
10 | ```bash
11 | pip install htsengine
12 | ```
13 |
14 | ### Uninstallation
15 | ```bash
16 | pip uninstall htsengine
17 | ```
18 |
19 | ## Usage
20 | See `example/example.py`
21 | ```python3
22 | import htsengine
23 | model = 'Taiwanese.htsvoice'
24 | label = [line.rstrip() for line in open('full.lab')]
25 |
26 | s, f, n, a = htsengine.synthesize(model, label)
27 | import wave
28 | wavFile = wave.open('result.wav', 'wb')
29 | wavFile.setsampwidth(s)
30 | wavFile.setframerate(f)
31 | wavFile.setnchannels(n)
32 | wavFile.writeframesraw(a)
33 | wavFile.close()
34 | ```
35 |
36 | ## Update with newest hts_engine
37 | ```bash
38 | sudo apt-get install -y git-cvs
39 | rsync -av --delete rsync://hts-engine.cvs.sourceforge.net/cvsroot/hts-engine/ csv_data
40 | git checkout origin
41 | git cvsimport -p x -v -d `pwd`/csv_data/ hts_engine_API
42 | git checkout master
43 | git push origin origin
44 | ```
45 | Reference: http://ghantoos.org/2010/11/11/migrating-sourceforge-cvs-source-repository-to-github/
46 |
47 | If I forget to update, please let me know by opening a pull request on GitHub.
48 |
49 | Github website: https://github.com/sih4sing5hong5/hts_engine_python
50 |
51 | ## Install HTS Engine Execution Files
52 | 1. Enter the src/ directory.
53 | 2. Run these two commands:
54 | ```bash
55 | aclocal
56 | autoconf
57 | ```
58 | 3. Generate the ChangeLog file; choose one of the following commands to run:
59 | ```bash
60 | git log > ChangeLog # if you have the git version control
61 | cvs log > ChangeLog # if you have the cvs version control
62 | touch ChangeLog # otherwise
63 | ```
64 | 4. Run this command:
65 | ```bash
66 | automake --add-missing
67 | ```
68 | 5. And then see the src/INSTALL file.
69 |
70 | ## HTS Engine Readme
71 | ```
72 | ===============================================================================
73 | The HMM-Based Speech Synthesis Engine "hts_engine API" version 1.08
74 | release December 25, 2013
75 |
76 |
77 | The hts_engine API is an API version of hts_engine which has been released
78 | since HTS version 1.1. It has been being developed by the HTS working group
79 | (see "Who we are" below) and some graduate students in Nagoya Institute of
80 | Technology (see "AUTHORS" in the same directory).
81 |
82 | *******************************************************************************
83 | Copying
84 | *******************************************************************************
85 |
86 | The hts_engine API is released under the Modified BSD license (see
87 | http://www.opensource.org/). Using and distributing this software is free
88 | (without restriction including without limitation the rights to use, copy,
89 | modify, merge, publish, distribute, sublicense, and/or sell copies of this
90 | work, and to permit persons to whom this work is furnished to do so) subject to
91 | the conditions in the following license:
92 |
93 | /* ----------------------------------------------------------------- */
94 | /* The HMM-Based Speech Synthesis Engine "hts_engine API" */
95 | /* developed by HTS Working Group */
96 | /* http://hts-engine.sourceforge.net/ */
97 | /* ----------------------------------------------------------------- */
98 | /* */
99 | /* Copyright (c) 2001-2013 Nagoya Institute of Technology */
100 | /* Department of Computer Science */
101 | /* */
102 | /* 2001-2008 Tokyo Institute of Technology */
103 | /* Interdisciplinary Graduate School of */
104 | /* Science and Engineering */
105 | /* */
106 | /* All rights reserved. */
107 | /* */
108 | /* Redistribution and use in source and binary forms, with or */
109 | /* without modification, are permitted provided that the following */
110 | /* conditions are met: */
111 | /* */
112 | /* - Redistributions of source code must retain the above copyright */
113 | /* notice, this list of conditions and the following disclaimer. */
114 | /* - Redistributions in binary form must reproduce the above */
115 | /* copyright notice, this list of conditions and the following */
116 | /* disclaimer in the documentation and/or other materials provided */
117 | /* with the distribution. */
118 | /* - Neither the name of the HTS working group nor the names of its */
119 | /* contributors may be used to endorse or promote products derived */
120 | /* from this software without specific prior written permission. */
121 | /* */
122 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
123 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
124 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
125 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
126 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
127 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */
128 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
129 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
130 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
131 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */
132 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */
133 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
134 | /* POSSIBILITY OF SUCH DAMAGE. */
135 | /* ----------------------------------------------------------------- */
136 |
137 | Although this software is free, we still offer no warranties and no
138 | maintenance. We will continue to endeavor to fix bugs and answer queries when
139 | can, but are not in a position to guarantee it. We will consider consultancy if
140 | desired, please contacts us for details.
141 |
142 | If you are using the hts_engine API in commercial environments, even though no
143 | license is required, we would be grateful if you let us know as it helps
144 | justify ourselves to our various sponsors. We also strongly encourage you to
145 |
146 | * refer to the use of hts_engine API in any publications that use this
147 | software
148 | * report bugs, where possible with bug fixes, that are found
149 |
150 | See also "COPYING" file in the current directory for details.
151 |
152 | *******************************************************************************
153 | Installation
154 | *******************************************************************************
155 |
156 | See "INSTALL" in the same directory for details.
157 |
158 | *******************************************************************************
159 | Documentation
160 | *******************************************************************************
161 |
162 | Reference manual of hts_engine API is available at
163 |
164 | http://hts-engine.sourceforge.net/
165 |
166 | *******************************************************************************
167 | Acknowledgements
168 | *******************************************************************************
169 |
170 | Keiichi Tokuda
171 | Shinji Sako
172 | Heiga Zen
173 | Keiichiro Oura
174 | Kazuhiro Nakamura
175 | Keijiro Saino
176 |
177 | *******************************************************************************
178 | Who we are
179 | *******************************************************************************
180 |
181 | The HTS working group is a voluntary group for developing the HMM-Based Speech
182 | Synthesis System. Current members are
183 |
184 | Keiichi Tokuda http://www.sp.nitech.ac.jp/~tokuda/
185 | (Produce and Design)
186 | Keiichiro Oura http://www.sp.nitech.ac.jp/~uratec/
187 | (Design and Development, Main Maintainer)
188 | Kei Hashimoto http://www.sp.nitech.ac.jp/~bonanza/
189 | Sayaka Shiota http://www.sp.nitech.ac.jp/~sayaka/
190 | Shinji Takaki http://www.sp.nitech.ac.jp/~k-prr44/
191 | Heiga Zen
192 | Junichi Yamagishi http://homepages.inf.ed.ac.uk/jyamagis/
193 | Tomoki Toda http://spalab.naist.jp/~tomoki/index_e.html
194 | Takashi Nose
195 | Shinji Sako http://www.mmsp.nitech.ac.jp/~sako/
196 | Alan W. Black http://www.cs.cmu.edu/~awb/
197 |
198 | and the members are dynamically changing. The current formal contact address of
199 | HTS working group and a mailing list for HTS users can be found at
200 | http://hts.sp.nitech.ac.jp/
201 | ===============================================================================
202 | ```
203 |
--------------------------------------------------------------------------------
/example/Taiwanese.htsvoice:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/i3thuan5/hts_engine_python/d6a67d42126533b1b4c7b67d8326f6d3159416b3/example/Taiwanese.htsvoice
--------------------------------------------------------------------------------
/example/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/i3thuan5/hts_engine_python/d6a67d42126533b1b4c7b67d8326f6d3159416b3/example/__init__.py
--------------------------------------------------------------------------------
/example/example.py:
--------------------------------------------------------------------------------
"""Synthesize the bundled example label file and write it to ``result.wav``."""
import wave
from os.path import dirname, join

import htsengine

# Resolve the sample voice and label file relative to this script so the
# example works from any working directory.
here = dirname(__file__)
model = join(here, 'Taiwanese.htsvoice')

# Read the full-context labels, one per line, and close the file right away
# instead of leaving the handle open until garbage collection.
with open(join(here, 'full.lab')) as label_file:
    label = [line.rstrip() for line in label_file]

# The four returned values are used below as sample width, frame rate,
# channel count and raw audio frames.
s, f, n, a = htsengine.synthesize(model, label)

wavFile = wave.open('result.wav', 'wb')
# try/finally instead of ``with``: wave objects only support the context
# manager protocol from Python 3.4 on, and this also closes the file when
# writing fails.
try:
    wavFile.setsampwidth(s)
    wavFile.setframerate(f)
    wavFile.setnchannels(n)
    wavFile.writeframesraw(a)
finally:
    wavFile.close()
14 |
--------------------------------------------------------------------------------
/example/full.lab:
--------------------------------------------------------------------------------
1 | x-sil+x/tiau7:x/su5:x!x@x/ku3:x^x_x
2 | sil-g+ua/tiau7:1/su5:0!1@1/ku3:0^5_5
3 | g-ua+ai/tiau7:1/su5:0!1@1/ku3:0^5_5
4 | ua-ai+tsh/tiau7:2/su5:0!1@1/ku3:1^4_5
5 | ai-tsh+ua/tiau7:2/su5:0!3@3/ku3:2^3_5
6 | tsh-ua+b/tiau7:2/su5:0!3@3/ku3:2^3_5
7 | ua-b+un/tiau7:7/su5:1!2@3/ku3:3^2_5
8 | b-un+l/tiau7:7/su5:1!2@3/ku3:3^2_5
9 | un-l+e/tiau7:7/su5:2!1@3/ku3:4^1_5
10 | l-e+sil/tiau7:7/su5:2!1@3/ku3:4^1_5
11 | x-sil+x/tiau7:x/su5:x!x@x/ku3:x^x_x
12 |
13 |
--------------------------------------------------------------------------------
/hts_engine_python.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include "HTS_engine.h"
3 | #include "HTS_hidden.h"
4 |
5 | /* HTS_Engine_return_generated_speech: return generated speech */
6 | static PyObject* HTS_Engine_return_generated_speech(HTS_Engine * engine)
7 | {
8 | size_t i;
9 | double x;
10 | short temp;
11 | HTS_GStreamSet *gss = &engine->gss;
12 | int wav_bytes=sizeof(short)*HTS_GStreamSet_get_total_nsamples(gss);
13 | char *wav=malloc(wav_bytes);
14 |
15 | for (i = 0; i < HTS_GStreamSet_get_total_nsamples(gss); i++) {
16 | x = HTS_GStreamSet_get_speech(gss, i);
17 | if (x > 32767.0)
18 | temp = 32767;
19 | else if (x < -32768.0)
20 | temp = -32768;
21 | else
22 | temp = (short) x;
23 | memcpy(wav+i*sizeof(short),&temp,sizeof(short));
24 | }
25 | return PyByteArray_FromStringAndSize(wav,wav_bytes);
26 | }
27 | static PyObject *
28 | hts_engine_python_synthesize(PyObject *self, PyObject *args)
29 | {
30 | const char *model_filename;
31 | HTS_Engine engine;
32 | PyObject *full_label_list;
33 | int full_label_length;
34 | char **full_label_array;
35 | PyObject *PySampwidth,*PyFramerate,*PyChannels,*PyWav;
36 | PyObject *PyResult=PyTuple_New(4);
37 | int i;
38 |
39 | if (!PyArg_ParseTuple(args, "sO!", &model_filename,&PyList_Type, &full_label_list))
40 | return NULL;
41 |
42 | HTS_Engine_initialize(&engine);
43 |
44 | /* load HTS voices */
45 | if (HTS_Engine_load(&engine, (char**)&model_filename, 1) != TRUE) {
46 | fprintf(stderr, "Error: HTS voices cannot be loaded.\n");
47 | HTS_Engine_clear(&engine);
48 | exit(1);
49 | }
50 |
51 | full_label_length=PyList_Size(full_label_list);
52 | full_label_array=malloc(sizeof(char*)*full_label_length);
53 | for(i=0;i
59 |
60 | #include "HTS_engine.h"
61 |
62 | /* usage: output usage */
63 | void usage(void)
64 | {
65 | fprintf(stderr, "%s\n", HTS_COPYRIGHT);
66 | fprintf(stderr, "hts_engine - The HMM-based speech synthesis engine \"hts_engine API\"\n");
67 | fprintf(stderr, "\n");
68 | fprintf(stderr, " usage:\n");
69 | fprintf(stderr, " hts_engine [ options ] [ infile ]\n");
70 | fprintf(stderr, " options: [ def][ min-- max]\n");
71 | fprintf(stderr, " -m htsvoice : HTS voice files [ N/A]\n");
72 | fprintf(stderr, " -od s : filename of output label with duration [ N/A]\n");
73 | fprintf(stderr, " -om s : filename of output spectrum [ N/A]\n");
74 | fprintf(stderr, " -of s : filename of output log F0 [ N/A]\n");
75 | fprintf(stderr, " -ol s : filename of output low-pass filter [ N/A]\n");
76 | fprintf(stderr, " -or s : filename of output raw audio (generated speech) [ N/A]\n");
77 | fprintf(stderr, " -ow s : filename of output wav audio (generated speech) [ N/A]\n");
78 | fprintf(stderr, " -ot s : filename of output trace information [ N/A]\n");
79 | fprintf(stderr, " -vp : use phoneme alignment for duration [ N/A]\n");
80 | fprintf(stderr, " -i i f1 .. fi : enable interpolation & specify number(i),coefficient(f) [ N/A]\n");
81 | fprintf(stderr, " -s i : sampling frequency [ auto][ 1-- ]\n");
82 | fprintf(stderr, " -p i : frame period (point) [ auto][ 1-- ]\n");
83 | fprintf(stderr, " -a f : all-pass constant [ auto][ 0.0-- 1.0]\n");
84 | fprintf(stderr, " -b f : postfiltering coefficient [ 0.0][ 0.0-- 1.0]\n");
85 | fprintf(stderr, " -r f : speech speed rate [ 1.0][ 0.0-- ]\n");
86 | fprintf(stderr, " -fm f : additional half-tone [ 0.0][ -- ]\n");
87 | fprintf(stderr, " -u f : voiced/unvoiced threshold [ 0.5][ 0.0-- 1.0]\n");
88 | fprintf(stderr, " -jm f : weight of GV for spectrum [ 1.0][ 0.0-- ]\n");
89 | fprintf(stderr, " -jf f : weight of GV for log F0 [ 1.0][ 0.0-- ]\n");
90 | fprintf(stderr, " -g f : volume (dB) [ 0.0][ -- ]\n");
91 | fprintf(stderr, " -z i : audio buffer size (if i==0, turn off) [ 0][ 0-- ]\n");
92 | fprintf(stderr, " infile:\n");
93 | fprintf(stderr, " label file\n");
94 | fprintf(stderr, " note:\n");
95 | fprintf(stderr, " generated spectrum, log F0, and low-pass filter coefficient\n");
96 | fprintf(stderr, " sequences are saved in natural endian, binary (float) format.\n");
97 | fprintf(stderr, "\n");
98 |
99 | exit(0);
100 | }
101 |
102 | int main(int argc, char **argv)
103 | {
104 | int i;
105 | double f;
106 |
107 | /* hts_engine API */
108 | HTS_Engine engine;
109 |
110 | /* HTS voices */
111 | size_t num_voices;
112 | char **fn_voices;
113 |
114 | /* input label file name */
115 | char *labfn = NULL;
116 |
117 | /* output file pointers */
118 | FILE *durfp = NULL, *mgcfp = NULL, *lf0fp = NULL, *lpffp = NULL, *wavfp = NULL, *rawfp = NULL, *tracefp = NULL;
119 |
120 | /* interpolation weights */
121 | size_t num_interpolation_weights;
122 |
123 | /* output usage */
124 | if (argc <= 1)
125 | usage();
126 |
127 | /* initialize hts_engine API */
128 | HTS_Engine_initialize(&engine);
129 |
130 | /* get HTS voice file names */
131 | num_voices = 0;
132 | fn_voices = (char **) malloc(argc * sizeof(char *));
133 | for (i = 0; i < argc; i++) {
134 | if (argv[i][0] == '-' && argv[i][1] == 'm')
135 | fn_voices[num_voices++] = argv[++i];
136 | if (argv[i][0] == '-' && argv[i][1] == 'h')
137 | usage();
138 | }
139 | if (num_voices == 0) {
140 | fprintf(stderr, "Error: HTS voice must be specified.\n");
141 | free(fn_voices);
142 | exit(1);
143 | }
144 |
145 | /* load HTS voices */
146 | if (HTS_Engine_load(&engine, fn_voices, num_voices) != TRUE) {
147 | fprintf(stderr, "Error: HTS voices cannot be loaded.\n");
148 | free(fn_voices);
149 | HTS_Engine_clear(&engine);
150 | exit(1);
151 | }
152 | free(fn_voices);
153 |
154 | /* get options */
155 | while (--argc) {
156 | if (**++argv == '-') {
157 | switch (*(*argv + 1)) {
158 | case 'v':
159 | switch (*(*argv + 2)) {
160 | case 'p':
161 | HTS_Engine_set_phoneme_alignment_flag(&engine, TRUE);
162 | break;
163 | default:
164 | fprintf(stderr, "Error: Invalid option '-v%c'.\n", *(*argv + 2));
165 | HTS_Engine_clear(&engine);
166 | exit(1);
167 | }
168 | break;
169 | case 'o':
170 | switch (*(*argv + 2)) {
171 | case 'w':
172 | wavfp = fopen(*++argv, "wb");
173 | break;
174 | case 'r':
175 | rawfp = fopen(*++argv, "wb");
176 | break;
177 | case 'd':
178 | durfp = fopen(*++argv, "wt");
179 | break;
180 | case 'm':
181 | mgcfp = fopen(*++argv, "wb");
182 | break;
183 | case 'f':
184 | case 'p':
185 | lf0fp = fopen(*++argv, "wb");
186 | break;
187 | case 'l':
188 | lpffp = fopen(*++argv, "wb");
189 | break;
190 | case 't':
191 | tracefp = fopen(*++argv, "wt");
192 | break;
193 | default:
194 | fprintf(stderr, "Error: Invalid option '-o%c'.\n", *(*argv + 2));
195 | HTS_Engine_clear(&engine);
196 | exit(1);
197 | }
198 | --argc;
199 | break;
200 | case 'h':
201 | usage();
202 | break;
203 | case 'm':
204 | argv++; /* HTS voices were already loaded */
205 | --argc;
206 | break;
207 | case 's':
208 | HTS_Engine_set_sampling_frequency(&engine, (size_t) atoi(*++argv));
209 | --argc;
210 | break;
211 | case 'p':
212 | HTS_Engine_set_fperiod(&engine, (size_t) atoi(*++argv));
213 | --argc;
214 | break;
215 | case 'a':
216 | HTS_Engine_set_alpha(&engine, atof(*++argv));
217 | --argc;
218 | break;
219 | case 'b':
220 | HTS_Engine_set_beta(&engine, atof(*++argv));
221 | --argc;
222 | break;
223 | case 'r':
224 | HTS_Engine_set_speed(&engine, atof(*++argv));
225 | --argc;
226 | break;
227 | case 'f':
228 | switch (*(*argv + 2)) {
229 | case 'm':
230 | HTS_Engine_add_half_tone(&engine, atof(*++argv));
231 | break;
232 | default:
233 | fprintf(stderr, "Error: Invalid option '-f%c'.\n", *(*argv + 2));
234 | HTS_Engine_clear(&engine);
235 | exit(1);
236 | }
237 | --argc;
238 | break;
239 | case 'u':
240 | HTS_Engine_set_msd_threshold(&engine, 1, atof(*++argv));
241 | --argc;
242 | break;
243 | case 'i':
244 | num_interpolation_weights = atoi(*++argv);
245 | argc--;
246 | if (num_interpolation_weights != num_voices) {
247 | HTS_Engine_clear(&engine);
248 | exit(1);
249 | }
250 | for (i = 0; i < num_interpolation_weights; i++) {
251 | f = atof(*++argv);
252 | argc--;
253 | HTS_Engine_set_duration_interpolation_weight(&engine, i, f);
254 | HTS_Engine_set_parameter_interpolation_weight(&engine, i, 0, f);
255 | HTS_Engine_set_parameter_interpolation_weight(&engine, i, 1, f);
256 | HTS_Engine_set_gv_interpolation_weight(&engine, i, 0, f);
257 | HTS_Engine_set_gv_interpolation_weight(&engine, i, 1, f);
258 | }
259 | break;
260 | case 'j':
261 | switch (*(*argv + 2)) {
262 | case 'm':
263 | HTS_Engine_set_gv_weight(&engine, 0, atof(*++argv));
264 | break;
265 | case 'f':
266 | case 'p':
267 | HTS_Engine_set_gv_weight(&engine, 1, atof(*++argv));
268 | break;
269 | default:
270 | fprintf(stderr, "Error: Invalid option '-j%c'.\n", *(*argv + 2));
271 | HTS_Engine_clear(&engine);
272 | exit(1);
273 | }
274 | --argc;
275 | break;
276 | case 'g':
277 | HTS_Engine_set_volume(&engine, atof(*++argv));
278 | --argc;
279 | break;
280 | case 'z':
281 | HTS_Engine_set_audio_buff_size(&engine, (size_t) atoi(*++argv));
282 | --argc;
283 | break;
284 | default:
285 | fprintf(stderr, "Error: Invalid option '-%c'.\n", *(*argv + 1));
286 | HTS_Engine_clear(&engine);
287 | exit(1);
288 | }
289 | } else {
290 | labfn = *argv;
291 | }
292 | }
293 |
294 | /* synthesize */
295 | if (HTS_Engine_synthesize_from_fn(&engine, labfn) != TRUE) {
296 | fprintf(stderr, "Error: waveform cannot be synthesized.\n");
297 | HTS_Engine_clear(&engine);
298 | exit(1);
299 | }
300 |
301 | /* output */
302 | if (tracefp != NULL)
303 | HTS_Engine_save_information(&engine, tracefp);
304 | if (durfp != NULL)
305 | HTS_Engine_save_label(&engine, durfp);
306 | if (rawfp)
307 | HTS_Engine_save_generated_speech(&engine, rawfp);
308 | if (wavfp)
309 | HTS_Engine_save_riff(&engine, wavfp);
310 | if (mgcfp)
311 | HTS_Engine_save_generated_parameter(&engine, 0, mgcfp);
312 | if (lf0fp)
313 | HTS_Engine_save_generated_parameter(&engine, 1, lf0fp);
314 | if (lpffp)
315 | HTS_Engine_save_generated_parameter(&engine, 2, lpffp);
316 |
317 | /* reset */
318 | HTS_Engine_refresh(&engine);
319 |
320 | /* free memory */
321 | HTS_Engine_clear(&engine);
322 |
323 | /* close files */
324 | if (durfp != NULL)
325 | fclose(durfp);
326 | if (mgcfp != NULL)
327 | fclose(mgcfp);
328 | if (lf0fp != NULL)
329 | fclose(lf0fp);
330 | if (lpffp != NULL)
331 | fclose(lpffp);
332 | if (wavfp != NULL)
333 | fclose(wavfp);
334 | if (rawfp != NULL)
335 | fclose(rawfp);
336 | if (tracefp != NULL)
337 | fclose(tracefp);
338 |
339 | return 0;
340 | }
341 |
342 | HTS_ENGINE_C_END;
343 |
344 | #endif /* !HTS_ENGINE_C */
345 |
--------------------------------------------------------------------------------
/src/config/.cvsignore:
--------------------------------------------------------------------------------
1 | depcomp
2 | install-sh
3 | missing
4 | config.guess
5 | config.sub
6 |
--------------------------------------------------------------------------------
/src/configure.ac:
--------------------------------------------------------------------------------
1 | # -*- Autoconf -*-
2 | # Process this file with autoconf to produce a configure script.
3 |
4 | AC_PREREQ(2.59)
5 | AC_INIT(hts_engine_API, 1.10, hts-engine-users@lists.sourceforge.net, hts_engine_API)
6 | AC_CONFIG_AUX_DIR([config])
7 | AC_COPYRIGHT(Copyright 2001-2015 Nagoya Institute of Technology)
8 | AC_COPYRIGHT(Copyright 2001-2008 Tokyo Institute of Technology)
9 | AM_INIT_AUTOMAKE
10 |
11 | # Checks for C compiler
12 | AC_PROG_CC
13 | AM_PROG_CC_C_O
14 | AC_PROG_INSTALL
15 | AC_PROG_RANLIB
16 | AN_MAKEVAR([AR], [AC_PROG_AR])
17 | AN_PROGRAM([ar], [AC_PROG_AR])
18 | AC_DEFUN([AC_PROG_AR], [AC_CHECK_TOOL(AR, ar, :)])
19 | AC_PROG_AR
20 |
21 |
22 | # Checks for libraries.
23 | AC_CHECK_LIB([m], [log])
24 |
25 |
26 | # Checks for header files.
27 | AC_HEADER_STDC
28 | AC_CHECK_HEADERS([stdlib.h string.h])
29 |
30 |
31 | # Checks for typedefs, structures, and compiler characteristics.
32 | AC_C_CONST
33 | AC_TYPE_SIZE_T
34 |
35 | # Checks for library functions.
36 | AC_FUNC_VPRINTF
37 | AC_CHECK_FUNCS([sqrt strchr strrchr strstr])
38 |
39 |
40 | # Checks whether to compile for embedded devices (--enable-embedded defines HTS_EMBEDDED)
41 | AC_ARG_ENABLE(embedded, [ --enable-embedded turn on compiling for embedded devices (default=no)],,enable_embedded=no)
42 | AC_MSG_CHECKING(whether to enable compiling for embedded devices)
43 | if test x$enable_embedded = xyes; then
44 | AC_MSG_RESULT(yes)
45 | AC_DEFINE(HTS_EMBEDDED)
46 | else
47 | AC_MSG_RESULT(no)
48 | fi
49 |
50 |
51 | # Checks whether to use the memory allocation/free functions of speech tools (--enable-festival defines FESTIVAL)
52 | AC_ARG_ENABLE(festival, [ --enable-festival use memory allocation/free functions of speech tools (default=no)],,enable_festival=no)
53 | AC_MSG_CHECKING(whether to use memory allocation/free functions of speech tools)
54 | if test x$enable_festival = xyes; then
55 | AC_MSG_RESULT(yes)
56 | AC_DEFINE(FESTIVAL)
57 | else
58 | AC_MSG_RESULT(no)
59 | fi
60 |
61 |
62 | AC_CANONICAL_HOST
63 | AC_C_BIGENDIAN
64 |
65 |
66 | # Checks for the winmm library, required for audio devices on Windows hosts
67 | case "$host_os" in
68 | *win32* | *wince* | *cygwin* | *mingw* )
69 | AC_HAVE_LIBRARY([winmm],,AC_MSG_ERROR(No winmm))
70 | ;;
71 | *)
72 | ;;
73 | esac
74 |
75 |
76 | AC_CONFIG_FILES([Makefile bin/Makefile lib/Makefile])
77 |
78 | AC_OUTPUT
79 |
80 |
--------------------------------------------------------------------------------
/src/include/HTS_engine.h:
--------------------------------------------------------------------------------
1 | /* ----------------------------------------------------------------- */
2 | /* The HMM-Based Speech Synthesis Engine "hts_engine API" */
3 | /* developed by HTS Working Group */
4 | /* http://hts-engine.sourceforge.net/ */
5 | /* ----------------------------------------------------------------- */
6 | /* */
7 | /* Copyright (c) 2001-2015 Nagoya Institute of Technology */
8 | /* Department of Computer Science */
9 | /* */
10 | /* 2001-2008 Tokyo Institute of Technology */
11 | /* Interdisciplinary Graduate School of */
12 | /* Science and Engineering */
13 | /* */
14 | /* All rights reserved. */
15 | /* */
16 | /* Redistribution and use in source and binary forms, with or */
17 | /* without modification, are permitted provided that the following */
18 | /* conditions are met: */
19 | /* */
20 | /* - Redistributions of source code must retain the above copyright */
21 | /* notice, this list of conditions and the following disclaimer. */
22 | /* - Redistributions in binary form must reproduce the above */
23 | /* copyright notice, this list of conditions and the following */
24 | /* disclaimer in the documentation and/or other materials provided */
25 | /* with the distribution. */
26 | /* - Neither the name of the HTS working group nor the names of its */
27 | /* contributors may be used to endorse or promote products derived */
28 | /* from this software without specific prior written permission. */
29 | /* */
30 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
31 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
32 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
33 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
34 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
35 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */
36 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
37 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
38 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
39 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */
40 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */
41 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
42 | /* POSSIBILITY OF SUCH DAMAGE. */
43 | /* ----------------------------------------------------------------- */
44 |
45 | #ifndef HTS_ENGINE_H
46 | #define HTS_ENGINE_H
47 |
48 | #ifdef __cplusplus
49 | #define HTS_ENGINE_H_START extern "C" {
50 | #define HTS_ENGINE_H_END }
51 | #else
52 | #define HTS_ENGINE_H_START
53 | #define HTS_ENGINE_H_END
54 | #endif /* __cplusplus */
55 |
56 | HTS_ENGINE_H_START;
57 |
58 | #include
59 |
60 | /* common ---------------------------------------------------------- */
61 |
62 | typedef char HTS_Boolean;
63 |
64 | #ifndef TRUE
65 | #define TRUE 1
66 | #endif /* !TRUE */
67 |
68 | #ifndef FALSE
69 | #define FALSE 0
70 | #endif /* !FALSE */
71 |
72 | #ifndef HTS_NODATA
73 | #define HTS_NODATA (-1.0e+10)
74 | #endif /* HTS_NODATA */
75 |
76 | /* copyright ------------------------------------------------------- */
77 |
78 | #define HTS_COPYRIGHT "The HMM-Based Speech Synthesis Engine \"hts_engine API\"\nVersion 1.10 (http://hts-engine.sourceforge.net/)\nCopyright (C) 2001-2015 Nagoya Institute of Technology\n 2001-2008 Tokyo Institute of Technology\nAll rights reserved.\n"
79 |
80 | /* audio ----------------------------------------------------------- */
81 |
82 | /* HTS_Audio: audio output wrapper (output settings, sample buffer and audio interface handle) */
83 | typedef struct _HTS_Audio {
84 | size_t sampling_frequency; /* sampling frequency */
85 | size_t max_buff_size; /* buffer size for audio output interface (presumably the capacity of buff; confirm in HTS_audio.c) */
86 | short *buff; /* current buffer of short samples */
87 | size_t buff_size; /* current buffer size */
88 | void *audio_interface; /* audio interface specified in compile step, kept as an untyped pointer */
89 | } HTS_Audio;
90 |
91 | /* model ----------------------------------------------------------- */
92 |
93 | /* HTS_Window: window coefficients used to calculate dynamic (delta) features. */
94 | typedef struct _HTS_Window {
95 | size_t size; /* # of windows (static + deltas) */
96 | int *l_width; /* left width of windows (presumably one entry per window; confirm) */
97 | int *r_width; /* right width of windows (presumably one entry per window; confirm) */
98 | double **coefficient; /* window coefficient (array of per-window coefficient arrays, presumably; confirm) */
99 | size_t max_width; /* maximum width of windows */
100 | } HTS_Window;
101 |
102 | /* HTS_Pattern: node of a singly linked list of patterns in a question and a tree. */
103 | typedef struct _HTS_Pattern {
104 | char *string; /* pattern string */
105 | struct _HTS_Pattern *next; /* pointer to the next pattern in the list */
106 | } HTS_Pattern;
107 |
108 | /* HTS_Question: node of a singly linked list of questions in a tree; each question owns a pattern list. */
109 | typedef struct _HTS_Question {
110 | char *string; /* name of this question */
111 | HTS_Pattern *head; /* pointer to the head of pattern list */
112 | struct _HTS_Question *next; /* pointer to the next question in the list */
113 | } HTS_Question;
114 |
115 | /* HTS_Node: list of tree nodes in a tree. */
116 | typedef struct _HTS_Node {
117 | int index; /* index of this node */
118 | size_t pdf; /* index of PDF for this node (leaf node only) */
119 | struct _HTS_Node *yes; /* pointer to its child node (yes) */
120 | struct _HTS_Node *no; /* pointer to its child node (no) */
121 | struct _HTS_Node *next; /* pointer to the next node */
122 | HTS_Question *quest; /* question applied at this node */
123 | } HTS_Node;
124 |
125 | /* HTS_Tree: list of decision trees in a model. */
126 | typedef struct _HTS_Tree {
127 | HTS_Pattern *head; /* pointer to the head of pattern list for this tree */
128 | struct _HTS_Tree *next; /* pointer to next tree */
129 | HTS_Node *root; /* root node of this tree */
130 | size_t state; /* state index of this tree */
131 | } HTS_Tree;
132 |
133 | /* HTS_Model: set of PDFs, decision trees and questions. */
134 | typedef struct _HTS_Model {
135 | size_t vector_length; /* vector length (static features only) */
136 | size_t num_windows; /* # of windows for delta */
137 | HTS_Boolean is_msd; /* flag for MSD */
138 | size_t ntree; /* # of trees */
139 | size_t *npdf; /* # of PDFs at each tree */
140 | float ***pdf; /* PDFs */
141 | HTS_Tree *tree; /* pointer to the list of trees */
142 | HTS_Question *question; /* pointer to the list of questions */
143 | } HTS_Model;
144 |
145 | /* HTS_ModelSet: set of duration models, HMMs and GV models. */
146 | typedef struct _HTS_ModelSet {
147 | char *hts_voice_version; /* version of HTS voice format */
148 | size_t sampling_frequency; /* sampling frequency */
149 | size_t frame_period; /* frame period */
150 | size_t num_voices; /* # of HTS voices */
151 | size_t num_states; /* # of HMM states */
152 | size_t num_streams; /* # of streams */
153 | char *stream_type; /* stream type */
154 | char *fullcontext_format; /* fullcontext label format */
155 | char *fullcontext_version; /* version of fullcontext label */
156 | HTS_Question *gv_off_context; /* GV switch */
157 | char **option; /* options for each stream */
158 | HTS_Model *duration; /* duration PDFs and trees */
159 | HTS_Window *window; /* window coefficients for delta */
160 | HTS_Model **stream; /* parameter PDFs and trees */
161 | HTS_Model **gv; /* GV PDFs and trees */
162 | } HTS_ModelSet;
163 |
164 | /* label ----------------------------------------------------------- */
165 |
/* HTS_LabelString: one label string plus its time information */
typedef struct _HTS_LabelString {
   struct _HTS_LabelString *next;       /* next label string in the list */
   char *name;                  /* label string */
   double start;                /* start frame given in the label */
   double end;                  /* end frame given in the label */
} HTS_LabelString;
173 |
174 | /* HTS_Label: list of label strings */
175 | typedef struct _HTS_Label {
176 | HTS_LabelString *head; /* pointer to the head of label string */
177 | size_t size; /* # of label strings */
178 | } HTS_Label;
179 |
180 | /* sstream --------------------------------------------------------- */
181 |
182 | /* HTS_SStream: individual state stream */
183 | typedef struct _HTS_SStream {
184 | size_t vector_length; /* vector length (static features only) */
185 | double **mean; /* mean vector sequence */
186 | double **vari; /* variance vector sequence */
187 | double *msd; /* MSD parameter sequence */
188 | size_t win_size; /* # of windows (static + deltas) */
189 | int *win_l_width; /* left width of windows */
190 | int *win_r_width; /* right width of windows */
191 | double **win_coefficient; /* window cofficients */
192 | size_t win_max_width; /* maximum width of windows */
193 | double *gv_mean; /* mean vector of GV */
194 | double *gv_vari; /* variance vector of GV */
195 | HTS_Boolean *gv_switch; /* GV flag sequence */
196 | } HTS_SStream;
197 |
198 | /* HTS_SStreamSet: set of state stream */
199 | typedef struct _HTS_SStreamSet {
200 | HTS_SStream *sstream; /* state streams */
201 | size_t nstream; /* # of streams */
202 | size_t nstate; /* # of states */
203 | size_t *duration; /* duration sequence */
204 | size_t total_state; /* total state */
205 | size_t total_frame; /* total frame */
206 | } HTS_SStreamSet;
207 |
208 | /* pstream --------------------------------------------------------- */
209 |
/* HTS_SMatrices: matrices and vectors of the speech parameter generation algorithm */
typedef struct _HTS_SMatrices {
   double **mean;               /* sequence of mean vectors */
   double **ivar;               /* sequence of inverse diagonal variances */
   double *g;                   /* vector used during forward substitution */
   double **wuw;                /* W' U^-1 W */
   double *wum;                 /* W' U^-1 mu */
} HTS_SMatrices;
218 |
219 | /* HTS_PStream: individual PDF stream. */
220 | typedef struct _HTS_PStream {
221 | size_t vector_length; /* vector length (static features only) */
222 | size_t length; /* stream length */
223 | size_t width; /* width of dynamic window */
224 | double **par; /* output parameter vector */
225 | HTS_SMatrices sm; /* matrices for parameter generation */
226 | size_t win_size; /* # of windows (static + deltas) */
227 | int *win_l_width; /* left width of windows */
228 | int *win_r_width; /* right width of windows */
229 | double **win_coefficient; /* window coefficients */
230 | HTS_Boolean *msd_flag; /* Boolean sequence for MSD */
231 | double *gv_mean; /* mean vector of GV */
232 | double *gv_vari; /* variance vector of GV */
233 | HTS_Boolean *gv_switch; /* GV flag sequence */
234 | size_t gv_length; /* frame length for GV calculation */
235 | } HTS_PStream;
236 |
237 | /* HTS_PStreamSet: set of PDF streams. */
238 | typedef struct _HTS_PStreamSet {
239 | HTS_PStream *pstream; /* PDF streams */
240 | size_t nstream; /* # of PDF streams */
241 | size_t total_frame; /* total frame */
242 | } HTS_PStreamSet;
243 |
244 | /* gstream --------------------------------------------------------- */
245 |
/* HTS_GStream: a single generated parameter stream */
typedef struct _HTS_GStream {
   size_t vector_length;        /* vector length (static features only) */
   double **par;                /* generated parameters */
} HTS_GStream;
251 |
252 | /* HTS_GStreamSet: set of generated parameter stream. */
253 | typedef struct _HTS_GStreamSet {
254 | size_t total_nsample; /* total sample */
255 | size_t total_frame; /* total frame */
256 | size_t nstream; /* # of streams */
257 | HTS_GStream *gstream; /* generated parameter streams */
258 | double *gspeech; /* generated speech */
259 | } HTS_GStreamSet;
260 |
261 | /* engine ---------------------------------------------------------- */
262 |
263 | /* HTS_Condition: synthesis condition */
264 | typedef struct _HTS_Condition {
265 | /* global */
266 | size_t sampling_frequency; /* sampling frequency */
267 | size_t fperiod; /* frame period */
268 | size_t audio_buff_size; /* audio buffer size (for audio device) */
269 | HTS_Boolean stop; /* stop flag */
270 | double volume; /* volume */
271 | double *msd_threshold; /* MSD thresholds */
272 | double *gv_weight; /* GV weights */
273 |
274 | /* duration */
275 | HTS_Boolean phoneme_alignment_flag; /* flag for using phoneme alignment in label */
276 | double speed; /* speech speed */
277 |
278 | /* spectrum */
279 | size_t stage; /* if stage=0 then gamma=0 else gamma=-1/stage */
280 | HTS_Boolean use_log_gain; /* log gain flag (for LSP) */
281 | double alpha; /* all-pass constant */
282 | double beta; /* postfiltering coefficient */
283 |
284 | /* log F0 */
285 | double additional_half_tone; /* additional half tone */
286 |
287 | /* interpolation weights */
288 | double *duration_iw; /* weights for duration interpolation */
289 | double **parameter_iw; /* weights for parameter interpolation */
290 | double **gv_iw; /* weights for GV interpolation */
291 | } HTS_Condition;
292 |
293 | /* HTS_Engine: Engine itself. */
294 | typedef struct _HTS_Engine {
295 | HTS_Condition condition; /* synthesis condition */
296 | HTS_Audio audio; /* audio output */
297 | HTS_ModelSet ms; /* set of duration models, HMMs and GV models */
298 | HTS_Label label; /* label */
299 | HTS_SStreamSet sss; /* set of state streams */
300 | HTS_PStreamSet pss; /* set of PDF streams */
301 | HTS_GStreamSet gss; /* set of generated parameter streams */
302 | } HTS_Engine;
303 |
304 | /* engine method --------------------------------------------------- */
305 |
306 | /* HTS_Engine_initialize: initialize engine */
307 | void HTS_Engine_initialize(HTS_Engine * engine);
308 |
309 | /* HTS_Engine_load: load HTS voices */
310 | HTS_Boolean HTS_Engine_load(HTS_Engine * engine, char **voices, size_t num_voices);
311 |
312 | /* HTS_Engine_set_sampling_frequency: set sampling fraquency */
313 | void HTS_Engine_set_sampling_frequency(HTS_Engine * engine, size_t i);
314 |
315 | /* HTS_Engine_get_sampling_frequency: get sampling frequency */
316 | size_t HTS_Engine_get_sampling_frequency(HTS_Engine * engine);
317 |
318 | /* HTS_Engine_set_fperiod: set frame period */
319 | void HTS_Engine_set_fperiod(HTS_Engine * engine, size_t i);
320 |
321 | /* HTS_Engine_get_fperiod: get frame period */
322 | size_t HTS_Engine_get_fperiod(HTS_Engine * engine);
323 |
324 | /* HTS_Engine_set_audio_buff_size: set audio buffer size */
325 | void HTS_Engine_set_audio_buff_size(HTS_Engine * engine, size_t i);
326 |
327 | /* HTS_Engine_get_audio_buff_size: get audio buffer size */
328 | size_t HTS_Engine_get_audio_buff_size(HTS_Engine * engine);
329 |
330 | /* HTS_Engine_set_stop_flag: set stop flag */
331 | void HTS_Engine_set_stop_flag(HTS_Engine * engine, HTS_Boolean b);
332 |
333 | /* HTS_Engine_get_stop_flag: get stop flag */
334 | HTS_Boolean HTS_Engine_get_stop_flag(HTS_Engine * engine);
335 |
336 | /* HTS_Engine_set_volume: set volume in db */
337 | void HTS_Engine_set_volume(HTS_Engine * engine, double f);
338 |
339 | /* HTS_Engine_get_volume: get volume in db */
340 | double HTS_Engine_get_volume(HTS_Engine * engine);
341 |
342 | /* HTS_Egnine_set_msd_threshold: set MSD threshold */
343 | void HTS_Engine_set_msd_threshold(HTS_Engine * engine, size_t stream_index, double f);
344 |
345 | /* HTS_Engine_get_msd_threshold: get MSD threshold */
346 | double HTS_Engine_get_msd_threshold(HTS_Engine * engine, size_t stream_index);
347 |
348 | /* HTS_Engine_set_gv_weight: set GV weight */
349 | void HTS_Engine_set_gv_weight(HTS_Engine * engine, size_t stream_index, double f);
350 |
351 | /* HTS_Engine_get_gv_weight: get GV weight */
352 | double HTS_Engine_get_gv_weight(HTS_Engine * engine, size_t stream_index);
353 |
354 | /* HTS_Engine_set_speed: set speech speed */
355 | void HTS_Engine_set_speed(HTS_Engine * engine, double f);
356 |
357 | /* HTS_Engine_set_phoneme_alignment_flag: set flag for using phoneme alignment in label */
358 | void HTS_Engine_set_phoneme_alignment_flag(HTS_Engine * engine, HTS_Boolean b);
359 |
360 | /* HTS_Engine_set_alpha: set alpha */
361 | void HTS_Engine_set_alpha(HTS_Engine * engine, double f);
362 |
363 | /* HTS_Engine_get_alpha: get alpha */
364 | double HTS_Engine_get_alpha(HTS_Engine * engine);
365 |
366 | /* HTS_Engine_set_beta: set beta */
367 | void HTS_Engine_set_beta(HTS_Engine * engine, double f);
368 |
369 | /* HTS_Engine_get_beta: get beta */
370 | double HTS_Engine_get_beta(HTS_Engine * engine);
371 |
372 | /* HTS_Engine_add_half_tone: add half tone */
373 | void HTS_Engine_add_half_tone(HTS_Engine * engine, double f);
374 |
375 | /* HTS_Engine_set_duration_interpolation_weight: set interpolation weight for duration */
376 | void HTS_Engine_set_duration_interpolation_weight(HTS_Engine * engine, size_t voice_index, double f);
377 |
378 | /* HTS_Engine_get_duration_interpolation_weight: get interpolation weight for duration */
379 | double HTS_Engine_get_duration_interpolation_weight(HTS_Engine * engine, size_t voice_index);
380 |
381 | /* HTS_Engine_set_parameter_interpolation_weight: set interpolation weight for parameter */
382 | void HTS_Engine_set_parameter_interpolation_weight(HTS_Engine * engine, size_t voice_index, size_t stream_index, double f);
383 |
384 | /* HTS_Engine_get_parameter_interpolation_weight: get interpolation weight for parameter */
385 | double HTS_Engine_get_parameter_interpolation_weight(HTS_Engine * engine, size_t voice_index, size_t stream_index);
386 |
387 | /* HTS_Engine_set_gv_interpolation_weight: set interpolation weight for GV */
388 | void HTS_Engine_set_gv_interpolation_weight(HTS_Engine * engine, size_t voice_index, size_t stream_index, double f);
389 |
390 | /* HTS_Engine_get_gv_interpolation_weight: get interpolation weight for GV */
391 | double HTS_Engine_get_gv_interpolation_weight(HTS_Engine * engine, size_t voice_index, size_t stream_index);
392 |
393 | /* HTS_Engine_get_total_state: get total number of state */
394 | size_t HTS_Engine_get_total_state(HTS_Engine * engine);
395 |
396 | /* HTS_Engine_set_state_mean: set mean value of state */
397 | void HTS_Engine_set_state_mean(HTS_Engine * engine, size_t stream_index, size_t state_index, size_t vector_index, double f);
398 |
399 | /* HTS_Engine_get_state_mean: get mean value of state */
400 | double HTS_Engine_get_state_mean(HTS_Engine * engine, size_t stream_index, size_t state_index, size_t vector_index);
401 |
402 | /* HTS_Engine_get_state_duration: get state duration */
403 | size_t HTS_Engine_get_state_duration(HTS_Engine * engine, size_t state_index);
404 |
405 | /* HTS_Engine_get_nvoices: get number of voices */
406 | size_t HTS_Engine_get_nvoices(HTS_Engine * engine);
407 |
408 | /* HTS_Engine_get_nstream: get number of stream */
409 | size_t HTS_Engine_get_nstream(HTS_Engine * engine);
410 |
411 | /* HTS_Engine_get_nstate: get number of state */
412 | size_t HTS_Engine_get_nstate(HTS_Engine * engine);
413 |
414 | /* HTS_Engine_get_fullcontext_label_format: get full context label format */
415 | const char *HTS_Engine_get_fullcontext_label_format(HTS_Engine * engine);
416 |
417 | /* HTS_Engine_get_fullcontext_label_version: get full context label version */
418 | const char *HTS_Engine_get_fullcontext_label_version(HTS_Engine * engine);
419 |
420 | /* HTS_Engine_get_total_frame: get total number of frame */
421 | size_t HTS_Engine_get_total_frame(HTS_Engine * engine);
422 |
423 | /* HTS_Engine_get_nsamples: get number of samples */
424 | size_t HTS_Engine_get_nsamples(HTS_Engine * engine);
425 |
426 | /* HTS_Engine_get_generated_parameter: output generated parameter */
427 | double HTS_Engine_get_generated_parameter(HTS_Engine * engine, size_t stream_index, size_t frame_index, size_t vector_index);
428 |
429 | /* HTS_Engine_get_generated_speech: output generated speech */
430 | double HTS_Engine_get_generated_speech(HTS_Engine * engine, size_t index);
431 |
432 | /* HTS_Engine_synthesize_from_fn: synthesize speech from file name */
433 | HTS_Boolean HTS_Engine_synthesize_from_fn(HTS_Engine * engine, const char *fn);
434 |
435 | /* HTS_Engine_synthesize_from_strings: synthesize speech from string list */
436 | HTS_Boolean HTS_Engine_synthesize_from_strings(HTS_Engine * engine, char **lines, size_t num_lines);
437 |
438 | /* HTS_Engine_generate_state_sequence_from_fn: generate state sequence from file name (1st synthesis step) */
439 | HTS_Boolean HTS_Engine_generate_state_sequence_from_fn(HTS_Engine * engine, const char *fn);
440 |
441 | /* HTS_Engine_generate_state_sequence_from_strings: generate state sequence from string list (1st synthesis step) */
442 | HTS_Boolean HTS_Engine_generate_state_sequence_from_strings(HTS_Engine * engine, char **lines, size_t num_lines);
443 |
444 | /* HTS_Engine_generate_parameter_sequence: generate parameter sequence (2nd synthesis step) */
445 | HTS_Boolean HTS_Engine_generate_parameter_sequence(HTS_Engine * engine);
446 |
447 | /* HTS_Engine_generate_sample_sequence: generate sample sequence (3rd synthesis step) */
448 | HTS_Boolean HTS_Engine_generate_sample_sequence(HTS_Engine * engine);
449 |
450 | /* HTS_Engine_save_information: save trace information */
451 | void HTS_Engine_save_information(HTS_Engine * engine, FILE * fp);
452 |
453 | /* HTS_Engine_save_label: save label with time */
454 | void HTS_Engine_save_label(HTS_Engine * engine, FILE * fp);
455 |
456 | /* HTS_Engine_save_generated_parameter: save generated parameter */
457 | void HTS_Engine_save_generated_parameter(HTS_Engine * engine, size_t stream_index, FILE * fp);
458 |
459 | /* HTS_Engine_save_generated_speech: save generated speech */
460 | void HTS_Engine_save_generated_speech(HTS_Engine * engine, FILE * fp);
461 |
462 | /* HTS_Engine_save_riff: save RIFF format file */
463 | void HTS_Engine_save_riff(HTS_Engine * engine, FILE * fp);
464 |
465 | /* HTS_Engine_refresh: free memory per one time synthesis */
466 | void HTS_Engine_refresh(HTS_Engine * engine);
467 |
468 | /* HTS_Engine_clear: free engine */
469 | void HTS_Engine_clear(HTS_Engine * engine);
470 |
471 | HTS_ENGINE_H_END;
472 |
473 | #endif /* !HTS_ENGINE_H */
474 |
--------------------------------------------------------------------------------
/src/lib/.cvsignore:
--------------------------------------------------------------------------------
1 | Makefile
2 | Makefile.in
3 | libHTSEngine.a
4 | .deps
5 |
--------------------------------------------------------------------------------
/src/lib/HTS_audio.c:
--------------------------------------------------------------------------------
1 | /* ----------------------------------------------------------------- */
2 | /* The HMM-Based Speech Synthesis Engine "hts_engine API" */
3 | /* developed by HTS Working Group */
4 | /* http://hts-engine.sourceforge.net/ */
5 | /* ----------------------------------------------------------------- */
6 | /* */
7 | /* Copyright (c) 2001-2015 Nagoya Institute of Technology */
8 | /* Department of Computer Science */
9 | /* */
10 | /* 2001-2008 Tokyo Institute of Technology */
11 | /* Interdisciplinary Graduate School of */
12 | /* Science and Engineering */
13 | /* */
14 | /* All rights reserved. */
15 | /* */
16 | /* Redistribution and use in source and binary forms, with or */
17 | /* without modification, are permitted provided that the following */
18 | /* conditions are met: */
19 | /* */
20 | /* - Redistributions of source code must retain the above copyright */
21 | /* notice, this list of conditions and the following disclaimer. */
22 | /* - Redistributions in binary form must reproduce the above */
23 | /* copyright notice, this list of conditions and the following */
24 | /* disclaimer in the documentation and/or other materials provided */
25 | /* with the distribution. */
26 | /* - Neither the name of the HTS working group nor the names of its */
27 | /* contributors may be used to endorse or promote products derived */
28 | /* from this software without specific prior written permission. */
29 | /* */
30 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
31 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
32 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
33 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
34 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
35 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */
36 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
37 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
38 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
39 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */
40 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */
41 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
42 | /* POSSIBILITY OF SUCH DAMAGE. */
43 | /* ----------------------------------------------------------------- */
44 |
45 | #ifndef HTS_AUDIO_C
46 | #define HTS_AUDIO_C
47 |
/* HTS_AUDIO_C_START / HTS_AUDIO_C_END: wrap the file in extern "C" for C++ builds, expand to nothing for C */
#if defined(__cplusplus)
#define HTS_AUDIO_C_START extern "C" {
#define HTS_AUDIO_C_END }
#else
#define HTS_AUDIO_C_START
#define HTS_AUDIO_C_END
#endif                          /* __cplusplus */
55 |
56 | HTS_AUDIO_C_START;
57 |
/* pick a default audio backend when the build selected none: */
/* AUDIO_PLAY_WIN32 if a Windows-family macro is defined, AUDIO_PLAY_NONE otherwise */
#if !(defined(AUDIO_PLAY_WIN32) || defined(AUDIO_PLAY_PORTAUDIO) || defined(AUDIO_PLAY_NONE))
#if defined(__WINCE__) || defined(_WINCE) || defined(__WINCE) || defined(__WIN32__) || defined(__WIN32) || defined(_WIN32) || defined(WIN32) || defined(__CYGWIN__) || defined(__MINGW32__)
#define AUDIO_PLAY_WIN32
#else
#define AUDIO_PLAY_NONE
#endif                          /* Windows-family macro defined */
#endif                          /* no backend selected by the build */
65 |
66 | /* hts_engine libraries */
67 | #include "HTS_hidden.h"
68 |
69 | #ifdef AUDIO_PLAY_WIN32
70 |
71 | #include
72 | #include
73 | #define AUDIO_WAIT_BUFF_MS 10 /* wait time (0.01 sec) */
74 | #define AUDIO_CHANNEL 1 /* monaural */
75 | #ifdef _M_X64
76 | #define AUDIO_POINTER_TYPE DWORD_PTR
77 | #else
78 | #define AUDIO_POINTER_TYPE DWORD
79 | #endif
80 |
81 | /* HTS_Audio: audio interface for Windows */
82 | typedef struct _HTS_AudioInterface {
83 | HWAVEOUT hwaveout; /* audio device handle */
84 | WAVEFORMATEX waveformatex; /* wave formatex */
85 | unsigned char which_buff; /* double buffering flag */
86 | HTS_Boolean now_buff_1; /* double buffering flag */
87 | HTS_Boolean now_buff_2; /* double buffering flag */
88 | WAVEHDR buff_1; /* buffer */
89 | WAVEHDR buff_2; /* buffer */
90 | } HTS_AudioInterface;
91 |
92 | /* HTS_AudioInterface_callback_function: callback function from audio device */
93 | static void CALLBACK HTS_AudioInterface_callback_function(HWAVEOUT hwaveout, UINT msg, AUDIO_POINTER_TYPE user_data, AUDIO_POINTER_TYPE param1, AUDIO_POINTER_TYPE param2)
94 | {
95 | WAVEHDR *wavehdr = (WAVEHDR *) param1;
96 | HTS_AudioInterface *audio_interface = (HTS_AudioInterface *) user_data;
97 |
98 | if (msg == MM_WOM_DONE && wavehdr && (wavehdr->dwFlags & WHDR_DONE)) {
99 | if (audio_interface->now_buff_1 == TRUE && wavehdr == &(audio_interface->buff_1)) {
100 | audio_interface->now_buff_1 = FALSE;
101 | } else if (audio_interface->now_buff_2 == TRUE && wavehdr == &(audio_interface->buff_2)) {
102 | audio_interface->now_buff_2 = FALSE;
103 | }
104 | }
105 | }
106 |
107 | /* HTS_AudioInterface_write: send buffer to audio device */
108 | static HTS_Boolean HTS_AudioInterface_write(HTS_AudioInterface * audio_interface, const short *buff, size_t buff_size)
109 | {
110 | MMRESULT result;
111 |
112 | if (audio_interface->which_buff == 1) {
113 | while (audio_interface->now_buff_1 == TRUE)
114 | Sleep(AUDIO_WAIT_BUFF_MS);
115 | audio_interface->now_buff_1 = TRUE;
116 | audio_interface->which_buff = 2;
117 | memcpy(audio_interface->buff_1.lpData, buff, buff_size * sizeof(short));
118 | audio_interface->buff_1.dwBufferLength = (DWORD) buff_size *sizeof(short);
119 | result = waveOutWrite(audio_interface->hwaveout, &(audio_interface->buff_1), sizeof(WAVEHDR));
120 | } else {
121 | while (audio_interface->now_buff_2 == TRUE)
122 | Sleep(AUDIO_WAIT_BUFF_MS);
123 | audio_interface->now_buff_2 = TRUE;
124 | audio_interface->which_buff = 1;
125 | memcpy(audio_interface->buff_2.lpData, buff, buff_size * sizeof(short));
126 | audio_interface->buff_2.dwBufferLength = (DWORD) buff_size *sizeof(short);
127 | result = waveOutWrite(audio_interface->hwaveout, &(audio_interface->buff_2), sizeof(WAVEHDR));
128 | }
129 |
130 | if (result != MMSYSERR_NOERROR)
131 | HTS_error(0, "hts_engine: Cannot send datablocks to your output audio device to play waveform.\n");
132 |
133 | return (result == MMSYSERR_NOERROR) ? TRUE : FALSE;
134 | }
135 |
136 | /* HTS_AudioInterface_close: close audio device */
137 | static void HTS_AudioInterface_close(HTS_AudioInterface * audio_interface)
138 | {
139 | MMRESULT result;
140 |
141 | /* stop audio */
142 | result = waveOutReset(audio_interface->hwaveout);
143 | if (result != MMSYSERR_NOERROR)
144 | HTS_error(0, "hts_engine: Cannot stop and reset your output audio device.\n");
145 | /* unprepare */
146 | result = waveOutUnprepareHeader(audio_interface->hwaveout, &(audio_interface->buff_1), sizeof(WAVEHDR));
147 | if (result != MMSYSERR_NOERROR)
148 | HTS_error(0, "hts_engine: Cannot cleanup the audio datablocks to play waveform.\n");
149 | result = waveOutUnprepareHeader(audio_interface->hwaveout, &(audio_interface->buff_2), sizeof(WAVEHDR));
150 | if (result != MMSYSERR_NOERROR)
151 | HTS_error(0, "hts_engine: Cannot cleanup the audio datablocks to play waveform.\n");
152 | /* close */
153 | result = waveOutClose(audio_interface->hwaveout);
154 | if (result != MMSYSERR_NOERROR)
155 | HTS_error(0, "hts_engine: Failed to close your output audio device.\n");
156 | if (audio_interface->buff_1.lpData != NULL)
157 | HTS_free(audio_interface->buff_1.lpData);
158 | if (audio_interface->buff_2.lpData != NULL)
159 | HTS_free(audio_interface->buff_2.lpData);
160 |
161 | HTS_free(audio_interface);
162 | }
163 |
164 | static HTS_AudioInterface *HTS_AudioInterface_open(size_t sampling_frequency, size_t max_buff_size)
165 | {
166 | HTS_AudioInterface *audio_interface;
167 | MMRESULT result;
168 |
169 | /* make audio interface */
170 | audio_interface = (HTS_AudioInterface *) HTS_calloc(1, sizeof(HTS_AudioInterface));
171 |
172 | audio_interface->hwaveout = 0;
173 | audio_interface->which_buff = 1;
174 | audio_interface->now_buff_1 = FALSE;
175 | audio_interface->now_buff_2 = FALSE;
176 |
177 | /* format */
178 | audio_interface->waveformatex.wFormatTag = WAVE_FORMAT_PCM;
179 | audio_interface->waveformatex.nChannels = AUDIO_CHANNEL;
180 | audio_interface->waveformatex.nSamplesPerSec = (DWORD) sampling_frequency;
181 | audio_interface->waveformatex.wBitsPerSample = sizeof(short) * 8;
182 | audio_interface->waveformatex.nBlockAlign = AUDIO_CHANNEL * audio_interface->waveformatex.wBitsPerSample / 8;
183 | audio_interface->waveformatex.nAvgBytesPerSec = (DWORD) sampling_frequency *audio_interface->waveformatex.nBlockAlign;
184 | /* open */
185 | result = waveOutOpen(&audio_interface->hwaveout, WAVE_MAPPER, &audio_interface->waveformatex, (AUDIO_POINTER_TYPE) HTS_AudioInterface_callback_function, (AUDIO_POINTER_TYPE) audio_interface, CALLBACK_FUNCTION);
186 | if (result != MMSYSERR_NOERROR) {
187 | HTS_error(0, "hts_engine: Failed to open your output audio_interface device to play waveform.\n");
188 | HTS_free(audio_interface);
189 | return NULL;
190 | }
191 |
192 | /* prepare */
193 | audio_interface->buff_1.lpData = (LPSTR) HTS_calloc(max_buff_size, sizeof(short));
194 | audio_interface->buff_1.dwBufferLength = (DWORD) max_buff_size *sizeof(short);
195 | audio_interface->buff_1.dwFlags = WHDR_BEGINLOOP | WHDR_ENDLOOP;
196 | audio_interface->buff_1.dwLoops = 1;
197 | audio_interface->buff_1.lpNext = 0;
198 | audio_interface->buff_1.reserved = 0;
199 | result = waveOutPrepareHeader(audio_interface->hwaveout, &(audio_interface->buff_1), sizeof(WAVEHDR));
200 | if (result != MMSYSERR_NOERROR) {
201 | HTS_error(0, "hts_engine: Cannot initialize audio_interface datablocks to play waveform.\n");
202 | HTS_free(audio_interface->buff_1.lpData);
203 | HTS_free(audio_interface);
204 | return NULL;
205 | }
206 | audio_interface->buff_2.lpData = (LPSTR) HTS_calloc(max_buff_size, sizeof(short));
207 | audio_interface->buff_2.dwBufferLength = (DWORD) max_buff_size *sizeof(short);
208 | audio_interface->buff_2.dwFlags = WHDR_BEGINLOOP | WHDR_ENDLOOP;
209 | audio_interface->buff_2.dwLoops = 1;
210 | audio_interface->buff_2.lpNext = 0;
211 | audio_interface->buff_2.reserved = 0;
212 | result = waveOutPrepareHeader(audio_interface->hwaveout, &(audio_interface->buff_2), sizeof(WAVEHDR));
213 | if (result != MMSYSERR_NOERROR) {
214 | HTS_error(0, "hts_engine: Cannot initialize audio_interface datablocks to play waveform.\n");
215 | HTS_free(audio_interface->buff_1.lpData);
216 | HTS_free(audio_interface->buff_2.lpData);
217 | HTS_free(audio_interface);
218 | return NULL;
219 | }
220 |
221 | return audio_interface;
222 | }
223 |
224 | /* HTS_Audio_initialize: initialize audio */
225 | void HTS_Audio_initialize(HTS_Audio * audio)
226 | {
227 | if (audio == NULL)
228 | return;
229 |
230 | audio->sampling_frequency = 0;
231 | audio->max_buff_size = 0;
232 | audio->buff = NULL;
233 | audio->buff_size = 0;
234 | audio->audio_interface = NULL;
235 | }
236 |
237 | /* HTS_Audio_set_parameter: set parameters for audio */
238 | void HTS_Audio_set_parameter(HTS_Audio * audio, size_t sampling_frequency, size_t max_buff_size)
239 | {
240 | if (audio == NULL)
241 | return;
242 |
243 | if (audio->sampling_frequency == sampling_frequency && audio->max_buff_size == max_buff_size)
244 | return;
245 |
246 | HTS_Audio_clear(audio);
247 |
248 | if (sampling_frequency == 0 || max_buff_size == 0)
249 | return;
250 |
251 | audio->audio_interface = HTS_AudioInterface_open(sampling_frequency, max_buff_size);
252 | if (audio->audio_interface == NULL)
253 | return;
254 |
255 | audio->sampling_frequency = sampling_frequency;
256 | audio->max_buff_size = max_buff_size;
257 | audio->buff = (short *) HTS_calloc(max_buff_size, sizeof(short));
258 | audio->buff_size = 0;
259 | }
260 |
261 | /* HTS_Audio_write: send data to audio */
262 | void HTS_Audio_write(HTS_Audio * audio, short data)
263 | {
264 | if (audio == NULL || audio->audio_interface == NULL)
265 | return;
266 |
267 | audio->buff[audio->buff_size++] = data;
268 |
269 | if (audio->buff_size >= audio->max_buff_size) {
270 | if (HTS_AudioInterface_write((HTS_AudioInterface *) audio->audio_interface, audio->buff, audio->buff_size) != TRUE) {
271 | HTS_Audio_clear(audio);
272 | return;
273 | }
274 | audio->buff_size = 0;
275 | }
276 | }
277 |
278 | /* HTS_Audio_flush: flush remain data */
279 | void HTS_Audio_flush(HTS_Audio * audio)
280 | {
281 | HTS_AudioInterface *audio_interface;
282 |
283 | if (audio == NULL || audio->audio_interface == NULL)
284 | return;
285 |
286 | audio_interface = (HTS_AudioInterface *) audio->audio_interface;
287 | if (audio->buff_size > 0) {
288 | if (HTS_AudioInterface_write(audio_interface, audio->buff, audio->buff_size) != TRUE) {
289 | HTS_Audio_clear(audio);
290 | return;
291 | }
292 | audio->buff_size = 0;
293 | }
294 | while (audio_interface->now_buff_1 == TRUE || audio_interface->now_buff_2 == TRUE)
295 | Sleep(AUDIO_WAIT_BUFF_MS);
296 | }
297 |
298 | /* HTS_Audio_clear: free audio */
299 | void HTS_Audio_clear(HTS_Audio * audio)
300 | {
301 | HTS_AudioInterface *audio_interface;
302 |
303 | if (audio == NULL || audio->audio_interface == NULL)
304 | return;
305 |
306 | audio_interface = (HTS_AudioInterface *) audio->audio_interface;
307 | HTS_AudioInterface_close(audio_interface);
308 | if (audio->buff != NULL)
309 | free(audio->buff);
310 | HTS_Audio_initialize(audio);
311 | }
312 |
313 | #endif /* AUDIO_PLAY_WIN32 */
314 |
315 | #ifdef AUDIO_PLAY_PORTAUDIO
316 |
317 | #include "portaudio.h"
318 |
319 | /* HTS_AudioInterface: audio output for PortAudio */
320 | typedef struct _HTS_AudioInterface {
321 | PaStreamParameters parameters; /* parameters for output stream */
322 | PaStream *stream; /* output stream */
323 | } HTS_AudioInterface;
324 |
325 | /* HTS_AudioInterface_write: send data to audio device */
326 | static void HTS_AudioInterface_write(HTS_AudioInterface * audio_interface, const short *buff, size_t buff_size)
327 | {
328 | PaError err;
329 |
330 | err = Pa_WriteStream(audio_interface->stream, buff, buff_size);
331 | if (err != paNoError && err != paOutputUnderflowed)
332 | HTS_error(0, "hts_engine: Cannot send datablocks to your output audio device to play waveform.\n");
333 | }
334 |
335 | /* HTS_AudioInterface_close: close audio device */
336 | static void HTS_AudioInterface_close(HTS_AudioInterface * audio_interface)
337 | {
338 | PaError err;
339 |
340 | err = Pa_StopStream(audio_interface->stream);
341 | if (err != paNoError)
342 | HTS_error(0, "hts_engine: Cannot stop your output audio device.\n");
343 | err = Pa_CloseStream(audio_interface->stream);
344 | if (err != paNoError)
345 | HTS_error(0, "hts_engine: Failed to close your output audio device.\n");
346 | Pa_Terminate();
347 |
348 | HTS_free(audio_interface);
349 | }
350 |
351 | static HTS_AudioInterface *HTS_AudioInterface_open(size_t sampling_frequency, size_t max_buff_size)
352 | {
353 | HTS_AudioInterface *audio_interface;
354 | PaError err;
355 |
356 | audio_interface = HTS_calloc(1, sizeof(HTS_AudioInterface));
357 | audio_interface->stream = NULL;
358 |
359 | err = Pa_Initialize();
360 | if (err != paNoError) {
361 | HTS_error(0, "hts_engine: Failed to initialize your output audio device to play waveform.\n");
362 | HTS_free(audio_interface);
363 | return NULL;
364 | }
365 |
366 | audio_interface->parameters.device = Pa_GetDefaultOutputDevice();
367 | audio_interface->parameters.channelCount = 1;
368 | audio_interface->parameters.sampleFormat = paInt16;
369 | audio_interface->parameters.suggestedLatency = Pa_GetDeviceInfo(audio_interface->parameters.device)->defaultLowOutputLatency;
370 | audio_interface->parameters.hostApiSpecificStreamInfo = NULL;
371 |
372 | err = Pa_OpenStream(&audio_interface->stream, NULL, &audio_interface->parameters, sampling_frequency, max_buff_size, paClipOff, NULL, NULL);
373 | if (err != paNoError) {
374 | HTS_error(0, "hts_engine: Failed to open your output audio device to play waveform.\n");
375 | Pa_Terminate();
376 | HTS_free(audio_interface);
377 | return NULL;
378 | }
379 |
380 | err = Pa_StartStream(audio_interface->stream);
381 | if (err != paNoError) {
382 | HTS_error(0, "hts_engine: Failed to start your output audio device to play waveform.\n");
383 | Pa_CloseStream(audio_interface->stream);
384 | Pa_Terminate();
385 | HTS_free(audio_interface);
386 | return NULL;
387 | }
388 |
389 | return audio_interface;
390 | }
391 |
392 | /* HTS_Audio_initialize: initialize audio */
393 | void HTS_Audio_initialize(HTS_Audio * audio)
394 | {
395 | if (audio == NULL)
396 | return;
397 |
398 | audio->sampling_frequency = 0;
399 | audio->max_buff_size = 0;
400 | audio->buff = NULL;
401 | audio->buff_size = 0;
402 | audio->audio_interface = NULL;
403 | }
404 |
405 | /* HTS_Audio_set_parameter: set parameters for audio */
406 | void HTS_Audio_set_parameter(HTS_Audio * audio, size_t sampling_frequency, size_t max_buff_size)
407 | {
408 | if (audio == NULL)
409 | return;
410 |
411 | if (audio->sampling_frequency == sampling_frequency && audio->max_buff_size == max_buff_size)
412 | return;
413 |
414 | HTS_Audio_clear(audio);
415 |
416 | if (sampling_frequency == 0 || max_buff_size == 0)
417 | return;
418 |
419 | audio->audio_interface = HTS_AudioInterface_open(sampling_frequency, max_buff_size);
420 | if (audio->audio_interface == NULL)
421 | return;
422 |
423 | audio->sampling_frequency = sampling_frequency;
424 | audio->max_buff_size = max_buff_size;
425 | audio->buff = (short *) HTS_calloc(max_buff_size, sizeof(short));
426 | audio->buff_size = 0;
427 | }
428 |
429 | /* HTS_Audio_write: send data to audio device */
430 | void HTS_Audio_write(HTS_Audio * audio, short data)
431 | {
432 | if (audio == NULL)
433 | return;
434 |
435 | audio->buff[audio->buff_size++] = data;
436 |
437 | if (audio->buff_size >= audio->max_buff_size) {
438 | if (audio->audio_interface != NULL)
439 | HTS_AudioInterface_write((HTS_AudioInterface *) audio->audio_interface, audio->buff, audio->max_buff_size);
440 | audio->buff_size = 0;
441 | }
442 | }
443 |
444 | /* HTS_Audio_flush: flush remain data */
445 | void HTS_Audio_flush(HTS_Audio * audio)
446 | {
447 | HTS_AudioInterface *audio_interface;
448 |
449 | if (audio == NULL || audio->audio_interface == NULL)
450 | return;
451 |
452 | audio_interface = (HTS_AudioInterface *) audio->audio_interface;
453 | if (audio->buff_size > 0) {
454 | HTS_AudioInterface_write(audio_interface, audio->buff, audio->buff_size);
455 | audio->buff_size = 0;
456 | }
457 | }
458 |
459 | /* HTS_Audio_clear: free audio */
460 | void HTS_Audio_clear(HTS_Audio * audio)
461 | {
462 | HTS_AudioInterface *audio_interface;
463 |
464 | if (audio == NULL || audio->audio_interface == NULL)
465 | return;
466 | audio_interface = (HTS_AudioInterface *) audio->audio_interface;
467 |
468 | HTS_Audio_flush(audio);
469 | HTS_AudioInterface_close(audio_interface);
470 | if (audio->buff != NULL)
471 | HTS_free(audio->buff);
472 | HTS_Audio_initialize(audio);
473 | }
474 |
475 | #endif /* AUDIO_PLAY_PORTAUDIO */
476 |
477 | #ifdef AUDIO_PLAY_NONE
478 |
479 | /* HTS_Audio_initialize: initialize audio */
480 | void HTS_Audio_initialize(HTS_Audio * audio)
481 | {
482 | }
483 |
484 | /* HTS_Audio_set_parameter: set parameters for audio */
485 | void HTS_Audio_set_parameter(HTS_Audio * audio, size_t sampling_frequeny, size_t max_buff_size)
486 | {
487 | }
488 |
489 | /* HTS_Audio_write: send data to audio */
490 | void HTS_Audio_write(HTS_Audio * audio, short data)
491 | {
492 | }
493 |
494 | /* HTS_Audio_flush: flush remain data */
495 | void HTS_Audio_flush(HTS_Audio * audio)
496 | {
497 | }
498 |
499 | /* HTS_Audio_clear: free audio */
500 | void HTS_Audio_clear(HTS_Audio * audio)
501 | {
502 | }
503 |
504 | #endif /* AUDIO_PLAY_NONE */
505 |
506 | HTS_AUDIO_C_END;
507 |
508 | #endif /* !HTS_AUDIO_C */
509 |
--------------------------------------------------------------------------------
/src/lib/HTS_gstream.c:
--------------------------------------------------------------------------------
1 | /* ----------------------------------------------------------------- */
2 | /* The HMM-Based Speech Synthesis Engine "hts_engine API" */
3 | /* developed by HTS Working Group */
4 | /* http://hts-engine.sourceforge.net/ */
5 | /* ----------------------------------------------------------------- */
6 | /* */
7 | /* Copyright (c) 2001-2015 Nagoya Institute of Technology */
8 | /* Department of Computer Science */
9 | /* */
10 | /* 2001-2008 Tokyo Institute of Technology */
11 | /* Interdisciplinary Graduate School of */
12 | /* Science and Engineering */
13 | /* */
14 | /* All rights reserved. */
15 | /* */
16 | /* Redistribution and use in source and binary forms, with or */
17 | /* without modification, are permitted provided that the following */
18 | /* conditions are met: */
19 | /* */
20 | /* - Redistributions of source code must retain the above copyright */
21 | /* notice, this list of conditions and the following disclaimer. */
22 | /* - Redistributions in binary form must reproduce the above */
23 | /* copyright notice, this list of conditions and the following */
24 | /* disclaimer in the documentation and/or other materials provided */
25 | /* with the distribution. */
26 | /* - Neither the name of the HTS working group nor the names of its */
27 | /* contributors may be used to endorse or promote products derived */
28 | /* from this software without specific prior written permission. */
29 | /* */
30 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
31 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
32 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
33 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
34 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
35 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */
36 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
37 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
38 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
39 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */
40 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */
41 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
42 | /* POSSIBILITY OF SUCH DAMAGE. */
43 | /* ----------------------------------------------------------------- */
44 |
45 | #ifndef HTS_GSTREAM_C
46 | #define HTS_GSTREAM_C
47 |
48 | #ifdef __cplusplus
49 | #define HTS_GSTREAM_C_START extern "C" {
50 | #define HTS_GSTREAM_C_END }
51 | #else
52 | #define HTS_GSTREAM_C_START
53 | #define HTS_GSTREAM_C_END
54 | #endif /* __cplusplus */
55 |
56 | HTS_GSTREAM_C_START;
57 |
58 | /* hts_engine libraries */
59 | #include "HTS_hidden.h"
60 |
61 | /* HTS_GStreamSet_initialize: initialize generated parameter stream set */
62 | void HTS_GStreamSet_initialize(HTS_GStreamSet * gss)
63 | {
64 | gss->nstream = 0;
65 | gss->total_frame = 0;
66 | gss->total_nsample = 0;
67 | gss->gstream = NULL;
68 | gss->gspeech = NULL;
69 | }
70 |
71 | /* HTS_GStreamSet_create: generate speech */
72 | HTS_Boolean HTS_GStreamSet_create(HTS_GStreamSet * gss, HTS_PStreamSet * pss, size_t stage, HTS_Boolean use_log_gain, size_t sampling_rate, size_t fperiod, double alpha, double beta, HTS_Boolean * stop, double volume, HTS_Audio * audio)
73 | {
74 | size_t i, j, k;
75 | size_t msd_frame;
76 | HTS_Vocoder v;
77 | size_t nlpf = 0;
78 | double *lpf = NULL;
79 |
80 | /* check */
81 | if (gss->gstream || gss->gspeech) {
82 | HTS_error(1, "HTS_GStreamSet_create: HTS_GStreamSet is not initialized.\n");
83 | return FALSE;
84 | }
85 |
86 | /* initialize */
87 | gss->nstream = HTS_PStreamSet_get_nstream(pss);
88 | gss->total_frame = HTS_PStreamSet_get_total_frame(pss);
89 | gss->total_nsample = fperiod * gss->total_frame;
90 | gss->gstream = (HTS_GStream *) HTS_calloc(gss->nstream, sizeof(HTS_GStream));
91 | for (i = 0; i < gss->nstream; i++) {
92 | gss->gstream[i].vector_length = HTS_PStreamSet_get_vector_length(pss, i);
93 | gss->gstream[i].par = (double **) HTS_calloc(gss->total_frame, sizeof(double *));
94 | for (j = 0; j < gss->total_frame; j++)
95 | gss->gstream[i].par[j] = (double *) HTS_calloc(gss->gstream[i].vector_length, sizeof(double));
96 | }
97 | gss->gspeech = (double *) HTS_calloc(gss->total_nsample, sizeof(double));
98 |
99 | /* copy generated parameter */
100 | for (i = 0; i < gss->nstream; i++) {
101 | if (HTS_PStreamSet_is_msd(pss, i)) { /* for MSD */
102 | for (j = 0, msd_frame = 0; j < gss->total_frame; j++)
103 | if (HTS_PStreamSet_get_msd_flag(pss, i, j) == TRUE) {
104 | for (k = 0; k < gss->gstream[i].vector_length; k++)
105 | gss->gstream[i].par[j][k] = HTS_PStreamSet_get_parameter(pss, i, msd_frame, k);
106 | msd_frame++;
107 | } else
108 | for (k = 0; k < gss->gstream[i].vector_length; k++)
109 | gss->gstream[i].par[j][k] = HTS_NODATA;
110 | } else { /* for non MSD */
111 | for (j = 0; j < gss->total_frame; j++)
112 | for (k = 0; k < gss->gstream[i].vector_length; k++)
113 | gss->gstream[i].par[j][k] = HTS_PStreamSet_get_parameter(pss, i, j, k);
114 | }
115 | }
116 |
117 | /* check */
118 | if (gss->nstream != 2 && gss->nstream != 3) {
119 | HTS_error(1, "HTS_GStreamSet_create: The number of streams should be 2 or 3.\n");
120 | HTS_GStreamSet_clear(gss);
121 | return FALSE;
122 | }
123 | if (HTS_PStreamSet_get_vector_length(pss, 1) != 1) {
124 | HTS_error(1, "HTS_GStreamSet_create: The size of lf0 static vector should be 1.\n");
125 | HTS_GStreamSet_clear(gss);
126 | return FALSE;
127 | }
128 | if (gss->nstream >= 3 && gss->gstream[2].vector_length % 2 == 0) {
129 | HTS_error(1, "HTS_GStreamSet_create: The number of low-pass filter coefficient should be odd numbers.");
130 | HTS_GStreamSet_clear(gss);
131 | return FALSE;
132 | }
133 |
134 | /* synthesize speech waveform */
135 | HTS_Vocoder_initialize(&v, gss->gstream[0].vector_length - 1, stage, use_log_gain, sampling_rate, fperiod);
136 | if (gss->nstream >= 3)
137 | nlpf = gss->gstream[2].vector_length;
138 | for (i = 0; i < gss->total_frame && (*stop) == FALSE; i++) {
139 | j = i * fperiod;
140 | if (gss->nstream >= 3)
141 | lpf = &gss->gstream[2].par[i][0];
142 | HTS_Vocoder_synthesize(&v, gss->gstream[0].vector_length - 1, gss->gstream[1].par[i][0], &gss->gstream[0].par[i][0], nlpf, lpf, alpha, beta, volume, &gss->gspeech[j], audio);
143 | }
144 | HTS_Vocoder_clear(&v);
145 | if (audio)
146 | HTS_Audio_flush(audio);
147 |
148 | return TRUE;
149 | }
150 |
151 | /* HTS_GStreamSet_get_total_nsamples: get total number of sample */
152 | size_t HTS_GStreamSet_get_total_nsamples(HTS_GStreamSet * gss)
153 | {
154 | return gss->total_nsample;
155 | }
156 |
157 | /* HTS_GStreamSet_get_total_frame: get total number of frame */
158 | size_t HTS_GStreamSet_get_total_frame(HTS_GStreamSet * gss)
159 | {
160 | return gss->total_frame;
161 | }
162 |
163 | /* HTS_GStreamSet_get_vector_length: get features length */
164 | size_t HTS_GStreamSet_get_vector_length(HTS_GStreamSet * gss, size_t stream_index)
165 | {
166 | return gss->gstream[stream_index].vector_length;
167 | }
168 |
169 | /* HTS_GStreamSet_get_speech: get synthesized speech parameter */
170 | double HTS_GStreamSet_get_speech(HTS_GStreamSet * gss, size_t sample_index)
171 | {
172 | return gss->gspeech[sample_index];
173 | }
174 |
175 | /* HTS_GStreamSet_get_parameter: get generated parameter */
176 | double HTS_GStreamSet_get_parameter(HTS_GStreamSet * gss, size_t stream_index, size_t frame_index, size_t vector_index)
177 | {
178 | return gss->gstream[stream_index].par[frame_index][vector_index];
179 | }
180 |
181 | /* HTS_GStreamSet_clear: free generated parameter stream set */
182 | void HTS_GStreamSet_clear(HTS_GStreamSet * gss)
183 | {
184 | size_t i, j;
185 |
186 | if (gss->gstream) {
187 | for (i = 0; i < gss->nstream; i++) {
188 | if (gss->gstream[i].par != NULL) {
189 | for (j = 0; j < gss->total_frame; j++)
190 | HTS_free(gss->gstream[i].par[j]);
191 | HTS_free(gss->gstream[i].par);
192 | }
193 | }
194 | HTS_free(gss->gstream);
195 | }
196 | if (gss->gspeech)
197 | HTS_free(gss->gspeech);
198 | HTS_GStreamSet_initialize(gss);
199 | }
200 |
201 | HTS_GSTREAM_C_END;
202 |
203 | #endif /* !HTS_GSTREAM_C */
204 |
--------------------------------------------------------------------------------
/src/lib/HTS_hidden.h:
--------------------------------------------------------------------------------
1 | /* ----------------------------------------------------------------- */
2 | /* The HMM-Based Speech Synthesis Engine "hts_engine API" */
3 | /* developed by HTS Working Group */
4 | /* http://hts-engine.sourceforge.net/ */
5 | /* ----------------------------------------------------------------- */
6 | /* */
7 | /* Copyright (c) 2001-2015 Nagoya Institute of Technology */
8 | /* Department of Computer Science */
9 | /* */
10 | /* 2001-2008 Tokyo Institute of Technology */
11 | /* Interdisciplinary Graduate School of */
12 | /* Science and Engineering */
13 | /* */
14 | /* All rights reserved. */
15 | /* */
16 | /* Redistribution and use in source and binary forms, with or */
17 | /* without modification, are permitted provided that the following */
18 | /* conditions are met: */
19 | /* */
20 | /* - Redistributions of source code must retain the above copyright */
21 | /* notice, this list of conditions and the following disclaimer. */
22 | /* - Redistributions in binary form must reproduce the above */
23 | /* copyright notice, this list of conditions and the following */
24 | /* disclaimer in the documentation and/or other materials provided */
25 | /* with the distribution. */
26 | /* - Neither the name of the HTS working group nor the names of its */
27 | /* contributors may be used to endorse or promote products derived */
28 | /* from this software without specific prior written permission. */
29 | /* */
30 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
31 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
32 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
33 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
34 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
35 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */
36 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
37 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
38 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
39 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */
40 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */
41 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
42 | /* POSSIBILITY OF SUCH DAMAGE. */
43 | /* ----------------------------------------------------------------- */
44 |
45 | #ifndef HTS_HIDDEN_H
46 | #define HTS_HIDDEN_H
47 |
48 | #ifdef __cplusplus
49 | #define HTS_HIDDEN_H_START extern "C" {
50 | #define HTS_HIDDEN_H_END }
51 | #else
52 | #define HTS_HIDDEN_H_START
53 | #define HTS_HIDDEN_H_END
54 | #endif /* __cplusplus */
55 |
56 | HTS_HIDDEN_H_START;
57 |
58 | /* hts_engine libraries */
59 | #include "HTS_engine.h"
60 |
61 | /* common ---------------------------------------------------------- */
62 |
63 | #define HTS_MAXBUFLEN 1024
64 |
65 | #if !defined(WORDS_BIGENDIAN) && !defined(WORDS_LITTLEENDIAN)
66 | #define WORDS_LITTLEENDIAN
67 | #endif /* !WORDS_BIGENDIAN && !WORDS_LITTLEENDIAN */
68 | #if defined(WORDS_BIGENDIAN) && defined(WORDS_LITTLEENDIAN)
69 | #undef WORDS_BIGENDIAN
70 | #endif /* WORDS_BIGENDIAN && WORDS_LITTLEENDIAN */
71 |
72 | #define MAX_F0 20000.0
73 | #define MIN_F0 20.0
74 | #define MAX_LF0 9.9034875525361280454891979401956 /* log(20000.0) */
75 | #define MIN_LF0 2.9957322735539909934352235761425 /* log(20.0) */
76 | #define HALF_TONE 0.05776226504666210911810267678818 /* log(2.0) / 12.0 */
77 | #define DB 0.11512925464970228420089957273422 /* log(10.0) / 20.0 */
78 |
79 | /* misc ------------------------------------------------------------ */
80 |
81 | typedef struct _HTS_File { /* HTS_File: handle used by the HTS_f* wrappers declared below */
82 | unsigned char type; /* kind of source behind `pointer` -- NOTE(review): presumably tells a file apart from in-memory data; confirm in HTS_misc.c */
83 | void *pointer; /* untyped pointer to the underlying source; how it is interpreted is not visible in this header */
84 | } HTS_File;
85 |
86 | /* HTS_fopen_from_fn: wrapper for fopen */
87 | HTS_File *HTS_fopen_from_fn(const char *name, const char *opt);
88 |
89 | /* HTS_fopen_from_fp: wrapper for fopen */
90 | HTS_File *HTS_fopen_from_fp(HTS_File * fp, size_t size);
91 |
92 | /* HTS_fopen_from_data: wrapper for fopen */
93 | HTS_File *HTS_fopen_from_data(void *data, size_t size);
94 |
95 | /* HTS_fclose: wrapper for fclose */
96 | void HTS_fclose(HTS_File * fp);
97 |
98 | /* HTS_fgetc: wrapper for fgetc */
99 | int HTS_fgetc(HTS_File * fp);
100 |
101 | /* HTS_feof: wrapper for feof */
102 | int HTS_feof(HTS_File * fp);
103 |
104 | /* HTS_fseek: wrapper for fseek */
105 | int HTS_fseek(HTS_File * fp, long offset, int origin);
106 |
107 | /* HTS_ftell: wrapper for ftell */
108 | size_t HTS_ftell(HTS_File * fp);
109 |
110 | /* HTS_fread_big_endian: fread with byteswap */
111 | size_t HTS_fread_big_endian(void *buf, size_t size, size_t n, HTS_File * fp);
112 |
113 | /* HTS_fread_little_endian: fread with byteswap */
114 | size_t HTS_fread_little_endian(void *buf, size_t size, size_t n, HTS_File * fp);
115 |
116 | /* HTS_fwrite_little_endian: fwrite with byteswap */
117 | size_t HTS_fwrite_little_endian(const void *buf, size_t size, size_t n, FILE * fp);
118 |
119 | /* HTS_get_pattern_token: get pattern token (single/double quote can be used) */
120 | HTS_Boolean HTS_get_pattern_token(HTS_File * fp, char *buff);
121 |
122 | /* HTS_get_token_from_fp: get token from file pointer (separators are space, tab, line break) */
123 | HTS_Boolean HTS_get_token_from_fp(HTS_File * fp, char *buff);
124 |
125 | /* HTS_get_token_from_fp_with_separator: get token from file pointer with specified separator */
126 | HTS_Boolean HTS_get_token_from_fp_with_separator(HTS_File * fp, char *buff, char separator);
127 |
128 | /* HTS_get_token_from_string: get token from string (separators are space, tab, line break) */
129 | HTS_Boolean HTS_get_token_from_string(const char *string, size_t * index, char *buff);
130 |
131 | /* HTS_get_token_from_string_with_separator: get token from string with specified separator */
132 | HTS_Boolean HTS_get_token_from_string_with_separator(const char *str, size_t * index, char *buff, char separator);
133 |
134 | /* HTS_calloc: wrapper for calloc */
135 | void *HTS_calloc(const size_t num, const size_t size);
136 |
137 | /* HTS_strdup: wrapper for strdup */
138 | char *HTS_strdup(const char *string);
139 |
140 | /* HTS_alloc_matrix: allocate double matrix */
141 | double **HTS_alloc_matrix(size_t x, size_t y);
142 |
143 | /* HTS_free_matrix: free double matrix */
144 | void HTS_free_matrix(double **p, size_t x);
145 |
146 | /* HTS_free: wrapper for free */
147 | void HTS_free(void *p);
148 |
149 | /* HTS_error: output error message */
150 | void HTS_error(int error, const char *message, ...);
151 |
152 | /* audio ----------------------------------------------------------- */
153 |
154 | /* HTS_Audio_initialize: initialize audio */
155 | void HTS_Audio_initialize(HTS_Audio * audio);
156 |
157 | /* HTS_Audio_set_parameter: set parameters for audio */
158 | void HTS_Audio_set_parameter(HTS_Audio * audio, size_t sampling_frequency, size_t max_buff_size);
159 |
160 | /* HTS_Audio_write: send data to audio */
161 | void HTS_Audio_write(HTS_Audio * audio, short data);
162 |
163 | /* HTS_Audio_flush: flush remaining data */
164 | void HTS_Audio_flush(HTS_Audio * audio);
165 |
166 | /* HTS_Audio_clear: free audio */
167 | void HTS_Audio_clear(HTS_Audio * audio);
168 |
169 | /* model ----------------------------------------------------------- */
170 |
171 | /* HTS_ModelSet_initialize: initialize model set */
172 | void HTS_ModelSet_initialize(HTS_ModelSet * ms);
173 |
174 | /* HTS_ModelSet_load: load HTS voices */
175 | HTS_Boolean HTS_ModelSet_load(HTS_ModelSet * ms, char **voices, size_t num_voices);
176 |
177 | /* HTS_ModelSet_get_sampling_frequency: get sampling frequency of HTS voices */
178 | size_t HTS_ModelSet_get_sampling_frequency(HTS_ModelSet * ms);
179 |
180 | /* HTS_ModelSet_get_fperiod: get frame period of HTS voices */
181 | size_t HTS_ModelSet_get_fperiod(HTS_ModelSet * ms);
182 |
183 | /* HTS_ModelSet_get_option: get stream option */
184 | const char *HTS_ModelSet_get_option(HTS_ModelSet * ms, size_t stream_index);
185 |
186 | /* HTS_ModelSet_get_gv_flag: get GV flag */
187 | HTS_Boolean HTS_ModelSet_get_gv_flag(HTS_ModelSet * ms, const char *string);
188 |
189 | /* HTS_ModelSet_get_nstate: get number of state */
190 | size_t HTS_ModelSet_get_nstate(HTS_ModelSet * ms);
191 |
192 | /* HTS_ModelSet_get_fullcontext_label_format: get full-context label format */
193 | const char *HTS_ModelSet_get_fullcontext_label_format(HTS_ModelSet * ms);
194 |
195 | /* HTS_ModelSet_get_fullcontext_label_version: get full-context label version */
196 | const char *HTS_ModelSet_get_fullcontext_label_version(HTS_ModelSet * ms);
197 |
198 | /* HTS_ModelSet_get_nstream: get number of stream */
199 | size_t HTS_ModelSet_get_nstream(HTS_ModelSet * ms);
200 |
201 | /* HTS_ModelSet_get_nvoices: get number of HTS voices */
202 | size_t HTS_ModelSet_get_nvoices(HTS_ModelSet * ms);
203 |
204 | /* HTS_ModelSet_get_vector_length: get vector length */
205 | size_t HTS_ModelSet_get_vector_length(HTS_ModelSet * ms, size_t stream_index);
206 |
207 | /* HTS_ModelSet_is_msd: get MSD flag */
208 | HTS_Boolean HTS_ModelSet_is_msd(HTS_ModelSet * ms, size_t stream_index);
209 |
210 | /* HTS_ModelSet_get_window_size: get dynamic window size */
211 | size_t HTS_ModelSet_get_window_size(HTS_ModelSet * ms, size_t stream_index);
212 |
213 | /* HTS_ModelSet_get_window_left_width: get left width of dynamic window */
214 | int HTS_ModelSet_get_window_left_width(HTS_ModelSet * ms, size_t stream_index, size_t window_index);
215 |
216 | /* HTS_ModelSet_get_window_right_width: get right width of dynamic window */
217 | int HTS_ModelSet_get_window_right_width(HTS_ModelSet * ms, size_t stream_index, size_t window_index);
218 |
219 | /* HTS_ModelSet_get_window_coefficient: get coefficient of dynamic window */
220 | double HTS_ModelSet_get_window_coefficient(HTS_ModelSet * ms, size_t stream_index, size_t window_index, size_t coefficient_index);
221 |
222 | /* HTS_ModelSet_get_window_max_width: get max width of dynamic window */
223 | size_t HTS_ModelSet_get_window_max_width(HTS_ModelSet * ms, size_t stream_index);
224 |
225 | /* HTS_ModelSet_use_gv: get GV flag */
226 | HTS_Boolean HTS_ModelSet_use_gv(HTS_ModelSet * ms, size_t stream_index);
227 |
228 | /* HTS_ModelSet_get_duration_index: get index of duration tree and PDF */
229 | void HTS_ModelSet_get_duration_index(HTS_ModelSet * ms, size_t voice_index, const char *string, size_t * tree_index, size_t * pdf_index);
230 |
231 | /* HTS_ModelSet_get_duration: get duration using interpolation weight */
232 | void HTS_ModelSet_get_duration(HTS_ModelSet * ms, const char *string, const double *iw, double *mean, double *vari);
233 |
234 | /* HTS_ModelSet_get_parameter_index: get index of parameter tree and PDF */
235 | void HTS_ModelSet_get_parameter_index(HTS_ModelSet * ms, size_t voice_index, size_t stream_index, size_t state_index, const char *string, size_t * tree_index, size_t * pdf_index);
236 |
237 | /* HTS_ModelSet_get_parameter: get parameter using interpolation weight */
238 | void HTS_ModelSet_get_parameter(HTS_ModelSet * ms, size_t stream_index, size_t state_index, const char *string, const double *const *iw, double *mean, double *vari, double *msd);
239 |
240 | void HTS_ModelSet_get_gv_index(HTS_ModelSet * ms, size_t voice_index, size_t stream_index, const char *string, size_t * tree_index, size_t * pdf_index);
241 |
242 | /* HTS_ModelSet_get_gv: get GV using interpolation weight */
243 | void HTS_ModelSet_get_gv(HTS_ModelSet * ms, size_t stream_index, const char *string, const double *const *iw, double *mean, double *vari);
244 |
245 | /* HTS_ModelSet_clear: free model set */
246 | void HTS_ModelSet_clear(HTS_ModelSet * ms);
247 |
248 | /* label ----------------------------------------------------------- */
249 |
250 | /* HTS_Label_initialize: initialize label */
251 | void HTS_Label_initialize(HTS_Label * label);
252 |
253 | /* HTS_Label_load_from_fn: load label from file name */
254 | void HTS_Label_load_from_fn(HTS_Label * label, size_t sampling_rate, size_t fperiod, const char *fn);
255 |
256 | /* HTS_Label_load_from_strings: load label list from string list */
257 | void HTS_Label_load_from_strings(HTS_Label * label, size_t sampling_rate, size_t fperiod, char **lines, size_t num_lines);
258 |
259 | /* HTS_Label_get_size: get number of label string */
260 | size_t HTS_Label_get_size(HTS_Label * label);
261 |
262 | /* HTS_Label_get_string: get label string */
263 | const char *HTS_Label_get_string(HTS_Label * label, size_t index);
264 |
265 | /* HTS_Label_get_start_frame: get start frame */
266 | double HTS_Label_get_start_frame(HTS_Label * label, size_t index);
267 |
268 | /* HTS_Label_get_end_frame: get end frame */
269 | double HTS_Label_get_end_frame(HTS_Label * label, size_t index);
270 |
271 | /* HTS_Label_clear: free label */
272 | void HTS_Label_clear(HTS_Label * label);
273 |
274 | /* sstream --------------------------------------------------------- */
275 |
276 | /* HTS_SStreamSet_initialize: initialize state stream set */
277 | void HTS_SStreamSet_initialize(HTS_SStreamSet * sss);
278 |
279 | /* HTS_SStreamSet_create: parse label and determine state duration */
280 | HTS_Boolean HTS_SStreamSet_create(HTS_SStreamSet * sss, HTS_ModelSet * ms, HTS_Label * label, HTS_Boolean phoneme_alignment_flag, double speed, double *duration_iw, double **parameter_iw, double **gv_iw);
281 |
282 | /* HTS_SStreamSet_get_nstream: get number of stream */
283 | size_t HTS_SStreamSet_get_nstream(HTS_SStreamSet * sss);
284 |
285 | /* HTS_SStreamSet_get_vector_length: get vector length */
286 | size_t HTS_SStreamSet_get_vector_length(HTS_SStreamSet * sss, size_t stream_index);
287 |
288 | /* HTS_SStreamSet_is_msd: get MSD flag */
289 | HTS_Boolean HTS_SStreamSet_is_msd(HTS_SStreamSet * sss, size_t stream_index);
290 |
291 | /* HTS_SStreamSet_get_total_state: get total number of state */
292 | size_t HTS_SStreamSet_get_total_state(HTS_SStreamSet * sss);
293 |
294 | /* HTS_SStreamSet_get_total_frame: get total number of frame */
295 | size_t HTS_SStreamSet_get_total_frame(HTS_SStreamSet * sss);
296 |
297 | /* HTS_SStreamSet_get_msd: get msd parameter */
298 | double HTS_SStreamSet_get_msd(HTS_SStreamSet * sss, size_t stream_index, size_t state_index);
299 |
300 | /* HTS_SStreamSet_get_window_size: get dynamic window size */
301 | size_t HTS_SStreamSet_get_window_size(HTS_SStreamSet * sss, size_t stream_index);
302 |
303 | /* HTS_SStreamSet_get_window_left_width: get left width of dynamic window */
304 | int HTS_SStreamSet_get_window_left_width(HTS_SStreamSet * sss, size_t stream_index, size_t window_index);
305 |
306 | /* HTS_SStreamSet_get_window_right_width: get right width of dynamic window */
307 | int HTS_SStreamSet_get_window_right_width(HTS_SStreamSet * sss, size_t stream_index, size_t window_index);
308 |
309 | /* HTS_SStreamSet_get_window_coefficient: get coefficient of dynamic window */
310 | double HTS_SStreamSet_get_window_coefficient(HTS_SStreamSet * sss, size_t stream_index, size_t window_index, int coefficient_index);
311 |
312 | /* HTS_SStreamSet_get_window_max_width: get max width of dynamic window */
313 | size_t HTS_SStreamSet_get_window_max_width(HTS_SStreamSet * sss, size_t stream_index);
314 |
315 | /* HTS_SStreamSet_use_gv: get GV flag */
316 | HTS_Boolean HTS_SStreamSet_use_gv(HTS_SStreamSet * sss, size_t stream_index);
317 |
318 | /* HTS_SStreamSet_get_duration: get state duration */
319 | size_t HTS_SStreamSet_get_duration(HTS_SStreamSet * sss, size_t state_index);
320 |
321 | /* HTS_SStreamSet_get_mean: get mean parameter */
322 | double HTS_SStreamSet_get_mean(HTS_SStreamSet * sss, size_t stream_index, size_t state_index, size_t vector_index);
323 |
324 | /* HTS_SStreamSet_set_mean: set mean parameter */
325 | void HTS_SStreamSet_set_mean(HTS_SStreamSet * sss, size_t stream_index, size_t state_index, size_t vector_index, double f);
326 |
327 | /* HTS_SStreamSet_get_vari: get variance parameter */
328 | double HTS_SStreamSet_get_vari(HTS_SStreamSet * sss, size_t stream_index, size_t state_index, size_t vector_index);
329 |
330 | /* HTS_SStreamSet_set_vari: set variance parameter */
331 | void HTS_SStreamSet_set_vari(HTS_SStreamSet * sss, size_t stream_index, size_t state_index, size_t vector_index, double f);
332 |
333 | /* HTS_SStreamSet_get_gv_mean: get GV mean parameter */
334 | double HTS_SStreamSet_get_gv_mean(HTS_SStreamSet * sss, size_t stream_index, size_t vector_index);
335 |
336 | /* HTS_SStreamSet_get_gv_vari: get GV variance parameter */
337 | double HTS_SStreamSet_get_gv_vari(HTS_SStreamSet * sss, size_t stream_index, size_t vector_index);
338 |
339 | /* HTS_SStreamSet_set_gv_switch: set GV switch */
340 | void HTS_SStreamSet_set_gv_switch(HTS_SStreamSet * sss, size_t stream_index, size_t state_index, HTS_Boolean i);
341 |
342 | /* HTS_SStreamSet_get_gv_switch: get GV switch */
343 | HTS_Boolean HTS_SStreamSet_get_gv_switch(HTS_SStreamSet * sss, size_t stream_index, size_t state_index);
344 |
345 | /* HTS_SStreamSet_clear: free state stream set */
346 | void HTS_SStreamSet_clear(HTS_SStreamSet * sss);
347 |
348 | /* pstream --------------------------------------------------------- */
349 |
350 | /* check variance in finv() */
351 | #define INFTY ((double) 1.0e+38)
352 | #define INFTY2 ((double) 1.0e+19)
353 | #define INVINF ((double) 1.0e-38)
354 | #define INVINF2 ((double) 1.0e-19)
355 |
356 | /* GV */
357 | #define STEPINIT 0.1
358 | #define STEPDEC 0.5
359 | #define STEPINC 1.2
360 | #define W1 1.0
361 | #define W2 1.0
362 | #define GV_MAX_ITERATION 5
363 |
364 | /* HTS_PStreamSet_initialize: initialize parameter stream set */
365 | void HTS_PStreamSet_initialize(HTS_PStreamSet * pss);
366 |
367 | /* HTS_PStreamSet_create: parameter generation using GV weight */
368 | HTS_Boolean HTS_PStreamSet_create(HTS_PStreamSet * pss, HTS_SStreamSet * sss, double *msd_threshold, double *gv_weight);
369 |
370 | /* HTS_PStreamSet_get_nstream: get number of stream */
371 | size_t HTS_PStreamSet_get_nstream(HTS_PStreamSet * pss);
372 |
373 | /* HTS_PStreamSet_get_vector_length: get features length */
374 | size_t HTS_PStreamSet_get_vector_length(HTS_PStreamSet * pss, size_t stream_index);
375 |
376 | /* HTS_PStreamSet_get_total_frame: get total number of frame */
377 | size_t HTS_PStreamSet_get_total_frame(HTS_PStreamSet * pss);
378 |
379 | /* HTS_PStreamSet_get_parameter: get parameter */
380 | double HTS_PStreamSet_get_parameter(HTS_PStreamSet * pss, size_t stream_index, size_t frame_index, size_t vector_index);
381 |
382 | /* HTS_PStreamSet_get_parameter_vector: get parameter vector */
383 | double *HTS_PStreamSet_get_parameter_vector(HTS_PStreamSet * pss, size_t stream_index, size_t frame_index);
384 |
385 | /* HTS_PStreamSet_get_msd_flag: get generated MSD flag per frame */
386 | HTS_Boolean HTS_PStreamSet_get_msd_flag(HTS_PStreamSet * pss, size_t stream_index, size_t frame_index);
387 |
388 | /* HTS_PStreamSet_is_msd: get MSD flag */
389 | HTS_Boolean HTS_PStreamSet_is_msd(HTS_PStreamSet * pss, size_t stream_index);
390 |
391 | /* HTS_PStreamSet_clear: free parameter stream set */
392 | void HTS_PStreamSet_clear(HTS_PStreamSet * pss);
393 |
394 | /* gstream --------------------------------------------------------- */
395 |
396 | /* HTS_GStreamSet_initialize: initialize generated parameter stream set */
397 | void HTS_GStreamSet_initialize(HTS_GStreamSet * gss);
398 |
399 | /* HTS_GStreamSet_create: generate speech */
400 | HTS_Boolean HTS_GStreamSet_create(HTS_GStreamSet * gss, HTS_PStreamSet * pss, size_t stage, HTS_Boolean use_log_gain, size_t sampling_rate, size_t fperiod, double alpha, double beta, HTS_Boolean * stop, double volume, HTS_Audio * audio);
401 |
402 | /* HTS_GStreamSet_get_total_nsamples: get total number of sample */
403 | size_t HTS_GStreamSet_get_total_nsamples(HTS_GStreamSet * gss);
404 |
405 | /* HTS_GStreamSet_get_total_frame: get total number of frame */
406 | size_t HTS_GStreamSet_get_total_frame(HTS_GStreamSet * gss);
407 |
408 | /* HTS_GStreamSet_get_vector_length: get features length */
409 | size_t HTS_GStreamSet_get_vector_length(HTS_GStreamSet * gss, size_t stream_index);
410 |
411 | /* HTS_GStreamSet_get_speech: get synthesized speech parameter */
412 | double HTS_GStreamSet_get_speech(HTS_GStreamSet * gss, size_t sample_index);
413 |
414 | /* HTS_GStreamSet_get_parameter: get generated parameter */
415 | double HTS_GStreamSet_get_parameter(HTS_GStreamSet * gss, size_t stream_index, size_t frame_index, size_t vector_index);
416 |
417 | /* HTS_GStreamSet_clear: free generated parameter stream set */
418 | void HTS_GStreamSet_clear(HTS_GStreamSet * gss);
419 |
420 | /* vocoder --------------------------------------------------------- */
421 |
422 | #ifndef LZERO
423 | #define LZERO (-1.0e+10) /* ~log(0) */
424 | #endif /* !LZERO */
425 |
426 | #ifndef ZERO
427 | #define ZERO (1.0e-10) /* ~(0) */
428 | #endif /* !ZERO */
429 |
430 | #ifndef PI
431 | #define PI 3.14159265358979323846
432 | #endif /* !PI */
433 |
434 | #ifndef PI2
435 | #define PI2 6.28318530717958647692
436 | #endif /* !PI2 */
437 |
438 | #define RANDMAX 32767
439 |
440 | #define SEED 1
441 | #define B0 0x00000001
442 | #define B28 0x10000000
443 | #define B31 0x80000000
444 | #define B31_ 0x7fffffff
445 | #define Z 0x00000000
446 |
447 | #ifdef HTS_EMBEDDED
448 | #define GAUSS FALSE
449 | #define PADEORDER 4 /* pade order (for MLSA filter) */
450 | #define IRLENG 384 /* length of impulse response */
451 | #else
452 | #define GAUSS TRUE
453 | #define PADEORDER 5
454 | #define IRLENG 576
455 | #endif /* HTS_EMBEDDED */
456 |
457 | #define CHECK_LSP_STABILITY_MIN 0.25
458 | #define CHECK_LSP_STABILITY_NUM 4
459 |
460 | /* for MGLSA filter */
461 | #define NORMFLG1 TRUE
462 | #define NORMFLG2 FALSE
463 | #define MULGFLG1 TRUE
464 | #define MULGFLG2 FALSE
465 | #define NGAIN FALSE
466 |
/*
 * HTS_Vocoder: structure for setting of vocoder
 *
 * Groups the synthesis settings (stage/gamma, frame shift, sampling rate),
 * the state of the excitation and random generators, and a set of work
 * buffers.  Each work buffer pointer is paired with a size_t field that
 * records its current size.
 */
typedef struct _HTS_Vocoder {
   HTS_Boolean is_first;        /* NOTE(review): presumably TRUE until the first frame has been processed -- confirm in HTS_vocoder.c */
   size_t stage;                /* Gamma=-1/stage: if stage=0 then Gamma=0 */
   double gamma;                /* Gamma */
   HTS_Boolean use_log_gain;    /* log gain flag (for LSP) */
   size_t fprd;                 /* frame shift */
   unsigned long next;          /* temporary variable for random generator */
   HTS_Boolean gauss;           /* flag to use Gaussian noise */
   double rate;                 /* sampling rate */
   double pitch_of_curr_point;  /* used in excitation generation */
   double pitch_counter;        /* used in excitation generation */
   double pitch_inc_per_point;  /* used in excitation generation */
   double *excite_ring_buff;    /* used in excitation generation */
   size_t excite_buff_size;     /* used in excitation generation (size of excite_ring_buff -- TODO confirm unit) */
   size_t excite_buff_index;    /* used in excitation generation (position in excite_ring_buff -- TODO confirm) */
   unsigned char sw;            /* switch used in random generator */
   int x;                       /* excitation signal */
   double *freqt_buff;          /* used in freqt */
   size_t freqt_size;           /* buffer size for freqt */
   double *spectrum2en_buff;    /* used in spectrum2en */
   size_t spectrum2en_size;     /* buffer size for spectrum2en */
   double r1, r2, s;            /* used in random generator */
   double *postfilter_buff;     /* used in postfiltering */
   size_t postfilter_size;      /* buffer size for postfiltering */
   double *c, *cc, *cinc, *d1;  /* used in the MLSA/MGLSA filter */
   double *lsp2lpc_buff;        /* used in lsp2lpc */
   size_t lsp2lpc_size;         /* buffer size of lsp2lpc */
   double *gc2gc_buff;          /* used in gc2gc */
   size_t gc2gc_size;           /* buffer size for gc2gc */
} HTS_Vocoder;
498 |
499 | /* HTS_Vocoder_initialize: initialize vocoder */
500 | void HTS_Vocoder_initialize(HTS_Vocoder * v, size_t m, size_t stage, HTS_Boolean use_log_gain, size_t rate, size_t fperiod);
501 |
502 | /* HTS_Vocoder_synthesize: pulse/noise excitation and MLSA/MGLSA filter based waveform synthesis */
503 | void HTS_Vocoder_synthesize(HTS_Vocoder * v, size_t m, double lf0, double *spectrum, size_t nlpf, double *lpf, double alpha, double beta, double volume, double *rawdata, HTS_Audio * audio);
504 |
505 | /* HTS_Vocoder_clear: clear vocoder */
506 | void HTS_Vocoder_clear(HTS_Vocoder * v);
507 |
508 | HTS_HIDDEN_H_END;
509 |
510 | #endif /* !HTS_HIDDEN_H */
511 |
--------------------------------------------------------------------------------
/src/lib/HTS_label.c:
--------------------------------------------------------------------------------
1 | /* ----------------------------------------------------------------- */
2 | /* The HMM-Based Speech Synthesis Engine "hts_engine API" */
3 | /* developed by HTS Working Group */
4 | /* http://hts-engine.sourceforge.net/ */
5 | /* ----------------------------------------------------------------- */
6 | /* */
7 | /* Copyright (c) 2001-2015 Nagoya Institute of Technology */
8 | /* Department of Computer Science */
9 | /* */
10 | /* 2001-2008 Tokyo Institute of Technology */
11 | /* Interdisciplinary Graduate School of */
12 | /* Science and Engineering */
13 | /* */
14 | /* All rights reserved. */
15 | /* */
16 | /* Redistribution and use in source and binary forms, with or */
17 | /* without modification, are permitted provided that the following */
18 | /* conditions are met: */
19 | /* */
20 | /* - Redistributions of source code must retain the above copyright */
21 | /* notice, this list of conditions and the following disclaimer. */
22 | /* - Redistributions in binary form must reproduce the above */
23 | /* copyright notice, this list of conditions and the following */
24 | /* disclaimer in the documentation and/or other materials provided */
25 | /* with the distribution. */
26 | /* - Neither the name of the HTS working group nor the names of its */
27 | /* contributors may be used to endorse or promote products derived */
28 | /* from this software without specific prior written permission. */
29 | /* */
30 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
31 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
32 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
33 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
34 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
35 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */
36 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
37 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
38 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
39 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */
40 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */
41 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
42 | /* POSSIBILITY OF SUCH DAMAGE. */
43 | /* ----------------------------------------------------------------- */
44 |
45 | #ifndef HTS_LABEL_C
46 | #define HTS_LABEL_C
47 |
48 | #ifdef __cplusplus
49 | #define HTS_LABEL_C_START extern "C" {
50 | #define HTS_LABEL_C_END }
51 | #else
52 | #define HTS_LABEL_C_START
53 | #define HTS_LABEL_C_END
54 | #endif /* __CPLUSPLUS */
55 |
56 | HTS_LABEL_C_START;
57 |
58 | #include <stdlib.h> /* for atof() */
59 | #include <ctype.h> /* for isgraph(),isdigit() */
60 |
61 | /* hts_engine libraries */
62 | #include "HTS_hidden.h"
63 |
64 | static HTS_Boolean isdigit_string(char *str)
65 | {
66 | int i;
67 |
68 | if (sscanf(str, "%d", &i) == 1)
69 | return TRUE;
70 | else
71 | return FALSE;
72 | }
73 |
74 | /* HTS_Label_initialize: initialize label */
75 | void HTS_Label_initialize(HTS_Label * label)
76 | {
77 | label->head = NULL;
78 | label->size = 0;
79 | }
80 |
81 | /* HTS_Label_check_time: check label */
82 | static void HTS_Label_check_time(HTS_Label * label)
83 | {
84 | HTS_LabelString *lstring = label->head;
85 | HTS_LabelString *next = NULL;
86 |
87 | if (lstring)
88 | lstring->start = 0.0;
89 | while (lstring) {
90 | next = lstring->next;
91 | if (!next)
92 | break;
93 | if (lstring->end < 0.0 && next->start >= 0.0)
94 | lstring->end = next->start;
95 | else if (lstring->end >= 0.0 && next->start < 0.0)
96 | next->start = lstring->end;
97 | if (lstring->start < 0.0)
98 | lstring->start = -1.0;
99 | if (lstring->end < 0.0)
100 | lstring->end = -1.0;
101 | lstring = next;
102 | }
103 | }
104 |
105 | /* HTS_Label_load: load label */
106 | static void HTS_Label_load(HTS_Label * label, size_t sampling_rate, size_t fperiod, HTS_File * fp)
107 | {
108 | char buff[HTS_MAXBUFLEN];
109 | HTS_LabelString *lstring = NULL;
110 | double start, end;
111 | const double rate = (double) sampling_rate / ((double) fperiod * 1e+7);
112 |
113 | if (label->head || label->size != 0) {
114 | HTS_error(1, "HTS_Label_load_from_fp: label is not initialized.\n");
115 | return;
116 | }
117 |
118 | /* parse label file */
119 | while (HTS_get_token_from_fp(fp, buff)) {
120 | if (!isgraph((int) buff[0]))
121 | break;
122 | label->size++;
123 |
124 | if (lstring) {
125 | lstring->next = (HTS_LabelString *) HTS_calloc(1, sizeof(HTS_LabelString));
126 | lstring = lstring->next;
127 | } else { /* first time */
128 | lstring = (HTS_LabelString *) HTS_calloc(1, sizeof(HTS_LabelString));
129 | label->head = lstring;
130 | }
131 | if (isdigit_string(buff)) { /* has frame infomation */
132 | start = atof(buff);
133 | HTS_get_token_from_fp(fp, buff);
134 | end = atof(buff);
135 | HTS_get_token_from_fp(fp, buff);
136 | lstring->start = rate * start;
137 | lstring->end = rate * end;
138 | } else {
139 | lstring->start = -1.0;
140 | lstring->end = -1.0;
141 | }
142 | lstring->next = NULL;
143 | lstring->name = HTS_strdup(buff);
144 | }
145 | HTS_Label_check_time(label);
146 | }
147 |
148 | /* HTS_Label_load_from_fn: load label from file name */
149 | void HTS_Label_load_from_fn(HTS_Label * label, size_t sampling_rate, size_t fperiod, const char *fn)
150 | {
151 | HTS_File *fp = HTS_fopen_from_fn(fn, "r");
152 | HTS_Label_load(label, sampling_rate, fperiod, fp);
153 | HTS_fclose(fp);
154 | }
155 |
156 | /* HTS_Label_load_from_strings: load label from strings */
157 | void HTS_Label_load_from_strings(HTS_Label * label, size_t sampling_rate, size_t fperiod, char **lines, size_t num_lines)
158 | {
159 | char buff[HTS_MAXBUFLEN];
160 | HTS_LabelString *lstring = NULL;
161 | size_t i;
162 | size_t data_index;
163 | double start, end;
164 | const double rate = (double) sampling_rate / ((double) fperiod * 1e+7);
165 |
166 | if (label->head || label->size != 0) {
167 | HTS_error(1, "HTS_Label_load_from_fp: label list is not initialized.\n");
168 | return;
169 | }
170 | /* copy label */
171 | for (i = 0; i < num_lines; i++) {
172 | if (!isgraph((int) lines[i][0]))
173 | break;
174 | label->size++;
175 |
176 | if (lstring) {
177 | lstring->next = (HTS_LabelString *) HTS_calloc(1, sizeof(HTS_LabelString));
178 | lstring = lstring->next;
179 | } else { /* first time */
180 | lstring = (HTS_LabelString *) HTS_calloc(1, sizeof(HTS_LabelString));
181 | label->head = lstring;
182 | }
183 | data_index = 0;
184 | if (isdigit_string(lines[i])) { /* has frame infomation */
185 | HTS_get_token_from_string(lines[i], &data_index, buff);
186 | start = atof(buff);
187 | HTS_get_token_from_string(lines[i], &data_index, buff);
188 | end = atof(buff);
189 | HTS_get_token_from_string(lines[i], &data_index, buff);
190 | lstring->name = HTS_strdup(buff);
191 | lstring->start = rate * start;
192 | lstring->end = rate * end;
193 | } else {
194 | lstring->start = -1.0;
195 | lstring->end = -1.0;
196 | lstring->name = HTS_strdup(lines[i]);
197 | }
198 | lstring->next = NULL;
199 | }
200 | HTS_Label_check_time(label);
201 | }
202 |
/*
 * HTS_Label_get_size: get number of label string
 *
 * Returns the size field stored in the label; the list is not traversed.
 */
size_t HTS_Label_get_size(HTS_Label * label)
{
   return label->size;
}
208 |
209 | /* HTS_Label_get_string: get label string */
210 | const char *HTS_Label_get_string(HTS_Label * label, size_t index)
211 | {
212 | size_t i;
213 | HTS_LabelString *lstring = label->head;
214 |
215 | for (i = 0; i < index && lstring; i++)
216 | lstring = lstring->next;
217 | if (!lstring)
218 | return NULL;
219 | return lstring->name;
220 | }
221 |
222 | /* HTS_Label_get_start_frame: get start frame */
223 | double HTS_Label_get_start_frame(HTS_Label * label, size_t index)
224 | {
225 | size_t i;
226 | HTS_LabelString *lstring = label->head;
227 |
228 | for (i = 0; i < index && lstring; i++)
229 | lstring = lstring->next;
230 | if (!lstring)
231 | return -1.0;
232 | return lstring->start;
233 | }
234 |
235 | /* HTS_Label_get_end_frame: get end frame */
236 | double HTS_Label_get_end_frame(HTS_Label * label, size_t index)
237 | {
238 | size_t i;
239 | HTS_LabelString *lstring = label->head;
240 |
241 | for (i = 0; i < index && lstring; i++)
242 | lstring = lstring->next;
243 | if (!lstring)
244 | return -1.0;
245 | return lstring->end;
246 | }
247 |
248 | /* HTS_Label_clear: free label */
249 | void HTS_Label_clear(HTS_Label * label)
250 | {
251 | HTS_LabelString *lstring, *next_lstring;
252 |
253 | for (lstring = label->head; lstring; lstring = next_lstring) {
254 | next_lstring = lstring->next;
255 | HTS_free(lstring->name);
256 | HTS_free(lstring);
257 | }
258 | HTS_Label_initialize(label);
259 | }
260 |
261 | HTS_LABEL_C_END;
262 |
263 | #endif /* !HTS_LABEL_C */
264 |
--------------------------------------------------------------------------------
/src/lib/HTS_misc.c:
--------------------------------------------------------------------------------
1 | /* ----------------------------------------------------------------- */
2 | /* The HMM-Based Speech Synthesis Engine "hts_engine API" */
3 | /* developed by HTS Working Group */
4 | /* http://hts-engine.sourceforge.net/ */
5 | /* ----------------------------------------------------------------- */
6 | /* */
7 | /* Copyright (c) 2001-2015 Nagoya Institute of Technology */
8 | /* Department of Computer Science */
9 | /* */
10 | /* 2001-2008 Tokyo Institute of Technology */
11 | /* Interdisciplinary Graduate School of */
12 | /* Science and Engineering */
13 | /* */
14 | /* All rights reserved. */
15 | /* */
16 | /* Redistribution and use in source and binary forms, with or */
17 | /* without modification, are permitted provided that the following */
18 | /* conditions are met: */
19 | /* */
20 | /* - Redistributions of source code must retain the above copyright */
21 | /* notice, this list of conditions and the following disclaimer. */
22 | /* - Redistributions in binary form must reproduce the above */
23 | /* copyright notice, this list of conditions and the following */
24 | /* disclaimer in the documentation and/or other materials provided */
25 | /* with the distribution. */
26 | /* - Neither the name of the HTS working group nor the names of its */
27 | /* contributors may be used to endorse or promote products derived */
28 | /* from this software without specific prior written permission. */
29 | /* */
30 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
31 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
32 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
33 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
34 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
35 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */
36 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
37 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
38 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
39 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */
40 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */
41 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
42 | /* POSSIBILITY OF SUCH DAMAGE. */
43 | /* ----------------------------------------------------------------- */
44 |
45 | #ifndef HTS_MISC_C
46 | #define HTS_MISC_C
47 |
48 | #ifdef __cplusplus
49 | #define HTS_MISC_C_START extern "C" {
50 | #define HTS_MISC_C_END }
51 | #else
52 | #define HTS_MISC_C_START
53 | #define HTS_MISC_C_END
54 | #endif /* __CPLUSPLUS */
55 |
56 | HTS_MISC_C_START;
57 |
58 | #include <stdlib.h> /* for exit(),calloc(),free() */
59 | #include <stdarg.h> /* for va_list */
60 | #include <string.h> /* for strcpy(),strlen() */
61 |
62 | /* hts_engine libraries */
63 | #include "HTS_hidden.h"
64 |
65 | #ifdef FESTIVAL
66 | #include "EST_walloc.h"
67 | #endif /* FESTIVAL */
68 |
/* values of HTS_File.type: selects how HTS_File.pointer is interpreted */
#define HTS_FILE 0              /* pointer is a FILE * */
#define HTS_DATA 1              /* pointer is an HTS_Data * (in-memory stream) */

/* HTS_Data: in-memory byte stream used as the backing store of an HTS_DATA file */
typedef struct _HTS_Data {
   unsigned char *data;         /* buffer holding the bytes; freed by HTS_fclose() */
   size_t size;                 /* number of bytes in data */
   size_t index;                /* current read position within data */
} HTS_Data;
77 |
78 | /* HTS_fopen_from_fn: wrapper for fopen */
79 | HTS_File *HTS_fopen_from_fn(const char *name, const char *opt)
80 | {
81 | HTS_File *fp = (HTS_File *) HTS_calloc(1, sizeof(HTS_File));
82 |
83 | fp->type = HTS_FILE;
84 | fp->pointer = (void *) fopen(name, opt);
85 |
86 | if (fp->pointer == NULL) {
87 | HTS_error(0, "HTS_fopen: Cannot open %s.\n", name);
88 | HTS_free(fp);
89 | return NULL;
90 | }
91 |
92 | return fp;
93 | }
94 |
95 | /* HTS_fopen_from_fp: wrapper for fopen */
96 | HTS_File *HTS_fopen_from_fp(HTS_File * fp, size_t size)
97 | {
98 | if (fp == NULL || size == 0)
99 | return NULL;
100 | else if (fp->type == HTS_FILE) {
101 | HTS_Data *d;
102 | HTS_File *f;
103 | d = (HTS_Data *) HTS_calloc(1, sizeof(HTS_Data));
104 | d->data = (unsigned char *) HTS_calloc(size, sizeof(unsigned char));
105 | d->size = size;
106 | d->index = 0;
107 | if (fread(d->data, sizeof(unsigned char), size, (FILE *) fp->pointer) != size) {
108 | free(d->data);
109 | free(d);
110 | return NULL;
111 | }
112 | f = (HTS_File *) HTS_calloc(1, sizeof(HTS_File));
113 | f->type = HTS_DATA;
114 | f->pointer = (void *) d;
115 | return f;
116 | } else if (fp->type == HTS_DATA) {
117 | HTS_File *f;
118 | HTS_Data *tmp1, *tmp2;
119 | tmp1 = (HTS_Data *) fp->pointer;
120 | if (tmp1->index + size > tmp1->size)
121 | return NULL;
122 | tmp2 = (HTS_Data *) HTS_calloc(1, sizeof(HTS_Data));
123 | tmp2->data = (unsigned char *) HTS_calloc(size, sizeof(unsigned char));
124 | tmp2->size = size;
125 | tmp2->index = 0;
126 | memcpy(tmp2->data, &tmp1->data[tmp1->index], size);
127 | tmp1->index += size;
128 | f = (HTS_File *) HTS_calloc(1, sizeof(HTS_File));
129 | f->type = HTS_DATA;
130 | f->pointer = (void *) tmp2;
131 | return f;
132 | }
133 |
134 | HTS_error(0, "HTS_fopen_from_fp: Unknown file type.\n");
135 | return NULL;
136 | }
137 |
138 | /* HTS_fopen_from_data: wrapper for fopen */
139 | HTS_File *HTS_fopen_from_data(void *data, size_t size)
140 | {
141 | HTS_Data *d;
142 | HTS_File *f;
143 |
144 | if (data == NULL || size == 0)
145 | return NULL;
146 |
147 | d = (HTS_Data *) HTS_calloc(1, sizeof(HTS_Data));
148 | d->data = (unsigned char *) HTS_calloc(size, sizeof(unsigned char));
149 | d->size = size;
150 | d->index = 0;
151 |
152 | memcpy(d->data, data, size);
153 |
154 | f = (HTS_File *) HTS_calloc(1, sizeof(HTS_File));
155 | f->type = HTS_DATA;
156 | f->pointer = (void *) d;
157 |
158 | return f;
159 | }
160 |
161 | /* HTS_fclose: wrapper for fclose */
162 | void HTS_fclose(HTS_File * fp)
163 | {
164 | if (fp == NULL) {
165 | return;
166 | } else if (fp->type == HTS_FILE) {
167 | if (fp->pointer != NULL)
168 | fclose((FILE *) fp->pointer);
169 | HTS_free(fp);
170 | return;
171 | } else if (fp->type == HTS_DATA) {
172 | if (fp->pointer != NULL) {
173 | HTS_Data *d = (HTS_Data *) fp->pointer;
174 | if (d->data != NULL)
175 | HTS_free(d->data);
176 | HTS_free(d);
177 | }
178 | HTS_free(fp);
179 | return;
180 | }
181 | HTS_error(0, "HTS_fclose: Unknown file type.\n");
182 | }
183 |
184 | /* HTS_fgetc: wrapper for fgetc */
185 | int HTS_fgetc(HTS_File * fp)
186 | {
187 | if (fp == NULL) {
188 | return EOF;
189 | } else if (fp->type == HTS_FILE) {
190 | return fgetc((FILE *) fp->pointer);
191 | } else if (fp->type == HTS_DATA) {
192 | HTS_Data *d = (HTS_Data *) fp->pointer;
193 | if (d->size <= d->index)
194 | return EOF;
195 | return (int) d->data[d->index++];
196 | }
197 | HTS_error(0, "HTS_fgetc: Unknown file type.\n");
198 | return EOF;
199 | }
200 |
201 | /* HTS_feof: wrapper for feof */
202 | int HTS_feof(HTS_File * fp)
203 | {
204 | if (fp == NULL) {
205 | return 1;
206 | } else if (fp->type == HTS_FILE) {
207 | return feof((FILE *) fp->pointer);
208 | } else if (fp->type == HTS_DATA) {
209 | HTS_Data *d = (HTS_Data *) fp->pointer;
210 | return d->size <= d->index ? 1 : 0;
211 | }
212 | HTS_error(0, "HTS_feof: Unknown file type.\n");
213 | return 1;
214 | }
215 |
216 | /* HTS_fseek: wrapper for fseek */
217 | int HTS_fseek(HTS_File * fp, long offset, int origin)
218 | {
219 | if (fp == NULL) {
220 | return 1;
221 | } else if (fp->type == HTS_FILE) {
222 | return fseek((FILE *) fp->pointer, offset, origin);
223 | } else if (fp->type == HTS_DATA) {
224 | HTS_Data *d = (HTS_Data *) fp->pointer;
225 | if (origin == SEEK_SET) {
226 | d->index = (size_t) offset;
227 | } else if (origin == SEEK_CUR) {
228 | d->index += offset;
229 | } else if (origin == SEEK_END) {
230 | d->index = d->size + offset;
231 | } else {
232 | return 1;
233 | }
234 | return 0;
235 | }
236 | HTS_error(0, "HTS_fseek: Unknown file type.\n");
237 | return 1;
238 | }
239 |
240 | /* HTS_ftell: rapper for ftell */
241 | size_t HTS_ftell(HTS_File * fp)
242 | {
243 | if (fp == NULL) {
244 | return 0;
245 | } else if (fp->type == HTS_FILE) {
246 | fpos_t pos;
247 | fgetpos((FILE *) fp->pointer, &pos);
248 | #if defined(_WIN32) || defined(__CYGWIN__) || defined(__APPLE__) || defined(__ANDROID__)
249 | return (size_t) pos;
250 | #else
251 | return (size_t) pos.__pos;
252 | #endif /* _WIN32 || __CYGWIN__ || __APPLE__ || __ANDROID__ */
253 | } else if (fp->type == HTS_DATA) {
254 | HTS_Data *d = (HTS_Data *) fp->pointer;
255 | return d->index;
256 | }
257 | HTS_error(0, "HTS_ftell: Unknown file type.\n");
258 | return 0;
259 | }
260 |
261 | /* HTS_fread: wrapper for fread */
262 | static size_t HTS_fread(void *buf, size_t size, size_t n, HTS_File * fp)
263 | {
264 | if (fp == NULL || size == 0 || n == 0) {
265 | return 0;
266 | }
267 | if (fp->type == HTS_FILE) {
268 | return fread(buf, size, n, (FILE *) fp->pointer);
269 | } else if (fp->type == HTS_DATA) {
270 | HTS_Data *d = (HTS_Data *) fp->pointer;
271 | size_t i, length = size * n;
272 | unsigned char *c = (unsigned char *) buf;
273 | for (i = 0; i < length; i++) {
274 | if (d->index < d->size)
275 | c[i] = d->data[d->index++];
276 | else
277 | break;
278 | }
279 | if (i == 0)
280 | return 0;
281 | else
282 | return i / size;
283 | }
284 | HTS_error(0, "HTS_fread: Unknown file type.\n");
285 | return 0;
286 | }
287 |
/*
 * HTS_byte_swap: byte swap
 *
 * Reverses, in place, the byte order of each of block consecutive
 * elements of size bytes starting at p.
 */
static void HTS_byte_swap(void *p, size_t size, size_t block)
{
   char *element = (char *) p;
   size_t remaining;

   for (remaining = block; remaining > 0; remaining--) {
      size_t lo = 0;
      size_t hi = size;

      /* exchange mirrored bytes, working from both ends towards the middle */
      while (lo < size / 2) {
         const char saved = element[lo];

         hi--;
         element[lo] = element[hi];
         element[hi] = saved;
         lo++;
      }
      element += size;
   }
}
305 |
306 | /* HTS_fread_big_endian: fread with byteswap */
307 | size_t HTS_fread_big_endian(void *buf, size_t size, size_t n, HTS_File * fp)
308 | {
309 | size_t block = HTS_fread(buf, size, n, fp);
310 |
311 | #ifdef WORDS_LITTLEENDIAN
312 | HTS_byte_swap(buf, size, block);
313 | #endif /* WORDS_LITTLEENDIAN */
314 |
315 | return block;
316 | }
317 |
318 | /* HTS_fread_little_endian: fread with byteswap */
319 | size_t HTS_fread_little_endian(void *buf, size_t size, size_t n, HTS_File * fp)
320 | {
321 | size_t block = HTS_fread(buf, size, n, fp);
322 |
323 | #ifdef WORDS_BIGENDIAN
324 | HTS_byte_swap(buf, size, block);
325 | #endif /* WORDS_BIGENDIAN */
326 |
327 | return block;
328 | }
329 |
/*
 * HTS_fwrite_little_endian: fwrite with byteswap
 *
 * Writes n elements of size bytes from buf to fp and returns the number
 * of elements written, as reported by fwrite().
 *
 * When WORDS_BIGENDIAN is defined the elements are byte-swapped before
 * being written.  The swap is done on a temporary copy so that the
 * caller's buffer, which is const, is never modified, and exactly n
 * elements are swapped.
 */
size_t HTS_fwrite_little_endian(const void *buf, size_t size, size_t n, FILE * fp)
{
#ifdef WORDS_BIGENDIAN
   void *swapped;
   size_t written;

   /* nothing to swap: let fwrite() deal with the degenerate request */
   if (buf == NULL || size == 0 || n == 0)
      return fwrite(buf, size, n, fp);

   swapped = HTS_calloc(n, size);
   memcpy(swapped, buf, n * size);
   HTS_byte_swap(swapped, size, n);
   written = fwrite(swapped, size, n, fp);
   HTS_free(swapped);
   return written;
#else
   return fwrite(buf, size, n, fp);
#endif                          /* WORDS_BIGENDIAN */
}
338 |
339 | /* HTS_get_pattern_token: get pattern token (single/double quote can be used) */
340 | HTS_Boolean HTS_get_pattern_token(HTS_File * fp, char *buff)
341 | {
342 | char c;
343 | size_t i;
344 | HTS_Boolean squote = FALSE, dquote = FALSE;
345 |
346 | if (fp == NULL || HTS_feof(fp))
347 | return FALSE;
348 | c = HTS_fgetc(fp);
349 |
350 | while (c == ' ' || c == '\n') {
351 | if (HTS_feof(fp))
352 | return FALSE;
353 | c = HTS_fgetc(fp);
354 | }
355 |
356 | if (c == '\'') { /* single quote case */
357 | if (HTS_feof(fp))
358 | return FALSE;
359 | c = HTS_fgetc(fp);
360 | squote = TRUE;
361 | }
362 |
363 | if (c == '\"') { /*double quote case */
364 | if (HTS_feof(fp))
365 | return FALSE;
366 | c = HTS_fgetc(fp);
367 | dquote = TRUE;
368 | }
369 |
370 | if (c == ',') { /*special character ',' */
371 | strcpy(buff, ",");
372 | return TRUE;
373 | }
374 |
375 | i = 0;
376 | while (1) {
377 | buff[i++] = c;
378 | c = HTS_fgetc(fp);
379 | if (squote && c == '\'')
380 | break;
381 | if (dquote && c == '\"')
382 | break;
383 | if (!squote && !dquote) {
384 | if (c == ' ')
385 | break;
386 | if (c == '\n')
387 | break;
388 | if (HTS_feof(fp))
389 | break;
390 | }
391 | }
392 |
393 | buff[i] = '\0';
394 | return TRUE;
395 | }
396 |
397 | /* HTS_get_token: get token from file pointer (separators are space, tab, and line break) */
398 | HTS_Boolean HTS_get_token_from_fp(HTS_File * fp, char *buff)
399 | {
400 | char c;
401 | size_t i;
402 |
403 | if (fp == NULL || HTS_feof(fp))
404 | return FALSE;
405 | c = HTS_fgetc(fp);
406 | while (c == ' ' || c == '\n' || c == '\t') {
407 | if (HTS_feof(fp))
408 | return FALSE;
409 | c = HTS_fgetc(fp);
410 | if (c == EOF)
411 | return FALSE;
412 | }
413 |
414 | for (i = 0; c != ' ' && c != '\n' && c != '\t';) {
415 | buff[i++] = c;
416 | if (HTS_feof(fp))
417 | break;
418 | c = HTS_fgetc(fp);
419 | if (c == EOF)
420 | break;
421 | }
422 |
423 | buff[i] = '\0';
424 | return TRUE;
425 | }
426 |
427 | /* HTS_get_token_with_separator: get token from file pointer with specified separator */
428 | HTS_Boolean HTS_get_token_from_fp_with_separator(HTS_File * fp, char *buff, char separator)
429 | {
430 | char c;
431 | size_t i;
432 |
433 | if (fp == NULL || HTS_feof(fp))
434 | return FALSE;
435 | c = HTS_fgetc(fp);
436 | while (c == separator) {
437 | if (HTS_feof(fp))
438 | return FALSE;
439 | c = HTS_fgetc(fp);
440 | if (c == EOF)
441 | return FALSE;
442 | }
443 |
444 | for (i = 0; c != separator;) {
445 | buff[i++] = c;
446 | if (HTS_feof(fp))
447 | break;
448 | c = HTS_fgetc(fp);
449 | if (c == EOF)
450 | break;
451 | }
452 |
453 | buff[i] = '\0';
454 | return TRUE;
455 | }
456 |
457 | /* HTS_get_token_from_string: get token from string (separators are space, tab, and line break) */
458 | HTS_Boolean HTS_get_token_from_string(const char *string, size_t * index, char *buff)
459 | {
460 | char c;
461 | size_t i;
462 |
463 | c = string[(*index)];
464 | if (c == '\0')
465 | return FALSE;
466 | c = string[(*index)++];
467 | if (c == '\0')
468 | return FALSE;
469 | while (c == ' ' || c == '\n' || c == '\t') {
470 | if (c == '\0')
471 | return FALSE;
472 | c = string[(*index)++];
473 | }
474 | for (i = 0; c != ' ' && c != '\n' && c != '\t' && c != '\0'; i++) {
475 | buff[i] = c;
476 | c = string[(*index)++];
477 | }
478 |
479 | buff[i] = '\0';
480 | return TRUE;
481 | }
482 |
483 | /* HTS_get_token_from_string_with_separator: get token from string with specified separator */
484 | HTS_Boolean HTS_get_token_from_string_with_separator(const char *str, size_t * index, char *buff, char separator)
485 | {
486 | char c;
487 | size_t len = 0;
488 |
489 | if (str == NULL)
490 | return FALSE;
491 |
492 | c = str[(*index)];
493 | if (c == '\0')
494 | return FALSE;
495 | while (c == separator) {
496 | if (c == '\0')
497 | return FALSE;
498 | (*index)++;
499 | c = str[(*index)];
500 | }
501 | while (c != separator && c != '\0') {
502 | buff[len++] = c;
503 | (*index)++;
504 | c = str[(*index)];
505 | }
506 | if (c != '\0')
507 | (*index)++;
508 |
509 | buff[len] = '\0';
510 |
511 | if (len > 0)
512 | return TRUE;
513 | else
514 | return FALSE;
515 | }
516 |
/*
 * HTS_calloc: wrapper for calloc
 *
 * Allocates zero-filled storage for num elements of size bytes each.
 *
 * Returns NULL when num or size is 0.  When num * size does not fit in
 * size_t, or the allocation fails, HTS_error(1, ...) is called, which
 * terminates the program.  Release the result with HTS_free().
 */
void *HTS_calloc(const size_t num, const size_t size)
{
   size_t n;
   void *mem;

   if (num == 0 || size == 0)
      return NULL;

   /* reject requests whose byte count would wrap around */
   if (num > (size_t) -1 / size) {
      HTS_error(1, "HTS_calloc: Cannot allocate memory.\n");
      return NULL;
   }
   n = num * size;

#ifdef FESTIVAL
   mem = (void *) safe_wcalloc(n);
#else
   mem = (void *) malloc(n);
#endif                          /* FESTIVAL */

   /* check before touching the block: clearing a NULL pointer would crash */
   if (mem == NULL) {
      HTS_error(1, "HTS_calloc: Cannot allocate memory.\n");
      return NULL;
   }

   memset(mem, 0, n);

   return mem;
}
539 |
/*
 * HTS_free: wrapper for free
 *
 * Releases memory obtained from HTS_calloc() or HTS_strdup().  Uses
 * wfree() when built with FESTIVAL defined and free() otherwise, matching
 * the allocator selected in HTS_calloc().
 */
void HTS_free(void *ptr)
{
#ifdef FESTIVAL
   wfree(ptr);
#else
   free(ptr);
#endif                          /* FESTIVAL */
}
549 |
/*
 * HTS_strdup: wrapper for strdup
 *
 * Returns a newly allocated copy of string, to be released with
 * HTS_free().  A NULL string yields NULL instead of being dereferenced.
 */
char *HTS_strdup(const char *string)
{
#ifndef FESTIVAL
   char *buff;
#endif                          /* !FESTIVAL */

   if (string == NULL)
      return NULL;

#ifdef FESTIVAL
   return (wstrdup(string));
#else
   /* +1 for the terminating NUL */
   buff = (char *) HTS_calloc(strlen(string) + 1, sizeof(char));
   strcpy(buff, string);
   return buff;
#endif                          /* FESTIVAL */
}
561 |
/*
 * HTS_alloc_matrix: allocate double matrix
 *
 * Allocates x row pointers, each pointing at a separately allocated,
 * zero-filled row of y doubles.  Returns NULL when x or y is 0.
 * Release the result with HTS_free_matrix(p, x).
 */
double **HTS_alloc_matrix(size_t x, size_t y)
{
   double **rows;
   size_t r = 0;

   if (x == 0 || y == 0)
      return NULL;

   rows = (double **) HTS_calloc(x, sizeof(double *));

   while (r < x) {
      rows[r] = (double *) HTS_calloc(y, sizeof(double));
      r++;
   }
   return rows;
}
577 |
/*
 * HTS_free_matrix: free double matrix
 *
 * Releases the x rows of p and then p itself.  A NULL matrix is ignored;
 * HTS_alloc_matrix() returns NULL for a zero dimension, so a caller may
 * legitimately pass NULL together with a non-zero x.
 */
void HTS_free_matrix(double **p, size_t x)
{
   size_t i;

   if (p == NULL)
      return;

   for (i = 0; i < x; i++)
      HTS_free(p[i]);
   HTS_free(p);
}
587 |
/*
 * HTS_error: output error message
 *
 * error   : values greater than 0 mark an error; other values a warning
 * message : printf-style format string, followed by its arguments
 *
 * Flushes stdout and stderr, then writes "Error: " or "Warning: " and the
 * formatted message to stderr.  For an error the program is terminated
 * with exit(error); for a warning the function returns.
 */
void HTS_error(int error, const char *message, ...)
{
   va_list arg;
   const int is_fatal = (error > 0);

   fflush(stdout);
   fflush(stderr);

   fputs(is_fatal ? "\nError: " : "\nWarning: ", stderr);

   va_start(arg, message);
   vfprintf(stderr, message, arg);
   va_end(arg);

   fflush(stderr);

   if (is_fatal)
      exit(error);
}
610 |
611 | HTS_MISC_C_END;
612 |
613 | #endif /* !HTS_MISC_C */
614 |
--------------------------------------------------------------------------------
/src/lib/HTS_pstream.c:
--------------------------------------------------------------------------------
1 | /* ----------------------------------------------------------------- */
2 | /* The HMM-Based Speech Synthesis Engine "hts_engine API" */
3 | /* developed by HTS Working Group */
4 | /* http://hts-engine.sourceforge.net/ */
5 | /* ----------------------------------------------------------------- */
6 | /* */
7 | /* Copyright (c) 2001-2015 Nagoya Institute of Technology */
8 | /* Department of Computer Science */
9 | /* */
10 | /* 2001-2008 Tokyo Institute of Technology */
11 | /* Interdisciplinary Graduate School of */
12 | /* Science and Engineering */
13 | /* */
14 | /* All rights reserved. */
15 | /* */
16 | /* Redistribution and use in source and binary forms, with or */
17 | /* without modification, are permitted provided that the following */
18 | /* conditions are met: */
19 | /* */
20 | /* - Redistributions of source code must retain the above copyright */
21 | /* notice, this list of conditions and the following disclaimer. */
22 | /* - Redistributions in binary form must reproduce the above */
23 | /* copyright notice, this list of conditions and the following */
24 | /* disclaimer in the documentation and/or other materials provided */
25 | /* with the distribution. */
26 | /* - Neither the name of the HTS working group nor the names of its */
27 | /* contributors may be used to endorse or promote products derived */
28 | /* from this software without specific prior written permission. */
29 | /* */
30 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
31 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
32 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
33 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
34 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
35 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */
36 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
37 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
38 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
39 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */
40 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */
41 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
42 | /* POSSIBILITY OF SUCH DAMAGE. */
43 | /* ----------------------------------------------------------------- */
44 |
45 | #ifndef HTS_PSTREAM_C
46 | #define HTS_PSTREAM_C
47 |
48 | #ifdef __cplusplus
49 | #define HTS_PSTREAM_C_START extern "C" {
50 | #define HTS_PSTREAM_C_END }
51 | #else
52 | #define HTS_PSTREAM_C_START
53 | #define HTS_PSTREAM_C_END
54 | #endif /* __CPLUSPLUS */
55 |
56 | HTS_PSTREAM_C_START;
57 |
58 | #include <math.h> /* for sqrt() */
59 |
60 | /* hts_engine libraries */
61 | #include "HTS_hidden.h"
62 |
63 | /* HTS_finv: calculate 1.0/variance function */
64 | static double HTS_finv(const double x)
65 | {
66 | if (x >= INFTY2)
67 | return 0.0;
68 | if (x <= -INFTY2)
69 | return 0.0;
70 | if (x <= INVINF2 && x >= 0)
71 | return INFTY;
72 | if (x >= -INVINF2 && x < 0)
73 | return -INFTY;
74 |
75 | return (1.0 / x);
76 | }
77 |
78 | /* HTS_PStream_calc_wuw_and_wum: calcurate W'U^{-1}W and W'U^{-1}M */
79 | static void HTS_PStream_calc_wuw_and_wum(HTS_PStream * pst, size_t m)
80 | {
81 | size_t t, i, j;
82 | int shift;
83 | double wu;
84 |
85 | for (t = 0; t < pst->length; t++) {
86 | /* initialize */
87 | pst->sm.wum[t] = 0.0;
88 | for (i = 0; i < pst->width; i++)
89 | pst->sm.wuw[t][i] = 0.0;
90 |
91 | /* calc WUW & WUM */
92 | for (i = 0; i < pst->win_size; i++)
93 | for (shift = pst->win_l_width[i]; shift <= pst->win_r_width[i]; shift++)
94 | if (((int) t + shift >= 0) && ((int) t + shift < pst->length) && (pst->win_coefficient[i][-shift] != 0.0)) {
95 | wu = pst->win_coefficient[i][-shift] * pst->sm.ivar[t + shift][i * pst->vector_length + m];
96 | pst->sm.wum[t] += wu * pst->sm.mean[t + shift][i * pst->vector_length + m];
97 | for (j = 0; (j < pst->width) && (t + j < pst->length); j++)
98 | if (((int) j <= pst->win_r_width[i] + shift) && (pst->win_coefficient[i][j - shift] != 0.0))
99 | pst->sm.wuw[t][j] += wu * pst->win_coefficient[i][j - shift];
100 | }
101 | }
102 | }
103 |
104 |
105 | /* HTS_PStream_ldl_factorization: Factorize W'*U^{-1}*W to L*D*L' (L: lower triangular, D: diagonal) */
106 | static void HTS_PStream_ldl_factorization(HTS_PStream * pst)
107 | {
108 | size_t t, i, j;
109 |
110 | for (t = 0; t < pst->length; t++) {
111 | for (i = 1; (i < pst->width) && (t >= i); i++)
112 | pst->sm.wuw[t][0] -= pst->sm.wuw[t - i][i] * pst->sm.wuw[t - i][i] * pst->sm.wuw[t - i][0];
113 |
114 | for (i = 1; i < pst->width; i++) {
115 | for (j = 1; (i + j < pst->width) && (t >= j); j++)
116 | pst->sm.wuw[t][i] -= pst->sm.wuw[t - j][j] * pst->sm.wuw[t - j][i + j] * pst->sm.wuw[t - j][0];
117 | pst->sm.wuw[t][i] /= pst->sm.wuw[t][0];
118 | }
119 | }
120 | }
121 |
122 | /* HTS_PStream_forward_substitution: forward subtitution for mlpg */
123 | static void HTS_PStream_forward_substitution(HTS_PStream * pst)
124 | {
125 | size_t t, i;
126 |
127 | for (t = 0; t < pst->length; t++) {
128 | pst->sm.g[t] = pst->sm.wum[t];
129 | for (i = 1; (i < pst->width) && (t >= i); i++)
130 | pst->sm.g[t] -= pst->sm.wuw[t - i][i] * pst->sm.g[t - i];
131 | }
132 | }
133 |
134 | /* HTS_PStream_backward_substitution: backward subtitution for mlpg */
135 | static void HTS_PStream_backward_substitution(HTS_PStream * pst, size_t m)
136 | {
137 | size_t rev, t, i;
138 |
139 | for (rev = 0; rev < pst->length; rev++) {
140 | t = pst->length - 1 - rev;
141 | pst->par[t][m] = pst->sm.g[t] / pst->sm.wuw[t][0];
142 | for (i = 1; (i < pst->width) && (t + i < pst->length); i++)
143 | pst->par[t][m] -= pst->sm.wuw[t][i] * pst->par[t + i][m];
144 | }
145 | }
146 |
147 | /* HTS_PStream_calc_gv: subfunction for mlpg using GV */
148 | static void HTS_PStream_calc_gv(HTS_PStream * pst, size_t m, double *mean, double *vari)
149 | {
150 | size_t t;
151 |
152 | *mean = 0.0;
153 | for (t = 0; t < pst->length; t++)
154 | if (pst->gv_switch[t])
155 | *mean += pst->par[t][m];
156 | *mean /= pst->gv_length;
157 | *vari = 0.0;
158 | for (t = 0; t < pst->length; t++)
159 | if (pst->gv_switch[t])
160 | *vari += (pst->par[t][m] - *mean) * (pst->par[t][m] - *mean);
161 | *vari /= pst->gv_length;
162 | }
163 |
164 | /* HTS_PStream_conv_gv: subfunction for mlpg using GV */
165 | static void HTS_PStream_conv_gv(HTS_PStream * pst, size_t m)
166 | {
167 | size_t t;
168 | double ratio;
169 | double mean;
170 | double vari;
171 |
172 | HTS_PStream_calc_gv(pst, m, &mean, &vari);
173 | ratio = sqrt(pst->gv_mean[m] / vari);
174 | for (t = 0; t < pst->length; t++)
175 | if (pst->gv_switch[t])
176 | pst->par[t][m] = ratio * (pst->par[t][m] - mean) + mean;
177 | }
178 |
179 | /* HTS_PStream_calc_derivative: subfunction for mlpg using GV */
180 | static double HTS_PStream_calc_derivative(HTS_PStream * pst, size_t m)
181 | {
182 | size_t t, i;
183 | double mean;
184 | double vari;
185 | double dv;
186 | double h;
187 | double gvobj;
188 | double hmmobj;
189 | double w = 1.0 / (pst->win_size * pst->length);
190 |
191 | HTS_PStream_calc_gv(pst, m, &mean, &vari);
192 | gvobj = -0.5 * W2 * vari * pst->gv_vari[m] * (vari - 2.0 * pst->gv_mean[m]);
193 | dv = -2.0 * pst->gv_vari[m] * (vari - pst->gv_mean[m]) / pst->length;
194 |
195 | for (t = 0; t < pst->length; t++) {
196 | pst->sm.g[t] = pst->sm.wuw[t][0] * pst->par[t][m];
197 | for (i = 1; i < pst->width; i++) {
198 | if (t + i < pst->length)
199 | pst->sm.g[t] += pst->sm.wuw[t][i] * pst->par[t + i][m];
200 | if (t + 1 > i)
201 | pst->sm.g[t] += pst->sm.wuw[t - i][i] * pst->par[t - i][m];
202 | }
203 | }
204 |
205 | for (t = 0, hmmobj = 0.0; t < pst->length; t++) {
206 | hmmobj += W1 * w * pst->par[t][m] * (pst->sm.wum[t] - 0.5 * pst->sm.g[t]);
207 | h = -W1 * w * pst->sm.wuw[t][1 - 1] - W2 * 2.0 / (pst->length * pst->length) * ((pst->length - 1) * pst->gv_vari[m] * (vari - pst->gv_mean[m]) + 2.0 * pst->gv_vari[m] * (pst->par[t][m] - mean) * (pst->par[t][m] - mean));
208 | if (pst->gv_switch[t])
209 | pst->sm.g[t] = 1.0 / h * (W1 * w * (-pst->sm.g[t] + pst->sm.wum[t]) + W2 * dv * (pst->par[t][m] - mean));
210 | else
211 | pst->sm.g[t] = 1.0 / h * (W1 * w * (-pst->sm.g[t] + pst->sm.wum[t]));
212 | }
213 |
214 | return (-(hmmobj + gvobj));
215 | }
216 |
217 | /* HTS_PStream_gv_parmgen: function for mlpg using GV */
218 | static void HTS_PStream_gv_parmgen(HTS_PStream * pst, size_t m)
219 | {
220 | size_t t, i;
221 | double step = STEPINIT;
222 | double prev = 0.0;
223 | double obj;
224 |
225 | if (pst->gv_length == 0)
226 | return;
227 |
228 | HTS_PStream_conv_gv(pst, m);
229 | if (GV_MAX_ITERATION > 0) {
230 | HTS_PStream_calc_wuw_and_wum(pst, m);
231 | for (i = 1; i <= GV_MAX_ITERATION; i++) {
232 | obj = HTS_PStream_calc_derivative(pst, m);
233 | if (i > 1) {
234 | if (obj > prev)
235 | step *= STEPDEC;
236 | if (obj < prev)
237 | step *= STEPINC;
238 | }
239 | for (t = 0; t < pst->length; t++) {
240 | if (pst->gv_switch[t])
241 | pst->par[t][m] += step * pst->sm.g[t];
242 | }
243 | prev = obj;
244 | }
245 | }
246 | }
247 |
248 | /* HTS_PStream_mlpg: generate sequence of speech parameter vector maximizing its output probability for given pdf sequence */
249 | static void HTS_PStream_mlpg(HTS_PStream * pst)
250 | {
251 | size_t m;
252 |
253 | if (pst->length == 0)
254 | return;
255 |
256 | for (m = 0; m < pst->vector_length; m++) {
257 | HTS_PStream_calc_wuw_and_wum(pst, m);
258 | HTS_PStream_ldl_factorization(pst); /* LDL factorization */
259 | HTS_PStream_forward_substitution(pst); /* forward substitution */
260 | HTS_PStream_backward_substitution(pst, m); /* backward substitution */
261 | if (pst->gv_length > 0)
262 | HTS_PStream_gv_parmgen(pst, m);
263 | }
264 | }
265 |
266 | /* HTS_PStreamSet_initialize: initialize parameter stream set */
267 | void HTS_PStreamSet_initialize(HTS_PStreamSet * pss)
268 | {
269 | pss->pstream = NULL;
270 | pss->nstream = 0;
271 | pss->total_frame = 0;
272 | }
273 |
274 | /* HTS_PStreamSet_create: parameter generation using GV weight */
275 | HTS_Boolean HTS_PStreamSet_create(HTS_PStreamSet * pss, HTS_SStreamSet * sss, double *msd_threshold, double *gv_weight)
276 | {
277 | size_t i, j, k, l, m;
278 | int shift;
279 | size_t frame, msd_frame, state;
280 |
281 | HTS_PStream *pst;
282 | HTS_Boolean not_bound;
283 |
284 | if (pss->nstream != 0) {
285 | HTS_error(1, "HTS_PstreamSet_create: HTS_PStreamSet should be clear.\n");
286 | return FALSE;
287 | }
288 |
289 | /* initialize */
290 | pss->nstream = HTS_SStreamSet_get_nstream(sss);
291 | pss->pstream = (HTS_PStream *) HTS_calloc(pss->nstream, sizeof(HTS_PStream));
292 | pss->total_frame = HTS_SStreamSet_get_total_frame(sss);
293 |
294 | /* create */
295 | for (i = 0; i < pss->nstream; i++) {
296 | pst = &pss->pstream[i];
297 | if (HTS_SStreamSet_is_msd(sss, i) == TRUE) { /* for MSD */
298 | pst->length = 0;
299 | for (state = 0; state < HTS_SStreamSet_get_total_state(sss); state++)
300 | if (HTS_SStreamSet_get_msd(sss, i, state) > msd_threshold[i])
301 | pst->length += HTS_SStreamSet_get_duration(sss, state);
302 | pst->msd_flag = (HTS_Boolean *) HTS_calloc(pss->total_frame, sizeof(HTS_Boolean));
303 | for (state = 0, frame = 0; state < HTS_SStreamSet_get_total_state(sss); state++) {
304 | if (HTS_SStreamSet_get_msd(sss, i, state) > msd_threshold[i]) {
305 | for (j = 0; j < HTS_SStreamSet_get_duration(sss, state); j++) {
306 | pst->msd_flag[frame] = TRUE;
307 | frame++;
308 | }
309 | } else {
310 | for (j = 0; j < HTS_SStreamSet_get_duration(sss, state); j++) {
311 | pst->msd_flag[frame] = FALSE;
312 | frame++;
313 | }
314 | }
315 | }
316 | } else { /* for non MSD */
317 | pst->length = pss->total_frame;
318 | pst->msd_flag = NULL;
319 | }
320 | pst->vector_length = HTS_SStreamSet_get_vector_length(sss, i);
321 | pst->width = HTS_SStreamSet_get_window_max_width(sss, i) * 2 + 1; /* band width of R */
322 | pst->win_size = HTS_SStreamSet_get_window_size(sss, i);
323 | if (pst->length > 0) {
324 | pst->sm.mean = HTS_alloc_matrix(pst->length, pst->vector_length * pst->win_size);
325 | pst->sm.ivar = HTS_alloc_matrix(pst->length, pst->vector_length * pst->win_size);
326 | pst->sm.wum = (double *) HTS_calloc(pst->length, sizeof(double));
327 | pst->sm.wuw = HTS_alloc_matrix(pst->length, pst->width);
328 | pst->sm.g = (double *) HTS_calloc(pst->length, sizeof(double));
329 | pst->par = HTS_alloc_matrix(pst->length, pst->vector_length);
330 | }
331 | /* copy dynamic window */
332 | pst->win_l_width = (int *) HTS_calloc(pst->win_size, sizeof(int));
333 | pst->win_r_width = (int *) HTS_calloc(pst->win_size, sizeof(int));
334 | pst->win_coefficient = (double **) HTS_calloc(pst->win_size, sizeof(double));
335 | for (j = 0; j < pst->win_size; j++) {
336 | pst->win_l_width[j] = HTS_SStreamSet_get_window_left_width(sss, i, j);
337 | pst->win_r_width[j] = HTS_SStreamSet_get_window_right_width(sss, i, j);
338 | if (pst->win_l_width[j] + pst->win_r_width[j] == 0)
339 | pst->win_coefficient[j] = (double *)
340 | HTS_calloc(-2 * pst->win_l_width[j] + 1, sizeof(double));
341 | else
342 | pst->win_coefficient[j] = (double *)
343 | HTS_calloc(-2 * pst->win_l_width[j], sizeof(double));
344 | pst->win_coefficient[j] -= pst->win_l_width[j];
345 | for (shift = pst->win_l_width[j]; shift <= pst->win_r_width[j]; shift++)
346 | pst->win_coefficient[j][shift] = HTS_SStreamSet_get_window_coefficient(sss, i, j, shift);
347 | }
348 | /* copy GV */
349 | if (HTS_SStreamSet_use_gv(sss, i)) {
350 | pst->gv_mean = (double *) HTS_calloc(pst->vector_length, sizeof(double));
351 | pst->gv_vari = (double *) HTS_calloc(pst->vector_length, sizeof(double));
352 | for (j = 0; j < pst->vector_length; j++) {
353 | pst->gv_mean[j] = HTS_SStreamSet_get_gv_mean(sss, i, j) * gv_weight[i];
354 | pst->gv_vari[j] = HTS_SStreamSet_get_gv_vari(sss, i, j);
355 | }
356 | pst->gv_switch = (HTS_Boolean *) HTS_calloc(pst->length, sizeof(HTS_Boolean));
357 | if (HTS_SStreamSet_is_msd(sss, i) == TRUE) { /* for MSD */
358 | for (state = 0, frame = 0, msd_frame = 0; state < HTS_SStreamSet_get_total_state(sss); state++)
359 | for (j = 0; j < HTS_SStreamSet_get_duration(sss, state); j++, frame++)
360 | if (pst->msd_flag[frame] == TRUE)
361 | pst->gv_switch[msd_frame++] = HTS_SStreamSet_get_gv_switch(sss, i, state);
362 | } else { /* for non MSD */
363 | for (state = 0, frame = 0; state < HTS_SStreamSet_get_total_state(sss); state++)
364 | for (j = 0; j < HTS_SStreamSet_get_duration(sss, state); j++)
365 | pst->gv_switch[frame++] = HTS_SStreamSet_get_gv_switch(sss, i, state);
366 | }
367 | for (j = 0, pst->gv_length = 0; j < pst->length; j++)
368 | if (pst->gv_switch[j])
369 | pst->gv_length++;
370 | } else {
371 | pst->gv_switch = NULL;
372 | pst->gv_length = 0;
373 | pst->gv_mean = NULL;
374 | pst->gv_vari = NULL;
375 | }
376 | /* copy pdfs */
377 | if (HTS_SStreamSet_is_msd(sss, i) == TRUE) { /* for MSD */
378 | for (state = 0, frame = 0, msd_frame = 0; state < HTS_SStreamSet_get_total_state(sss); state++) {
379 | for (j = 0; j < HTS_SStreamSet_get_duration(sss, state); j++) {
380 | if (pst->msd_flag[frame] == TRUE) {
381 | /* check current frame is MSD boundary or not */
382 | for (k = 0; k < pst->win_size; k++) {
383 | not_bound = TRUE;
384 | for (shift = pst->win_l_width[k]; shift <= pst->win_r_width[k]; shift++)
385 | if ((int) frame + shift < 0 || (int) pss->total_frame <= (int) frame + shift || pst->msd_flag[frame + shift] != TRUE) {
386 | not_bound = FALSE;
387 | break;
388 | }
389 | for (l = 0; l < pst->vector_length; l++) {
390 | m = pst->vector_length * k + l;
391 | pst->sm.mean[msd_frame][m] = HTS_SStreamSet_get_mean(sss, i, state, m);
392 | if (not_bound || k == 0)
393 | pst->sm.ivar[msd_frame][m] = HTS_finv(HTS_SStreamSet_get_vari(sss, i, state, m));
394 | else
395 | pst->sm.ivar[msd_frame][m] = 0.0;
396 | }
397 | }
398 | msd_frame++;
399 | }
400 | frame++;
401 | }
402 | }
403 | } else { /* for non MSD */
404 | for (state = 0, frame = 0; state < HTS_SStreamSet_get_total_state(sss); state++) {
405 | for (j = 0; j < HTS_SStreamSet_get_duration(sss, state); j++) {
406 | for (k = 0; k < pst->win_size; k++) {
407 | not_bound = TRUE;
408 | for (shift = pst->win_l_width[k]; shift <= pst->win_r_width[k]; shift++)
409 | if ((int) frame + shift < 0 || (int) pss->total_frame <= (int) frame + shift) {
410 | not_bound = FALSE;
411 | break;
412 | }
413 | for (l = 0; l < pst->vector_length; l++) {
414 | m = pst->vector_length * k + l;
415 | pst->sm.mean[frame][m] = HTS_SStreamSet_get_mean(sss, i, state, m);
416 | if (not_bound || k == 0)
417 | pst->sm.ivar[frame][m] = HTS_finv(HTS_SStreamSet_get_vari(sss, i, state, m));
418 | else
419 | pst->sm.ivar[frame][m] = 0.0;
420 | }
421 | }
422 | frame++;
423 | }
424 | }
425 | }
426 | /* parameter generation */
427 | HTS_PStream_mlpg(pst);
428 | }
429 |
430 | return TRUE;
431 | }
432 |
433 | /* HTS_PStreamSet_get_nstream: get number of stream */
434 | size_t HTS_PStreamSet_get_nstream(HTS_PStreamSet * pss)
435 | {
436 | return pss->nstream;
437 | }
438 |
439 | /* HTS_PStreamSet_get_vector_length: get feature length */
440 | size_t HTS_PStreamSet_get_vector_length(HTS_PStreamSet * pss, size_t stream_index)
441 | {
442 | return pss->pstream[stream_index].vector_length;
443 | }
444 |
445 | /* HTS_PStreamSet_get_total_frame: get total number of frame */
446 | size_t HTS_PStreamSet_get_total_frame(HTS_PStreamSet * pss)
447 | {
448 | return pss->total_frame;
449 | }
450 |
451 | /* HTS_PStreamSet_get_parameter: get parameter */
452 | double HTS_PStreamSet_get_parameter(HTS_PStreamSet * pss, size_t stream_index, size_t frame_index, size_t vector_index)
453 | {
454 | return pss->pstream[stream_index].par[frame_index][vector_index];
455 | }
456 |
457 | /* HTS_PStreamSet_get_parameter_vector: get parameter vector*/
458 | double *HTS_PStreamSet_get_parameter_vector(HTS_PStreamSet * pss, size_t stream_index, size_t frame_index)
459 | {
460 | return pss->pstream[stream_index].par[frame_index];
461 | }
462 |
463 | /* HTS_PStreamSet_get_msd_flag: get generated MSD flag per frame */
464 | HTS_Boolean HTS_PStreamSet_get_msd_flag(HTS_PStreamSet * pss, size_t stream_index, size_t frame_index)
465 | {
466 | return pss->pstream[stream_index].msd_flag[frame_index];
467 | }
468 |
469 | /* HTS_PStreamSet_is_msd: get MSD flag */
470 | HTS_Boolean HTS_PStreamSet_is_msd(HTS_PStreamSet * pss, size_t stream_index)
471 | {
472 | return pss->pstream[stream_index].msd_flag ? TRUE : FALSE;
473 | }
474 |
475 | /* HTS_PStreamSet_clear: free parameter stream set */
476 | void HTS_PStreamSet_clear(HTS_PStreamSet * pss)
477 | {
478 | size_t i, j;
479 | HTS_PStream *pstream;
480 |
481 | if (pss->pstream) {
482 | for (i = 0; i < pss->nstream; i++) {
483 | pstream = &pss->pstream[i];
484 | if (pstream->sm.wum)
485 | HTS_free(pstream->sm.wum);
486 | if (pstream->sm.g)
487 | HTS_free(pstream->sm.g);
488 | if (pstream->sm.wuw)
489 | HTS_free_matrix(pstream->sm.wuw, pstream->length);
490 | if (pstream->sm.ivar)
491 | HTS_free_matrix(pstream->sm.ivar, pstream->length);
492 | if (pstream->sm.mean)
493 | HTS_free_matrix(pstream->sm.mean, pstream->length);
494 | if (pstream->par)
495 | HTS_free_matrix(pstream->par, pstream->length);
496 | if (pstream->msd_flag)
497 | HTS_free(pstream->msd_flag);
498 | if (pstream->win_coefficient) {
499 | for (j = 0; j < pstream->win_size; j++) {
500 | pstream->win_coefficient[j] += pstream->win_l_width[j];
501 | HTS_free(pstream->win_coefficient[j]);
502 | }
503 | }
504 | if (pstream->gv_mean)
505 | HTS_free(pstream->gv_mean);
506 | if (pstream->gv_vari)
507 | HTS_free(pstream->gv_vari);
508 | if (pstream->win_coefficient)
509 | HTS_free(pstream->win_coefficient);
510 | if (pstream->win_l_width)
511 | HTS_free(pstream->win_l_width);
512 | if (pstream->win_r_width)
513 | HTS_free(pstream->win_r_width);
514 | if (pstream->gv_switch)
515 | HTS_free(pstream->gv_switch);
516 | }
517 | HTS_free(pss->pstream);
518 | }
519 | HTS_PStreamSet_initialize(pss);
520 | }
521 |
522 | HTS_PSTREAM_C_END;
523 |
524 | #endif /* !HTS_PSTREAM_C */
525 |
--------------------------------------------------------------------------------
/src/lib/HTS_sstream.c:
--------------------------------------------------------------------------------
1 | /* ----------------------------------------------------------------- */
2 | /* The HMM-Based Speech Synthesis Engine "hts_engine API" */
3 | /* developed by HTS Working Group */
4 | /* http://hts-engine.sourceforge.net/ */
5 | /* ----------------------------------------------------------------- */
6 | /* */
7 | /* Copyright (c) 2001-2015 Nagoya Institute of Technology */
8 | /* Department of Computer Science */
9 | /* */
10 | /* 2001-2008 Tokyo Institute of Technology */
11 | /* Interdisciplinary Graduate School of */
12 | /* Science and Engineering */
13 | /* */
14 | /* All rights reserved. */
15 | /* */
16 | /* Redistribution and use in source and binary forms, with or */
17 | /* without modification, are permitted provided that the following */
18 | /* conditions are met: */
19 | /* */
20 | /* - Redistributions of source code must retain the above copyright */
21 | /* notice, this list of conditions and the following disclaimer. */
22 | /* - Redistributions in binary form must reproduce the above */
23 | /* copyright notice, this list of conditions and the following */
24 | /* disclaimer in the documentation and/or other materials provided */
25 | /* with the distribution. */
26 | /* - Neither the name of the HTS working group nor the names of its */
27 | /* contributors may be used to endorse or promote products derived */
28 | /* from this software without specific prior written permission. */
29 | /* */
30 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
31 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
32 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
33 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
34 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
35 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */
36 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
37 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
38 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
39 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */
40 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */
41 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
42 | /* POSSIBILITY OF SUCH DAMAGE. */
43 | /* ----------------------------------------------------------------- */
44 |
45 | #ifndef HTS_SSTREAM_C
46 | #define HTS_SSTREAM_C
47 |
48 | #ifdef __cplusplus
49 | #define HTS_SSTREAM_C_START extern "C" {
50 | #define HTS_SSTREAM_C_END }
51 | #else
52 | #define HTS_SSTREAM_C_START
53 | #define HTS_SSTREAM_C_END
54 | #endif /* __CPLUSPLUS */
55 |
56 | HTS_SSTREAM_C_START;
57 |
58 | #include <stdlib.h>
59 | #include <math.h>
60 |
61 | /* hts_engine libraries */
62 | #include "HTS_hidden.h"
63 |
/* HTS_set_default_duration: set default duration from state duration probability distribution
 *
 * Sets duration[i] to mean[i] rounded to the nearest integer (mean + 0.5,
 * truncated) with a minimum of 1, for each of the size states. The vari
 * argument is not used. Returns the sum of the durations.
 */
static double HTS_set_default_duration(size_t * duration, double *mean, double *vari, size_t size)
{
   size_t state;
   size_t total = 0;
   double rounded;

   for (state = 0; state < size; state++) {
      rounded = mean[state] + 0.5;
      duration[state] = (rounded < 1.0) ? 1 : (size_t) rounded;
      total += duration[state];
   }

   return (double) total;
}
82 |
/* HTS_set_specified_duration: set duration from state duration probability distribution and specified frame length
 *
 *   duration     : output, one duration (>= 1) per state
 *   mean, vari   : duration mean and variance per state
 *   size         : number of states
 *   frame_length : requested total length in frames (rounded to nearest, min 1)
 *
 * Returns the total number of frames assigned. When the request is not
 * longer than the number of states, every state gets one frame and size is
 * returned (with a warning if the request was shorter). Otherwise the
 * durations are first estimated as mean + rho * vari and then adjusted one
 * frame at a time until they add up to the request.
 *
 * Degenerate inputs are handled explicitly: an empty state list returns 0.0,
 * and a zero variance sum uses rho = 0 instead of dividing by zero (which
 * would make the first estimation convert NaN to size_t).
 */
static double HTS_set_specified_duration(size_t * duration, double *mean, double *vari, size_t size, double frame_length)
{
   size_t i;
   size_t best;
   int found;
   double temp1, temp2;
   double rho = 0.0;
   size_t sum = 0;
   size_t target_length;

   /* nothing to distribute over */
   if (size == 0)
      return 0.0;

   /* get the target frame length */
   if (frame_length + 0.5 < 1.0)
      target_length = 1;
   else
      target_length = (size_t) (frame_length + 0.5);

   /* check the specified duration */
   if (target_length <= size) {
      if (target_length < size)
         HTS_error(-1, "HTS_set_specified_duration: Specified frame length is too short.\n");
      for (i = 0; i < size; i++)
         duration[i] = 1;
      return (double) size;
   }

   /* RHO calculation */
   temp1 = 0.0;
   temp2 = 0.0;
   for (i = 0; i < size; i++) {
      temp1 += mean[i];
      temp2 += vari[i];
   }
   if (temp2 != 0.0)
      rho = ((double) target_length - temp1) / temp2;

   /* first estimation */
   for (i = 0; i < size; i++) {
      temp1 = mean[i] + rho * vari[i] + 0.5;
      if (temp1 < 1.0)
         duration[i] = 1;
      else
         duration[i] = (size_t) temp1;
      sum += duration[i];
   }

   /* loop estimation */
   while (target_length != sum) {
      /* search flexible state and modify its duration */
      found = 0;
      best = 0;
      temp1 = 0.0;
      if (target_length > sum) {
         /* lengthen the state whose lengthened duration stays closest to rho */
         for (i = 0; i < size; i++) {
            temp2 = fabs(rho - ((double) duration[i] + 1 - mean[i]) / vari[i]);
            if (!found || temp1 > temp2) {
               found = 1;
               best = i;
               temp1 = temp2;
            }
         }
         sum++;
         duration[best]++;
      } else {
         /* shorten a state, but never below one frame */
         for (i = 0; i < size; i++) {
            if (duration[i] > 1) {
               temp2 = fabs(rho - ((double) duration[i] - 1 - mean[i]) / vari[i]);
               if (!found || temp1 > temp2) {
                  found = 1;
                  best = i;
                  temp1 = temp2;
               }
            }
         }
         /* defensive: sum > target_length > size implies a state above 1 */
         if (!found)
            break;
         sum--;
         duration[best]--;
      }
   }

   /* equals target_length whenever the loop ran to completion */
   return (double) sum;
}
159 |
160 | /* HTS_SStreamSet_initialize: initialize state stream set */
161 | void HTS_SStreamSet_initialize(HTS_SStreamSet * sss)
162 | {
163 | sss->nstream = 0;
164 | sss->nstate = 0;
165 | sss->sstream = NULL;
166 | sss->duration = NULL;
167 | sss->total_state = 0;
168 | sss->total_frame = 0;
169 | }
170 |
171 | /* HTS_SStreamSet_create: parse label and determine state duration */
172 | HTS_Boolean HTS_SStreamSet_create(HTS_SStreamSet * sss, HTS_ModelSet * ms, HTS_Label * label, HTS_Boolean phoneme_alignment_flag, double speed, double *duration_iw, double **parameter_iw, double **gv_iw)
173 | {
174 | size_t i, j, k;
175 | double temp;
176 | int shift;
177 | size_t state;
178 | HTS_SStream *sst;
179 | double *duration_mean, *duration_vari;
180 | double frame_length;
181 | size_t next_time;
182 | size_t next_state;
183 |
184 | if (HTS_Label_get_size(label) == 0)
185 | return FALSE;
186 |
187 | /* check interpolation weights */
188 | for (i = 0, temp = 0.0; i < HTS_ModelSet_get_nvoices(ms); i++)
189 | temp += duration_iw[i];
190 | if (temp == 0.0) {
191 | return FALSE;
192 | } else if (temp != 1.0) {
193 | for (i = 0; i < HTS_ModelSet_get_nvoices(ms); i++)
194 | if (duration_iw[i] != 0.0)
195 | duration_iw[i] /= temp;
196 | }
197 |
198 | for (i = 0; i < HTS_ModelSet_get_nstream(ms); i++) {
199 | for (j = 0, temp = 0.0; j < HTS_ModelSet_get_nvoices(ms); j++)
200 | temp += parameter_iw[j][i];
201 | if (temp == 0.0) {
202 | return FALSE;
203 | } else if (temp != 1.0) {
204 | for (j = 0; j < HTS_ModelSet_get_nvoices(ms); j++)
205 | if (parameter_iw[j][i] != 0.0)
206 | parameter_iw[j][i] /= temp;
207 | }
208 | if (HTS_ModelSet_use_gv(ms, i)) {
209 | for (j = 0, temp = 0.0; j < HTS_ModelSet_get_nvoices(ms); j++)
210 | temp += gv_iw[j][i];
211 | if (temp == 0.0)
212 | return FALSE;
213 | else if (temp != 1.0)
214 | for (j = 0; j < HTS_ModelSet_get_nvoices(ms); j++)
215 | if (gv_iw[j][i] != 0.0)
216 | gv_iw[j][i] /= temp;
217 | }
218 | }
219 |
220 | /* initialize state sequence */
221 | sss->nstate = HTS_ModelSet_get_nstate(ms);
222 | sss->nstream = HTS_ModelSet_get_nstream(ms);
223 | sss->total_frame = 0;
224 | sss->total_state = HTS_Label_get_size(label) * sss->nstate;
225 | sss->duration = (size_t *) HTS_calloc(sss->total_state, sizeof(size_t));
226 | sss->sstream = (HTS_SStream *) HTS_calloc(sss->nstream, sizeof(HTS_SStream));
227 | for (i = 0; i < sss->nstream; i++) {
228 | sst = &sss->sstream[i];
229 | sst->vector_length = HTS_ModelSet_get_vector_length(ms, i);
230 | sst->mean = (double **) HTS_calloc(sss->total_state, sizeof(double *));
231 | sst->vari = (double **) HTS_calloc(sss->total_state, sizeof(double *));
232 | if (HTS_ModelSet_is_msd(ms, i))
233 | sst->msd = (double *) HTS_calloc(sss->total_state, sizeof(double));
234 | else
235 | sst->msd = NULL;
236 | for (j = 0; j < sss->total_state; j++) {
237 | sst->mean[j] = (double *) HTS_calloc(sst->vector_length * HTS_ModelSet_get_window_size(ms, i), sizeof(double));
238 | sst->vari[j] = (double *) HTS_calloc(sst->vector_length * HTS_ModelSet_get_window_size(ms, i), sizeof(double));
239 | }
240 | if (HTS_ModelSet_use_gv(ms, i)) {
241 | sst->gv_switch = (HTS_Boolean *) HTS_calloc(sss->total_state, sizeof(HTS_Boolean));
242 | for (j = 0; j < sss->total_state; j++)
243 | sst->gv_switch[j] = TRUE;
244 | } else {
245 | sst->gv_switch = NULL;
246 | }
247 | }
248 |
249 | /* determine state duration */
250 | duration_mean = (double *) HTS_calloc(sss->total_state, sizeof(double));
251 | duration_vari = (double *) HTS_calloc(sss->total_state, sizeof(double));
252 | for (i = 0; i < HTS_Label_get_size(label); i++)
253 | HTS_ModelSet_get_duration(ms, HTS_Label_get_string(label, i), duration_iw, &duration_mean[i * sss->nstate], &duration_vari[i * sss->nstate]);
254 | if (phoneme_alignment_flag == TRUE) {
255 | /* use duration set by user */
256 | next_time = 0;
257 | next_state = 0;
258 | state = 0;
259 | for (i = 0; i < HTS_Label_get_size(label); i++) {
260 | temp = HTS_Label_get_end_frame(label, i);
261 | if (temp >= 0) {
262 | next_time += (size_t) HTS_set_specified_duration(&sss->duration[next_state], &duration_mean[next_state], &duration_vari[next_state], state + sss->nstate - next_state, temp - next_time);
263 | next_state = state + sss->nstate;
264 | } else if (i + 1 == HTS_Label_get_size(label)) {
265 | HTS_error(-1, "HTS_SStreamSet_create: The time of final label is not specified.\n");
266 | HTS_set_default_duration(&sss->duration[next_state], &duration_mean[next_state], &duration_vari[next_state], state + sss->nstate - next_state);
267 | }
268 | state += sss->nstate;
269 | }
270 | } else {
271 | /* determine frame length */
272 | if (speed != 1.0) {
273 | temp = 0.0;
274 | for (i = 0; i < sss->total_state; i++) {
275 | temp += duration_mean[i];
276 | }
277 | frame_length = temp / speed;
278 | HTS_set_specified_duration(sss->duration, duration_mean, duration_vari, sss->total_state, frame_length);
279 | } else {
280 | HTS_set_default_duration(sss->duration, duration_mean, duration_vari, sss->total_state);
281 | }
282 | }
283 | HTS_free(duration_mean);
284 | HTS_free(duration_vari);
285 |
286 | /* get parameter */
287 | for (i = 0, state = 0; i < HTS_Label_get_size(label); i++) {
288 | for (j = 2; j <= sss->nstate + 1; j++) {
289 | sss->total_frame += sss->duration[state];
290 | for (k = 0; k < sss->nstream; k++) {
291 | sst = &sss->sstream[k];
292 | if (sst->msd)
293 | HTS_ModelSet_get_parameter(ms, k, j, HTS_Label_get_string(label, i), (const double *const *) parameter_iw, sst->mean[state], sst->vari[state], &sst->msd[state]);
294 | else
295 | HTS_ModelSet_get_parameter(ms, k, j, HTS_Label_get_string(label, i), (const double *const *) parameter_iw, sst->mean[state], sst->vari[state], NULL);
296 | }
297 | state++;
298 | }
299 | }
300 |
301 | /* copy dynamic window */
302 | for (i = 0; i < sss->nstream; i++) {
303 | sst = &sss->sstream[i];
304 | sst->win_size = HTS_ModelSet_get_window_size(ms, i);
305 | sst->win_max_width = HTS_ModelSet_get_window_max_width(ms, i);
306 | sst->win_l_width = (int *) HTS_calloc(sst->win_size, sizeof(int));
307 | sst->win_r_width = (int *) HTS_calloc(sst->win_size, sizeof(int));
308 | sst->win_coefficient = (double **) HTS_calloc(sst->win_size, sizeof(double));
309 | for (j = 0; j < sst->win_size; j++) {
310 | sst->win_l_width[j] = HTS_ModelSet_get_window_left_width(ms, i, j);
311 | sst->win_r_width[j] = HTS_ModelSet_get_window_right_width(ms, i, j);
312 | if (sst->win_l_width[j] + sst->win_r_width[j] == 0)
313 | sst->win_coefficient[j] = (double *) HTS_calloc(-2 * sst->win_l_width[j] + 1, sizeof(double));
314 | else
315 | sst->win_coefficient[j] = (double *) HTS_calloc(-2 * sst->win_l_width[j], sizeof(double));
316 | sst->win_coefficient[j] -= sst->win_l_width[j];
317 | for (shift = sst->win_l_width[j]; shift <= sst->win_r_width[j]; shift++)
318 | sst->win_coefficient[j][shift] = HTS_ModelSet_get_window_coefficient(ms, i, j, shift);
319 | }
320 | }
321 |
322 | /* determine GV */
323 | for (i = 0; i < sss->nstream; i++) {
324 | sst = &sss->sstream[i];
325 | if (HTS_ModelSet_use_gv(ms, i)) {
326 | sst->gv_mean = (double *) HTS_calloc(sst->vector_length, sizeof(double));
327 | sst->gv_vari = (double *) HTS_calloc(sst->vector_length, sizeof(double));
328 | HTS_ModelSet_get_gv(ms, i, HTS_Label_get_string(label, 0), (const double *const *) gv_iw, sst->gv_mean, sst->gv_vari);
329 | } else {
330 | sst->gv_mean = NULL;
331 | sst->gv_vari = NULL;
332 | }
333 | }
334 |
335 | for (i = 0; i < HTS_Label_get_size(label); i++)
336 | if (HTS_ModelSet_get_gv_flag(ms, HTS_Label_get_string(label, i)) == FALSE)
337 | for (j = 0; j < sss->nstream; j++)
338 | if (HTS_ModelSet_use_gv(ms, j) == TRUE)
339 | for (k = 0; k < sss->nstate; k++)
340 | sss->sstream[j].gv_switch[i * sss->nstate + k] = FALSE;
341 |
342 | return TRUE;
343 | }
344 |
345 | /* HTS_SStreamSet_get_nstream: return the number of streams recorded in the set */
346 | size_t HTS_SStreamSet_get_nstream(HTS_SStreamSet * sss)
347 | {
348 |    return (*sss).nstream;
349 | }
350 |
351 | /* HTS_SStreamSet_get_vector_length: return the vector length of the stream at stream_index */
352 | size_t HTS_SStreamSet_get_vector_length(HTS_SStreamSet * sss, size_t stream_index)
353 | {
354 |    return (sss->sstream + stream_index)->vector_length;
355 | }
356 |
357 | /* HTS_SStreamSet_is_msd: TRUE when the stream at stream_index has an MSD array, FALSE otherwise */
358 | HTS_Boolean HTS_SStreamSet_is_msd(HTS_SStreamSet * sss, size_t stream_index)
359 | {
360 |    return ((sss->sstream + stream_index)->msd != NULL) ? TRUE : FALSE;
361 | }
362 |
363 | /* HTS_SStreamSet_get_total_state: return the total number of states recorded in the set */
364 | size_t HTS_SStreamSet_get_total_state(HTS_SStreamSet * sss)
365 | {
366 |    return (*sss).total_state;
367 | }
368 |
369 | /* HTS_SStreamSet_get_total_frame: return the total number of frames recorded in the set */
370 | size_t HTS_SStreamSet_get_total_frame(HTS_SStreamSet * sss)
371 | {
372 |    return (*sss).total_frame;
373 | }
374 |
375 | /* HTS_SStreamSet_get_msd: return the MSD value stored for state_index of the given stream (the stream's msd array must exist) */
376 | double HTS_SStreamSet_get_msd(HTS_SStreamSet * sss, size_t stream_index, size_t state_index)
377 | {
378 |    return (sss->sstream + stream_index)->msd[state_index];
379 | }
380 |
381 | /* HTS_SStreamSet_get_window_size: return the number of dynamic windows of the given stream */
382 | size_t HTS_SStreamSet_get_window_size(HTS_SStreamSet * sss, size_t stream_index)
383 | {
384 |    return (sss->sstream + stream_index)->win_size;
385 | }
386 |
387 | /* HTS_SStreamSet_get_window_left_width: return the left width stored for window_index of the given stream */
388 | int HTS_SStreamSet_get_window_left_width(HTS_SStreamSet * sss, size_t stream_index, size_t window_index)
389 | {
390 |    return (sss->sstream + stream_index)->win_l_width[window_index];
391 | }
392 |
393 | /* HTS_SStreamSet_get_window_right_width: return the right width stored for window_index of the given stream */
394 | int HTS_SStreamSet_get_window_right_width(HTS_SStreamSet * sss, size_t stream_index, size_t window_index)
395 | {
396 |    return (sss->sstream + stream_index)->win_r_width[window_index];
397 | }
398 |
399 | /* HTS_SStreamSet_get_window_coefficient: return one coefficient of a dynamic window; coefficient_index is a signed index into that window's row */
400 | double HTS_SStreamSet_get_window_coefficient(HTS_SStreamSet * sss, size_t stream_index, size_t window_index, int coefficient_index)
401 | {
402 |    return (sss->sstream + stream_index)->win_coefficient[window_index][coefficient_index];
403 | }
404 |
405 | /* HTS_SStreamSet_get_window_max_width: return the maximum dynamic window width stored for the given stream */
406 | size_t HTS_SStreamSet_get_window_max_width(HTS_SStreamSet * sss, size_t stream_index)
407 | {
408 |    return (sss->sstream + stream_index)->win_max_width;
409 | }
410 |
411 | /* HTS_SStreamSet_use_gv: TRUE when the given stream holds a GV mean vector, FALSE otherwise */
412 | HTS_Boolean HTS_SStreamSet_use_gv(HTS_SStreamSet * sss, size_t stream_index)
413 | {
414 |    return ((sss->sstream + stream_index)->gv_mean != NULL) ? TRUE : FALSE;
415 | }
416 |
417 | /* HTS_SStreamSet_get_duration: return the duration stored for the state at state_index */
418 | size_t HTS_SStreamSet_get_duration(HTS_SStreamSet * sss, size_t state_index)
419 | {
420 |    return (*sss).duration[state_index];
421 | }
422 |
423 | /* HTS_SStreamSet_get_mean: return element vector_index of the mean vector of one state in one stream */
424 | double HTS_SStreamSet_get_mean(HTS_SStreamSet * sss, size_t stream_index, size_t state_index, size_t vector_index)
425 | {
426 |    return (sss->sstream + stream_index)->mean[state_index][vector_index];
427 | }
428 |
429 | /* HTS_SStreamSet_set_mean: overwrite element vector_index of the mean vector of one state in one stream with f */
430 | void HTS_SStreamSet_set_mean(HTS_SStreamSet * sss, size_t stream_index, size_t state_index, size_t vector_index, double f)
431 | {
432 |    (sss->sstream + stream_index)->mean[state_index][vector_index] = f;
433 | }
434 |
435 | /* HTS_SStreamSet_get_vari: return element vector_index of the variance vector of one state in one stream */
436 | double HTS_SStreamSet_get_vari(HTS_SStreamSet * sss, size_t stream_index, size_t state_index, size_t vector_index)
437 | {
438 |    return (sss->sstream + stream_index)->vari[state_index][vector_index];
439 | }
440 |
441 | /* HTS_SStreamSet_set_vari: overwrite element vector_index of the variance vector of one state in one stream with f */
442 | void HTS_SStreamSet_set_vari(HTS_SStreamSet * sss, size_t stream_index, size_t state_index, size_t vector_index, double f)
443 | {
444 |    (sss->sstream + stream_index)->vari[state_index][vector_index] = f;
445 | }
446 |
447 | /* HTS_SStreamSet_get_gv_mean: return element vector_index of the GV mean vector of the given stream (gv_mean must exist) */
448 | double HTS_SStreamSet_get_gv_mean(HTS_SStreamSet * sss, size_t stream_index, size_t vector_index)
449 | {
450 |    return (sss->sstream + stream_index)->gv_mean[vector_index];
451 | }
452 |
453 | /* HTS_SStreamSet_get_gv_vari: return element vector_index of the GV variance vector of the given stream (gv_vari must exist) */
454 | double HTS_SStreamSet_get_gv_vari(HTS_SStreamSet * sss, size_t stream_index, size_t vector_index)
455 | {
456 |    return (sss->sstream + stream_index)->gv_vari[vector_index];
457 | }
458 |
459 | /* HTS_SStreamSet_set_gv_switch: store flag i as the GV switch of state_index in the given stream */
460 | void HTS_SStreamSet_set_gv_switch(HTS_SStreamSet * sss, size_t stream_index, size_t state_index, HTS_Boolean i)
461 | {
462 |    (sss->sstream + stream_index)->gv_switch[state_index] = i;
463 | }
464 |
465 | /* HTS_SStreamSet_get_gv_switch: return the GV switch stored for state_index in the given stream */
466 | HTS_Boolean HTS_SStreamSet_get_gv_switch(HTS_SStreamSet * sss, size_t stream_index, size_t state_index)
467 | {
468 |    return (sss->sstream + stream_index)->gv_switch[state_index];
469 | }
470 |
471 | /* HTS_SStreamSet_clear: release every buffer held by the state stream set, then re-initialize it */
472 | void HTS_SStreamSet_clear(HTS_SStreamSet * sss)
473 | {
474 |    size_t stream, idx;
475 |    HTS_SStream *cur;
476 |
477 |    /* per-stream buffers are only walked when the stream array itself is present */
478 |    for (stream = 0; sss->sstream != NULL && stream < sss->nstream; stream++) {
479 |       cur = sss->sstream + stream;
480 |       for (idx = 0; idx < sss->total_state; idx++) {   /* one mean/variance vector per state */
481 |          HTS_free(cur->mean[idx]);
482 |          HTS_free(cur->vari[idx]);
483 |       }
484 |       if (cur->msd != NULL)
485 |          HTS_free(cur->msd);
486 |       HTS_free(cur->mean);
487 |       HTS_free(cur->vari);
488 |       for (idx = 0; idx < cur->win_size; idx++) {
489 |          cur->win_coefficient[idx] += cur->win_l_width[idx];   /* shift the row pointer by its left width before it is freed */
490 |          HTS_free(cur->win_coefficient[idx]);
491 |       }
492 |       HTS_free(cur->win_coefficient);
493 |       HTS_free(cur->win_l_width);
494 |       HTS_free(cur->win_r_width);
495 |       if (cur->gv_mean != NULL)
496 |          HTS_free(cur->gv_mean);
497 |       if (cur->gv_vari != NULL)
498 |          HTS_free(cur->gv_vari);
499 |       if (cur->gv_switch != NULL)
500 |          HTS_free(cur->gv_switch);
501 |    }
502 |    if (sss->sstream != NULL)
503 |       HTS_free(sss->sstream);
504 |    if (sss->duration != NULL)
505 |       HTS_free(sss->duration);
506 |
507 |    HTS_SStreamSet_initialize(sss);
508 | }
509 |
510 | HTS_SSTREAM_C_END;
511 |
512 | #endif /* !HTS_SSTREAM_C */
513 |
--------------------------------------------------------------------------------
/src/lib/Makefile.am:
--------------------------------------------------------------------------------
1 |
2 | # Automake input for the libHTSEngine static library.
3 |
4 | # Headers are searched in the include directory at the top of the source tree.
5 | AM_CPPFLAGS = -I@top_srcdir@/include
6 |
7 | # Static archive built from this directory.
8 | lib_LIBRARIES = libHTSEngine.a
9 |
10 | # Files that make up the archive (the private header is listed so it is
11 | # distributed with the sources); order kept exactly as originally listed.
12 | libHTSEngine_a_SOURCES = \
13 | 	HTS_audio.c \
14 | 	HTS_engine.c \
15 | 	HTS_hidden.h \
16 | 	HTS_misc.c \
17 | 	HTS_pstream.c \
18 | 	HTS_sstream.c \
19 | 	HTS_model.c \
20 | 	HTS_vocoder.c \
21 | 	HTS_gstream.c \
22 | 	HTS_label.c
23 |
24 | # The NMAKE makefile is not known to automake, so ship it explicitly.
25 | EXTRA_DIST = Makefile.mak
26 |
27 | # Additional patterns removed by the distclean / maintainer-clean targets.
28 | DISTCLEANFILES = *.log *.out *~
29 | MAINTAINERCLEANFILES = Makefile.in
30 |
--------------------------------------------------------------------------------
/src/lib/Makefile.mak:
--------------------------------------------------------------------------------
1 |
2 | # NMAKE makefile for hts_engine_API.lib (Microsoft cl / lib tools).
3 | # Targets: all (default) builds the library, clean deletes build products.
4 |
5 | CC = cl
6 |
7 | # /GL in CFLAGS is paired with /LTCG in LFLAGS for the librarian step.
8 | CFLAGS = /O2 /Ob2 /Oi /Ot /Oy /GT /GL /TC /I ..\include
9 | LFLAGS = /LTCG
10 |
11 | CORES = HTS_audio.obj HTS_engine.obj HTS_gstream.obj HTS_label.obj HTS_misc.obj HTS_model.obj HTS_pstream.obj HTS_sstream.obj HTS_vocoder.obj
12 |
13 | # Headers of the library: the private one in this directory and the public
14 | # one under ..\include.
15 | HEADERS = HTS_hidden.h ..\include\HTS_engine.h
16 |
17 | all: hts_engine_API.lib
18 |
19 | hts_engine_API.lib: $(CORES)
20 | 	lib $(LFLAGS) /OUT:$@ $(CORES)
21 |
22 | # Conservatively rebuild every object when a header changes.  This block has
23 | # no commands on purpose: NMAKE takes them from the .c.obj inference rule.
24 | $(CORES): $(HEADERS)
25 |
26 | .c.obj:
27 | 	$(CC) $(CFLAGS) /c $<
28 |
29 | clean:
30 | 	del *.lib
31 | 	del *.obj
32 |
--------------------------------------------------------------------------------