├── .cvsignore
├── .github
└── workflows
│ └── ccpp.yaml
├── .gitignore
├── .project
├── .travis.yml
├── README.md
├── appveyor.yml
├── msys_build.sh
└── src
├── .cvsignore
├── AUTHORS
├── CMakeLists.txt
├── COPYING
├── INSTALL
├── Makefile.am
├── Makefile.mak
├── NEWS
├── README
├── bin
├── .cvsignore
├── Makefile.am
├── Makefile.mak
└── hts_engine.c
├── config
└── .cvsignore
├── configure.ac
├── hts_engine_API.pc.in
├── include
└── HTS_engine.h
└── lib
├── .cvsignore
├── HTS_audio.c
├── HTS_engine.c
├── HTS_gstream.c
├── HTS_hidden.h
├── HTS_label.c
├── HTS_misc.c
├── HTS_model.c
├── HTS_pstream.c
├── HTS_sstream.c
├── HTS_vocoder.c
├── Makefile.am
└── Makefile.mak
/.cvsignore:
--------------------------------------------------------------------------------
1 | memo.txt
2 |
--------------------------------------------------------------------------------
/.github/workflows/ccpp.yaml:
--------------------------------------------------------------------------------
1 | name: C/C++ CI
2 |
3 | on:
4 | push:
5 | branches: [ master ]
6 | pull_request:
7 | branches: [ master ]
8 |
9 | jobs:
10 | build:
11 | strategy:
12 | matrix:
13 | build_type: [ Release ]
14 | config:
15 | - os: ubuntu-latest
16 | cc: gcc
17 | cxx: g++
18 | - os: ubuntu-latest
19 | cc: clang
20 | cxx: clang++
21 | - os: macos-latest
22 | cc: gcc
23 | cxx: g++
24 | - os: macos-latest
25 | cc: clang
26 | cxx: clang++
27 | - os: windows-latest
28 | cc: cl
29 | cxx: cl
30 |
31 | env:
32 | CC: ${{ matrix.config.cc }}
33 | CXX: ${{ matrix.config.cxx }}
34 |
35 | runs-on: ${{ matrix.config.os }}
36 |
37 | steps:
38 | - uses: actions/checkout@v2
39 | - name: Build
40 | working-directory: src
41 | run: |
42 | cmake -D CMAKE_BUILD_TYPE=${{ matrix.build_type }} -D BUILD_SHARED_LIBS=ON -S . -B build
43 | cmake --build build --config ${{ matrix.build_type }}
44 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Created by https://www.gitignore.io
2 |
3 | ### Emacs ###
4 | # -*- mode: gitignore; -*-
5 | *~
6 | \#*\#
7 | /.emacs.desktop
8 | /.emacs.desktop.lock
9 | *.elc
10 | auto-save-list
11 | tramp
12 | .\#*
13 |
14 | # Org-mode
15 | .org-id-locations
16 | *_archive
17 |
18 | # flymake-mode
19 | *_flymake.*
20 |
21 | # eshell files
22 | /eshell/history
23 | /eshell/lastdir
24 |
25 | # elpa packages
26 | /elpa/
27 |
28 | # reftex files
29 | *.rel
30 |
31 | # AUCTeX auto folder
32 | /auto/
33 |
34 | # cask packages
35 | .cask/
36 |
37 |
38 | ### vim ###
39 | [._]*.s[a-w][a-z]
40 | [._]s[a-w][a-z]
41 | *.un~
42 | Session.vim
43 | .netrwhist
44 | *~
45 |
46 |
47 | ### C++ ###
48 | # Compiled Object files
49 | *.slo
50 | *.lo
51 | *.o
52 | *.obj
53 |
54 | # Precompiled Headers
55 | *.gch
56 | *.pch
57 |
58 | # Compiled Dynamic libraries
59 | *.so
60 | *.dylib
61 | *.dll
62 |
63 | # Fortran module files
64 | *.mod
65 |
66 | # Compiled Static libraries
67 | *.lai
68 | *.la
69 | *.a
70 | *.lib
71 |
72 | # Executables
73 | *.exe
74 | *.out
75 | *.app
76 |
77 |
78 | ### Autotools ###
79 | # http://www.gnu.org/software/automake
80 |
81 | Makefile.in
82 |
83 | # http://www.gnu.org/software/autoconf
84 |
85 | /autom4te.cache
86 | /aclocal.m4
87 | /compile
88 | /configure
89 | /depcomp
90 | /install-sh
91 | /missing
92 | /stamp-h1
93 |
94 |
95 | ### CMake ###
96 | CMakeCache.txt
97 | CMakeFiles
98 | Makefile
99 | cmake_install.cmake
100 | install_manifest.txt
101 |
102 |
103 | ### Linux ###
104 | *~
105 |
106 | # KDE directory preferences
107 | .directory
108 |
109 |
110 | ### OSX ###
111 | .DS_Store
112 | .AppleDouble
113 | .LSOverride
114 |
115 | # Icon must end with two \r
116 | Icon
117 |
118 |
119 | # Thumbnails
120 | ._*
121 |
122 | # Files that might appear on external disk
123 | .Spotlight-V100
124 | .Trashes
125 |
126 | # Directories potentially created on remote AFP share
127 | .AppleDB
128 | .AppleDesktop
129 | Network Trash Folder
130 | Temporary Items
131 | .apdisk
132 |
133 |
134 | ### Windows ###
135 | # Windows image file caches
136 | Thumbs.db
137 | ehthumbs.db
138 |
139 | # Folder config file
140 | Desktop.ini
141 |
142 | # Recycle Bin used on file shares
143 | $RECYCLE.BIN/
144 |
145 | # Windows Installer files
146 | *.cab
147 | *.msi
148 | *.msm
149 | *.msp
150 |
151 | # Windows shortcuts
152 | *.lnk
153 |
154 | # manually added
155 | .waf*
156 | .dropbox
157 | .lock-*
158 | *.lib
159 | *.log
160 | build
161 | Makefile
162 | .deps/
163 | config.status
--------------------------------------------------------------------------------
/.project:
--------------------------------------------------------------------------------
1 |
2 |
3 | sourceforge_hts_engine_API
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: cpp
2 |
3 | # Ubuntu 18.04 https://docs.travis-ci.com/user/reference/bionic
4 | # NOTE: this project requires cmake >= 3.12 (src/CMakeLists.txt uses file(GLOB ... CONFIGURE_DEPENDS))
5 | dist: bionic
6 |
7 | compiler:
8 | - gcc
9 | - clang
10 |
11 | # Just check if build success or not for now
12 | script:
13 | - cd src
14 | - mkdir -p build && cd build
15 | - cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON ..
16 | - make -j
17 |
18 | notifications:
19 | email: false
20 |
21 | os:
22 | - linux
23 | - osx
24 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # hts_engine_API
2 |
3 |
4 | 
5 | [Travis CI build status](https://travis-ci.org/r9y9/hts_engine_API)
6 | [AppVeyor build status](https://ci.appveyor.com/project/r9y9/hts-engine-api/branch/master)
7 |
8 | A fork of hts_engine_API
9 |
10 | ## Why
11 |
12 | Wanted to fork it with *git*.
13 |
14 | **NOTE**: To preserve the history of the CVS version of hts_engine_API, this fork was originally created by:
15 |
16 | ```
17 | git cvsimport -v \
18 | -d :pserver:anonymous@hts-engine.cvs.sourceforge.net:/cvsroot/hts-engine \
19 | -C hts_engine_API hts_engine_API
20 | ```
21 |
22 | ## Supported platforms
23 |
24 | - Linux
25 | - Mac OS X
26 | - Windows (gcc/msvc)
27 |
28 | ## Changes
29 |
30 | The important changes from the original hts_engine_API are summarized below:
31 |
32 | - CMake support
33 | - Add pkg-config support
34 | - Continuous integration support
35 | - Keep semantic versioning http://semver.org/
36 |
--------------------------------------------------------------------------------
/appveyor.yml:
--------------------------------------------------------------------------------
1 | environment:
2 | PYTHON: "C:/Python36"
3 | matrix:
4 | # NOTE: we may want to revert this back if needed
5 | # - COMPILER: gcc
6 | # ARCH: "i686"
7 | #
8 | # - COMPILER: gcc
9 | # ARCH: "x86_64"
10 |
11 | - COMPILER: msvc
12 | ARCH: "i686"
13 |
14 | - COMPILER: msvc
15 | ARCH: "x86_64"
16 |
17 | init:
18 | - "ECHO %PYTHON%"
19 | - ps: "ls C:/Python*"
20 | - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%"
21 |
22 | cache:
23 | - mingw-w32-bin-i686-20200211.7z
24 | - mingw-w64-bin-x86_64-20200211.7z
25 |
26 | skip_commits:
27 | # Add [av skip] to commit messages for docfixes, etc to reduce load on queue
28 | message: /\[av skip\]/
29 |
30 | notifications:
31 | - provider: Email
32 | on_build_success: false
33 | on_build_failure: false
34 | on_build_status_changed: false
35 |
36 | build_script:
37 | - C:\MinGW\msys\1.0\bin\sh --login /c/projects/hts-engine-api/msys_build.sh
38 |
39 | artifacts:
40 | - path: '**\*.dll'
41 | name: hts_engine_API
42 |
--------------------------------------------------------------------------------
/msys_build.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # AppVeyor build driver. Reads ARCH (i686 / x86_64) and COMPILER (gcc / msvc) from the environment.
3 | set -e
4 |
5 | if [ "$ARCH" = x86_64 ]; then
6 | bits=64
7 | else
8 | bits=32
9 | fi
10 |
11 | # Use this mingw instead of the pre-installed mingw on Appveyor
12 | if [ "$COMPILER" = gcc ]; then
13 | f=mingw-w$bits-bin-$ARCH-20200211.7z
14 | if ! [ -e "$f" ]; then
15 | echo "Downloading $f"
16 | curl -fLsSO "https://sourceforge.net/projects/mingw-w64-dgn/files/mingw-w64/$f"
17 | fi
18 | 7z x "$f" > /dev/null
19 | export PATH="$PWD/mingw$bits/bin:$PATH"
20 | export CC="$PWD/mingw$bits/bin/gcc"
21 | fi
22 |
23 | # Build
24 | cd /c/projects/hts-engine-api/src
25 | mkdir -p build && cd build
26 | # NOTE(review): without -G, CMake on Windows defaults to a Visual Studio generator, where CMAKE_C_COMPILER has no effect - confirm on CI.
27 | if [ "$COMPILER" = gcc ]; then
28 | cmake -G "MSYS Makefiles" -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ ..
29 | else
30 | cmake ..
31 | fi
32 | cmake --build . --config Release
--------------------------------------------------------------------------------
/src/.cvsignore:
--------------------------------------------------------------------------------
1 | autom4te.cache
2 | aclocal.m4
3 | config.status
4 | cvs2cl.pl
5 | Makefile
6 | Makefile.in
7 | configure
8 | format.sh
9 | makeclean.sh
10 | test_mcp.sh
11 | test_mcp
12 | conf.sh
13 | *test*
14 | config.log
15 | hts_engine_API-0.99.tar.*
16 | *.bz2
17 |
--------------------------------------------------------------------------------
/src/AUTHORS:
--------------------------------------------------------------------------------
1 | The hts_engine API is software to synthesize speech waveform from HMMs trained
2 | by the HMM-based speech synthesis system (HTS). This software is released
3 | under the Modified BSD license. See the COPYING file in the same directory as
4 | this file for the license.
5 |
6 | The hts_engine_API has been developed by several members of HTS working group
7 | and some graduate students in Nagoya Institute of Technology:
8 |
9 | Keiichi Tokuda http://www.sp.nitech.ac.jp/~tokuda/
10 | (Produce and Design)
11 | Keiichiro Oura http://www.sp.nitech.ac.jp/~uratec/
12 | (Design and Development, Main Maintainer)
13 | Heiga Zen
14 | Shinji Sako http://www.mmsp.nitech.ac.jp/~sako/
15 |
--------------------------------------------------------------------------------
/src/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.12)
2 | # 3.12 is the first release providing file(GLOB ... CONFIGURE_DEPENDS) and add_compile_definitions(), both used below.
3 | # The name "HTSEngine" is used in the original makefile
4 | # Due to historical reasons, let me use "hts_engine_API" instead first.
5 | # we may want to use "HTSEngine" in the future.
6 | # NOTE: we should use semantic versioning. 1.09 -> 1.0.9!
7 | project(hts_engine_API)
8 |
9 | set(PROJECT_VER_MAJOR 1)
10 | set(PROJECT_VER_MINOR 0)
11 | set(PROJECT_VER_PATCH 9)
12 | set(PROJECT_VER "${PROJECT_VER_MAJOR}.${PROJECT_VER_MINOR}.${PROJECT_VER_PATCH}")
13 | set(PROJECT_APIVER "${PROJECT_VER_MAJOR}.${PROJECT_VER_MINOR}")
14 |
15 | option(AUDIO_PLAY_NONE "Disable audio playing support" ON)
16 |
17 | # NOTE: we can use portaudio or win32 if we really want.
18 | # I disable it by default assuming most people don't want audio playing functionality
19 | if (AUDIO_PLAY_NONE)
20 | add_compile_definitions(AUDIO_PLAY_NONE)
21 | endif ()
22 |
23 | # set the default path for built executables to the "bin" directory
24 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/bin)
25 |
26 | # set the default path for built libraries to the "lib" directory
27 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
28 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
29 |
30 | configure_file(hts_engine_API.pc.in "${PROJECT_BINARY_DIR}/hts_engine_API.pc" @ONLY)
31 |
32 | file(GLOB_RECURSE SOURCE_FILES CONFIGURE_DEPENDS ${PROJECT_SOURCE_DIR}/lib/*.c)
33 | file(GLOB_RECURSE HEADER_FILES CONFIGURE_DEPENDS ${PROJECT_SOURCE_DIR}/lib/*.h ${PROJECT_SOURCE_DIR}/include/*.h)
34 |
35 | include_directories(${PROJECT_SOURCE_DIR}/include)
36 |
37 | # The hts_engine_API library
38 | add_library(${PROJECT_NAME} ${SOURCE_FILES} ${HEADER_FILES})
39 | set_target_properties(${PROJECT_NAME} PROPERTIES
40 | VERSION ${PROJECT_VER}
41 | SOVERSION ${PROJECT_APIVER}
42 | )
43 |
44 | # hts_engine binary
45 | add_executable(hts_engine bin/hts_engine.c)
46 | target_link_libraries(hts_engine hts_engine_API)
47 |
48 | if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
49 | target_link_libraries(${PROJECT_NAME} winmm)
50 | target_link_libraries(hts_engine winmm)
51 | else()
52 | target_link_libraries(hts_engine m)
53 | endif()
54 |
55 | install(TARGETS ${PROJECT_NAME} hts_engine DESTINATION lib RUNTIME DESTINATION bin)
56 | install(FILES include/HTS_engine.h DESTINATION include)
57 | install(FILES "${PROJECT_BINARY_DIR}/hts_engine_API.pc" DESTINATION lib/pkgconfig/)
--------------------------------------------------------------------------------
/src/COPYING:
--------------------------------------------------------------------------------
1 | /* ----------------------------------------------------------------- */
2 | /* The HMM-Based Speech Synthesis Engine "hts_engine API" */
3 | /* developed by HTS Working Group */
4 | /* http://hts-engine.sourceforge.net/ */
5 | /* ----------------------------------------------------------------- */
6 | /* */
7 | /* Copyright (c) 2001-2014 Nagoya Institute of Technology */
8 | /* Department of Computer Science */
9 | /* */
10 | /* 2001-2008 Tokyo Institute of Technology */
11 | /* Interdisciplinary Graduate School of */
12 | /* Science and Engineering */
13 | /* */
14 | /* All rights reserved. */
15 | /* */
16 | /* Redistribution and use in source and binary forms, with or */
17 | /* without modification, are permitted provided that the following */
18 | /* conditions are met: */
19 | /* */
20 | /* - Redistributions of source code must retain the above copyright */
21 | /* notice, this list of conditions and the following disclaimer. */
22 | /* - Redistributions in binary form must reproduce the above */
23 | /* copyright notice, this list of conditions and the following */
24 | /* disclaimer in the documentation and/or other materials provided */
25 | /* with the distribution. */
26 | /* - Neither the name of the HTS working group nor the names of its */
27 | /* contributors may be used to endorse or promote products derived */
28 | /* from this software without specific prior written permission. */
29 | /* */
30 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
31 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
32 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
33 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
34 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
35 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */
36 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
37 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
38 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
39 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */
40 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */
41 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
42 | /* POSSIBILITY OF SUCH DAMAGE. */
43 | /* ----------------------------------------------------------------- */
44 |
--------------------------------------------------------------------------------
/src/INSTALL:
--------------------------------------------------------------------------------
1 | Installation Instructions
2 | *************************
3 |
4 | 1. Cd to ./src directory.
5 |
6 | % cd src
7 |
8 | 2. Create "build" directory and cd to ./src/build.
9 |
10 | % mkdir -p build && cd build
11 |
12 | 3. Run "cmake" to create Makefile and "make" to compile.
13 |
14 | % cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON ..
15 | % make -j
16 |
17 | 4. Install library and binary.
18 |
19 | % make install
--------------------------------------------------------------------------------
/src/Makefile.am:
--------------------------------------------------------------------------------
1 |
2 | EXTRA_DIST = AUTHORS COPYING ChangeLog INSTALL NEWS README Makefile.mak
3 |
4 | SUBDIRS = lib bin
5 |
6 | include_HEADERS = include/HTS_engine.h
7 |
8 | DISTCLEANFILES = *.log *.out *~ config/*~ include/*~
9 |
10 | MAINTAINERCLEANFILES = aclocal.m4 configure Makefile.in config/compile \
11 | config/config.guess config/config.sub config/depcomp \
12 | config/install-sh config/missing
13 |
--------------------------------------------------------------------------------
/src/Makefile.mak:
--------------------------------------------------------------------------------
1 |
2 | INSTALLDIR = C:\hts_engine_API
3 |
4 | all:
5 | cd lib
6 | nmake /f Makefile.mak
7 | cd ..
8 | cd bin
9 | nmake /f Makefile.mak
10 | cd ..
11 |
12 | clean:
13 | cd lib
14 | nmake /f Makefile.mak clean
15 | cd ..
16 | cd bin
17 | nmake /f Makefile.mak clean
18 | cd ..
19 |
20 | install::
21 | @if not exist "$(INSTALLDIR)\lib" mkdir "$(INSTALLDIR)\lib"
22 | cd lib
23 | copy *.lib $(INSTALLDIR)\lib
24 | cd ..
25 | @if not exist "$(INSTALLDIR)\bin" mkdir "$(INSTALLDIR)\bin"
26 | cd bin
27 | copy *.exe $(INSTALLDIR)\bin
28 | cd ..
29 | @if not exist "$(INSTALLDIR)\include" mkdir "$(INSTALLDIR)\include"
30 | cd include
31 | copy *.h $(INSTALLDIR)\include
32 | cd ..
33 |
--------------------------------------------------------------------------------
/src/NEWS:
--------------------------------------------------------------------------------
1 | Version 1.09:
2 | * add '-g' option to change volume.
3 | * add some functions to get fullcontext label format and version defined in HTS voice.
4 | * support 64-bit Windows audio.
5 | * bug fixes.
6 |
7 | Version 1.08:
8 | * support 64-bit machine.
9 | * re-write excitation function.
10 | * add some functions to get generated parameters and number of frames.
11 | * add some functions for separating synthesis step.
12 | * bug fixes.
13 |
14 | Version 1.07:
15 | * support new HTS voice format.
16 | * add LSP postfilter.
17 | * change volume unit to DB.
18 | * add function to get generated speech.
19 | * bug fixes.
20 |
21 | Version 1.06:
22 | * modify state duration calculation algorithm.
23 | * change many function from void to boolean.
24 | * change source format.
25 | * bug fixes.
26 |
27 | Version 1.05:
28 | * support PortAudio for audio output.
29 |
30 | Version 1.04:
31 | * support C++ compiler.
32 | * add low-pass filter stream.
33 | * change definition of GV weights.
34 | * add stop switch.
35 | * add volume controller.
36 | * bug fixes.
37 |
38 | Version 1.03:
39 | * add '-z' option to control audio buffer size.
40 | * change PDF file format.
41 | * bug fixes.
42 |
43 | Version 1.02:
44 | * context-dependent GV without silent and pause phoneme.
45 | * buffer size control of audio output for Windows.
46 | * makefiles for nmake of VC.
47 | * save detail information.
48 | * support singing voice synthesis.
49 | * bug fixes.
50 |
51 | Version 1.01:
52 | * bug fixes.
53 | * transfer site to SourceForge.
54 |
55 | Version 1.00:
56 | * bug fixes and performance improvements.
57 | * support linear gain in addition to log gain for LSP-type parameters.
58 | * first stable release.
59 |
60 | Version 0.99:
61 | * bug fixes.
62 | * switch license to the New and Simplified BSD license.
63 |
64 | Version 0.96:
65 | * bug fixes.
66 | * support flexible model structure.
67 | * support LSP-type parameters.
68 |
69 | Version 0.95:
70 | * support GV.
71 |
72 | Version 0.9:
73 | * implement API-style hts_engine.
74 | * support speaker-interpolation.
75 |
--------------------------------------------------------------------------------
/src/README:
--------------------------------------------------------------------------------
1 | ===============================================================================
2 | The HMM-Based Speech Synthesis Engine "hts_engine API" version 1.09
3 | release December 25, 2014
4 |
5 |
6 | The hts_engine API is an API version of hts_engine which has been released
7 | since HTS version 1.1. It has been being developed by the HTS working group
8 | (see "Who we are" below) and some graduate students in Nagoya Institute of
9 | Technology (see "AUTHORS" in the same directory).
10 |
11 | *******************************************************************************
12 | Copying
13 | *******************************************************************************
14 |
15 | The hts_engine API is released under the Modified BSD license (see
16 | http://www.opensource.org/). Using and distributing this software is free
17 | (without restriction including without limitation the rights to use, copy,
18 | modify, merge, publish, distribute, sublicense, and/or sell copies of this
19 | work, and to permit persons to whom this work is furnished to do so) subject to
20 | the conditions in the following license:
21 |
22 | /* ----------------------------------------------------------------- */
23 | /* The HMM-Based Speech Synthesis Engine "hts_engine API" */
24 | /* developed by HTS Working Group */
25 | /* http://hts-engine.sourceforge.net/ */
26 | /* ----------------------------------------------------------------- */
27 | /* */
28 | /* Copyright (c) 2001-2014 Nagoya Institute of Technology */
29 | /* Department of Computer Science */
30 | /* */
31 | /* 2001-2008 Tokyo Institute of Technology */
32 | /* Interdisciplinary Graduate School of */
33 | /* Science and Engineering */
34 | /* */
35 | /* All rights reserved. */
36 | /* */
37 | /* Redistribution and use in source and binary forms, with or */
38 | /* without modification, are permitted provided that the following */
39 | /* conditions are met: */
40 | /* */
41 | /* - Redistributions of source code must retain the above copyright */
42 | /* notice, this list of conditions and the following disclaimer. */
43 | /* - Redistributions in binary form must reproduce the above */
44 | /* copyright notice, this list of conditions and the following */
45 | /* disclaimer in the documentation and/or other materials provided */
46 | /* with the distribution. */
47 | /* - Neither the name of the HTS working group nor the names of its */
48 | /* contributors may be used to endorse or promote products derived */
49 | /* from this software without specific prior written permission. */
50 | /* */
51 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
52 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
53 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
54 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
55 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
56 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */
57 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
58 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
59 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
60 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */
61 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */
62 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
63 | /* POSSIBILITY OF SUCH DAMAGE. */
64 | /* ----------------------------------------------------------------- */
65 |
66 | Although this software is free, we still offer no warranties and no
67 | maintenance. We will continue to endeavor to fix bugs and answer queries when
68 | we can, but are not in a position to guarantee it. We will consider consultancy if
69 | desired; please contact us for details.
70 |
71 | If you are using the hts_engine API in commercial environments, even though no
72 | license is required, we would be grateful if you let us know as it helps
73 | justify ourselves to our various sponsors. We also strongly encourage you to
74 |
75 | * refer to the use of hts_engine API in any publications that use this
76 | software
77 | * report bugs, where possible with bug fixes, that are found
78 |
79 | See also "COPYING" file in the current directory for details.
80 |
81 | *******************************************************************************
82 | Installation
83 | *******************************************************************************
84 |
85 | See "INSTALL" in the same directory for details.
86 |
87 | *******************************************************************************
88 | Documentation
89 | *******************************************************************************
90 |
91 | Reference manual of hts_engine API is available at
92 |
93 | http://hts-engine.sourceforge.net/
94 |
95 | *******************************************************************************
96 | Acknowledgements
97 | *******************************************************************************
98 |
99 | Keiichi Tokuda
100 | Shinji Sako
101 | Heiga Zen
102 | Keiichiro Oura
103 | Kazuhiro Nakamura
104 | Keijiro Saino
105 |
106 | *******************************************************************************
107 | Who we are
108 | *******************************************************************************
109 |
110 | The HTS working group is a voluntary group for developing the HMM-Based Speech
111 | Synthesis System. Current members are
112 |
113 | Keiichi Tokuda http://www.sp.nitech.ac.jp/~tokuda/
114 | (Produce and Design)
115 | Keiichiro Oura http://www.sp.nitech.ac.jp/~uratec/
116 | (Design and Development, Main Maintainer)
117 | Kei Hashimoto http://www.sp.nitech.ac.jp/~bonanza/
118 | Sayaka Shiota http://www.sp.nitech.ac.jp/~sayaka/
119 | Shinji Takaki http://www.sp.nitech.ac.jp/~k-prr44/
120 | Heiga Zen
121 | Junichi Yamagishi http://homepages.inf.ed.ac.uk/jyamagis/
122 | Tomoki Toda http://spalab.naist.jp/~tomoki/index_e.html
123 | Takashi Nose
124 | Shinji Sako http://www.mmsp.nitech.ac.jp/~sako/
125 | Alan W. Black http://www.cs.cmu.edu/~awb/
126 |
127 | and the members are dynamically changing. The current formal contact address of
128 | HTS working group and a mailing list for HTS users can be found at
129 | http://hts.sp.nitech.ac.jp/
130 | ===============================================================================
131 |
--------------------------------------------------------------------------------
/src/bin/.cvsignore:
--------------------------------------------------------------------------------
1 | Makefile.in
2 | hts_engine
3 | Makefile
4 | .deps
5 |
--------------------------------------------------------------------------------
/src/bin/Makefile.am:
--------------------------------------------------------------------------------
1 |
2 | EXTRA_DIST = Makefile.mak
3 |
4 | AM_CPPFLAGS = -I @top_srcdir@/include
5 |
6 | bin_PROGRAMS = hts_engine
7 |
8 | hts_engine_SOURCES = hts_engine.c
9 |
10 | hts_engine_LDADD = ../lib/libHTSEngine.a
11 |
12 | DISTCLEANFILES = *.log *.out *~
13 |
14 | MAINTAINERCLEANFILES = Makefile.in
15 |
--------------------------------------------------------------------------------
/src/bin/Makefile.mak:
--------------------------------------------------------------------------------
1 |
2 | CC = cl
3 | CL = link
4 |
5 | CFLAGS = /O2 /Ob2 /Oi /Ot /Oy /GT /GL /TC /I ..\include
6 | LFLAGS = /LTCG
7 |
8 | LIBS = ..\lib\hts_engine_API.lib winmm.lib
9 |
10 | all: hts_engine.exe
11 |
12 | hts_engine.exe : hts_engine.obj
13 | $(CC) $(CFLAGS) /c $(@B).c
14 | $(CL) $(LFLAGS) /OUT:$@ $(LIBS) $(@B).obj
15 |
16 | clean:
17 | del *.exe
18 | del *.obj
19 |
--------------------------------------------------------------------------------
/src/bin/hts_engine.c:
--------------------------------------------------------------------------------
1 | /* ----------------------------------------------------------------- */
2 | /* The HMM-Based Speech Synthesis Engine "hts_engine API" */
3 | /* developed by HTS Working Group */
4 | /* http://hts-engine.sourceforge.net/ */
5 | /* ----------------------------------------------------------------- */
6 | /* */
7 | /* Copyright (c) 2001-2014 Nagoya Institute of Technology */
8 | /* Department of Computer Science */
9 | /* */
10 | /* 2001-2008 Tokyo Institute of Technology */
11 | /* Interdisciplinary Graduate School of */
12 | /* Science and Engineering */
13 | /* */
14 | /* All rights reserved. */
15 | /* */
16 | /* Redistribution and use in source and binary forms, with or */
17 | /* without modification, are permitted provided that the following */
18 | /* conditions are met: */
19 | /* */
20 | /* - Redistributions of source code must retain the above copyright */
21 | /* notice, this list of conditions and the following disclaimer. */
22 | /* - Redistributions in binary form must reproduce the above */
23 | /* copyright notice, this list of conditions and the following */
24 | /* disclaimer in the documentation and/or other materials provided */
25 | /* with the distribution. */
26 | /* - Neither the name of the HTS working group nor the names of its */
27 | /* contributors may be used to endorse or promote products derived */
28 | /* from this software without specific prior written permission. */
29 | /* */
30 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
31 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
32 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
33 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
34 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
35 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */
36 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
37 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
38 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
39 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */
40 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */
41 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
42 | /* POSSIBILITY OF SUCH DAMAGE. */
43 | /* ----------------------------------------------------------------- */
44 |
45 | #ifndef HTS_ENGINE_C
46 | #define HTS_ENGINE_C
47 |
48 | #ifdef __cplusplus
49 | #define HTS_ENGINE_C_START extern "C" {
50 | #define HTS_ENGINE_C_END }
51 | #else
52 | #define HTS_ENGINE_C_START
53 | #define HTS_ENGINE_C_END
54 | #endif /* __CPLUSPLUS */
55 |
56 | HTS_ENGINE_C_START;
57 |
58 | #include
59 |
60 | #include "HTS_engine.h"
61 |
62 | /* usage: write the copyright banner (HTS_COPYRIGHT), the option table and notes to stderr, then call exit(0); never returns */
63 | void usage(void)
64 | {
65 | fprintf(stderr, "%s\n", HTS_COPYRIGHT);
66 | fprintf(stderr, "hts_engine - The HMM-based speech synthesis engine \"hts_engine API\"\n");
67 | fprintf(stderr, "\n");
68 | fprintf(stderr, " usage:\n");
69 | fprintf(stderr, " hts_engine [ options ] [ infile ]\n");
70 | fprintf(stderr, " options: [ def][ min-- max]\n");
71 | fprintf(stderr, " -m htsvoice : HTS voice files [ N/A]\n");
72 | fprintf(stderr, " -od s : filename of output label with duration [ N/A]\n");
73 | fprintf(stderr, " -om s : filename of output spectrum [ N/A]\n");
74 | fprintf(stderr, " -of s : filename of output log F0 [ N/A]\n");
75 | fprintf(stderr, " -ol s : filename of output low-pass filter [ N/A]\n");
76 | fprintf(stderr, " -or s : filename of output raw audio (generated speech) [ N/A]\n");
77 | fprintf(stderr, " -ow s : filename of output wav audio (generated speech) [ N/A]\n");
78 | fprintf(stderr, " -ot s : filename of output trace information [ N/A]\n");
79 | fprintf(stderr, " -vp : use phoneme alignment for duration [ N/A]\n");
80 | fprintf(stderr, " -i i f1 .. fi : enable interpolation & specify number(i),coefficient(f) [ N/A]\n");
81 | fprintf(stderr, " -s i : sampling frequency [ auto][ 1-- ]\n");
82 | fprintf(stderr, " -p i : frame period (point) [ auto][ 1-- ]\n");
83 | fprintf(stderr, " -a f : all-pass constant [ auto][ 0.0-- 1.0]\n");
84 | fprintf(stderr, " -b f : postfiltering coefficient [ 0.0][ 0.0-- 1.0]\n");
85 | fprintf(stderr, " -r f : speech speed rate [ 1.0][ 0.0-- ]\n");
86 | fprintf(stderr, " -fm f : additional half-tone [ 0.0][ -- ]\n");
87 | fprintf(stderr, " -u f : voiced/unvoiced threshold [ 0.5][ 0.0-- 1.0]\n");
88 | fprintf(stderr, " -jm f : weight of GV for spectrum [ 1.0][ 0.0-- ]\n");
89 | fprintf(stderr, " -jf f : weight of GV for log F0 [ 1.0][ 0.0-- ]\n");
90 | fprintf(stderr, " -g f : volume (dB) [ 0.0][ -- ]\n");
91 | fprintf(stderr, " -z i : audio buffer size (if i==0, turn off) [ 0][ 0-- ]\n");
92 | fprintf(stderr, " infile:\n");
93 | fprintf(stderr, " label file\n");
94 | fprintf(stderr, " note:\n");
95 | fprintf(stderr, " generated spectrum, log F0, and low-pass filter coefficient\n");
96 | fprintf(stderr, " sequences are saved in natural endian, binary (float) format.\n");
97 | fprintf(stderr, "\n");
98 |
99 | exit(0);
100 | }
101 |
102 | int main(int argc, char **argv)
103 | {
104 | int i;
105 | double f;
106 |
107 | /* hts_engine API */
108 | HTS_Engine engine;
109 |
110 | /* HTS voices */
111 | size_t num_voices;
112 | char **fn_voices;
113 |
114 | /* input label file name */
115 | char *labfn = NULL;
116 |
117 | /* output file pointers */
118 | FILE *durfp = NULL, *mgcfp = NULL, *lf0fp = NULL, *lpffp = NULL, *wavfp = NULL, *rawfp = NULL, *tracefp = NULL;
119 |
120 | /* interpolation weights */
121 | size_t num_interpolation_weights;
122 |
123 | /* output usage */
124 | if (argc <= 1)
125 | usage();
126 |
127 | /* initialize hts_engine API */
128 | HTS_Engine_initialize(&engine);
129 |
130 | /* get HTS voice file names */
131 | num_voices = 0;
132 | fn_voices = (char **) malloc(argc * sizeof(char *));
133 | for (i = 0; i < argc; i++) {
134 | if (argv[i][0] == '-' && argv[i][1] == 'm')
135 | fn_voices[num_voices++] = argv[++i];
136 | if (argv[i][0] == '-' && argv[i][1] == 'h')
137 | usage();
138 | }
139 | if (num_voices == 0) {
140 | fprintf(stderr, "Error: HTS voice must be specified.\n");
141 | free(fn_voices);
142 | exit(1);
143 | }
144 |
145 | /* load HTS voices */
146 | if (HTS_Engine_load(&engine, fn_voices, num_voices) != TRUE) {
147 | fprintf(stderr, "Error: HTS voices cannot be loaded.\n");
148 | free(fn_voices);
149 | HTS_Engine_clear(&engine);
150 | exit(1);
151 | }
152 | free(fn_voices);
153 |
154 | /* get options */
155 | while (--argc) {
156 | if (**++argv == '-') {
157 | switch (*(*argv + 1)) {
158 | case 'v':
159 | switch (*(*argv + 2)) {
160 | case 'p':
161 | HTS_Engine_set_phoneme_alignment_flag(&engine, TRUE);
162 | break;
163 | default:
164 | fprintf(stderr, "Error: Invalid option '-v%c'.\n", *(*argv + 2));
165 | HTS_Engine_clear(&engine);
166 | exit(1);
167 | }
168 | break;
169 | case 'o':
170 | switch (*(*argv + 2)) {
171 | case 'w':
172 | wavfp = fopen(*++argv, "wb");
173 | break;
174 | case 'r':
175 | rawfp = fopen(*++argv, "wb");
176 | break;
177 | case 'd':
178 | durfp = fopen(*++argv, "wt");
179 | break;
180 | case 'm':
181 | mgcfp = fopen(*++argv, "wb");
182 | break;
183 | case 'f':
184 | case 'p':
185 | lf0fp = fopen(*++argv, "wb");
186 | break;
187 | case 'l':
188 | lpffp = fopen(*++argv, "wb");
189 | break;
190 | case 't':
191 | tracefp = fopen(*++argv, "wt");
192 | break;
193 | default:
194 | fprintf(stderr, "Error: Invalid option '-o%c'.\n", *(*argv + 2));
195 | HTS_Engine_clear(&engine);
196 | exit(1);
197 | }
198 | --argc;
199 | break;
200 | case 'h':
201 | usage();
202 | break;
203 | case 'm':
204 | argv++; /* HTS voices were already loaded */
205 | --argc;
206 | break;
207 | case 's':
208 | HTS_Engine_set_sampling_frequency(&engine, (size_t) atoi(*++argv));
209 | --argc;
210 | break;
211 | case 'p':
212 | HTS_Engine_set_fperiod(&engine, (size_t) atoi(*++argv));
213 | --argc;
214 | break;
215 | case 'a':
216 | HTS_Engine_set_alpha(&engine, atof(*++argv));
217 | --argc;
218 | break;
219 | case 'b':
220 | HTS_Engine_set_beta(&engine, atof(*++argv));
221 | --argc;
222 | break;
223 | case 'r':
224 | HTS_Engine_set_speed(&engine, atof(*++argv));
225 | --argc;
226 | break;
227 | case 'f':
228 | switch (*(*argv + 2)) {
229 | case 'm':
230 | HTS_Engine_add_half_tone(&engine, atof(*++argv));
231 | break;
232 | default:
233 | fprintf(stderr, "Error: Invalid option '-f%c'.\n", *(*argv + 2));
234 | HTS_Engine_clear(&engine);
235 | exit(1);
236 | }
237 | --argc;
238 | break;
239 | case 'u':
240 | HTS_Engine_set_msd_threshold(&engine, 1, atof(*++argv));
241 | --argc;
242 | break;
243 | case 'i':
244 | num_interpolation_weights = atoi(*++argv);
245 | argc--;
246 | if (num_interpolation_weights != num_voices) {
247 | HTS_Engine_clear(&engine);
248 | exit(1);
249 | }
250 | for (i = 0; i < num_interpolation_weights; i++) {
251 | f = atof(*++argv);
252 | argc--;
253 | HTS_Engine_set_duration_interpolation_weight(&engine, i, f);
254 | HTS_Engine_set_parameter_interpolation_weight(&engine, i, 0, f);
255 | HTS_Engine_set_parameter_interpolation_weight(&engine, i, 1, f);
256 | HTS_Engine_set_gv_interpolation_weight(&engine, i, 0, f);
257 | HTS_Engine_set_gv_interpolation_weight(&engine, i, 1, f);
258 | }
259 | break;
260 | case 'j':
261 | switch (*(*argv + 2)) {
262 | case 'm':
263 | HTS_Engine_set_gv_weight(&engine, 0, atof(*++argv));
264 | break;
265 | case 'f':
266 | case 'p':
267 | HTS_Engine_set_gv_weight(&engine, 1, atof(*++argv));
268 | break;
269 | default:
270 | fprintf(stderr, "Error: Invalid option '-j%c'.\n", *(*argv + 2));
271 | HTS_Engine_clear(&engine);
272 | exit(1);
273 | }
274 | --argc;
275 | break;
276 | case 'g':
277 | HTS_Engine_set_volume(&engine, atof(*++argv));
278 | --argc;
279 | break;
280 | case 'z':
281 | HTS_Engine_set_audio_buff_size(&engine, (size_t) atoi(*++argv));
282 | --argc;
283 | break;
284 | default:
285 | fprintf(stderr, "Error: Invalid option '-%c'.\n", *(*argv + 1));
286 | HTS_Engine_clear(&engine);
287 | exit(1);
288 | }
289 | } else {
290 | labfn = *argv;
291 | }
292 | }
293 |
294 | /* synthesize */
295 | if (HTS_Engine_synthesize_from_fn(&engine, labfn) != TRUE) {
296 | fprintf(stderr, "Error: waveform cannot be synthesized.\n");
297 | HTS_Engine_clear(&engine);
298 | exit(1);
299 | }
300 |
301 | /* output */
302 | if (tracefp != NULL)
303 | HTS_Engine_save_information(&engine, tracefp);
304 | if (durfp != NULL)
305 | HTS_Engine_save_label(&engine, durfp);
306 | if (rawfp)
307 | HTS_Engine_save_generated_speech(&engine, rawfp);
308 | if (wavfp)
309 | HTS_Engine_save_riff(&engine, wavfp);
310 | if (mgcfp)
311 | HTS_Engine_save_generated_parameter(&engine, 0, mgcfp);
312 | if (lf0fp)
313 | HTS_Engine_save_generated_parameter(&engine, 1, lf0fp);
314 | if (lpffp)
315 | HTS_Engine_save_generated_parameter(&engine, 2, lpffp);
316 |
317 | /* reset */
318 | HTS_Engine_refresh(&engine);
319 |
320 | /* free memory */
321 | HTS_Engine_clear(&engine);
322 |
323 | /* close files */
324 | if (durfp != NULL)
325 | fclose(durfp);
326 | if (mgcfp != NULL)
327 | fclose(mgcfp);
328 | if (lf0fp != NULL)
329 | fclose(lf0fp);
330 | if (lpffp != NULL)
331 | fclose(lpffp);
332 | if (wavfp != NULL)
333 | fclose(wavfp);
334 | if (rawfp != NULL)
335 | fclose(rawfp);
336 | if (tracefp != NULL)
337 | fclose(tracefp);
338 |
339 | return 0;
340 | }
341 |
342 | HTS_ENGINE_C_END;
343 |
344 | #endif /* !HTS_ENGINE_C */
345 |
--------------------------------------------------------------------------------
/src/config/.cvsignore:
--------------------------------------------------------------------------------
1 | depcomp
2 | install-sh
3 | missing
4 | config.guess
5 | config.sub
6 |
--------------------------------------------------------------------------------
/src/configure.ac:
--------------------------------------------------------------------------------
# -*- Autoconf -*-
# Process this file with autoconf to produce a configure script.

AC_PREREQ([2.59])
AC_INIT([hts_engine_API], [1.09], [hts-engine-users@lists.sourceforge.net], [hts_engine_API])
AC_CONFIG_AUX_DIR([config])
AC_COPYRIGHT([Copyright 2001-2014 Nagoya Institute of Technology])
AC_COPYRIGHT([Copyright 2001-2008 Tokyo Institute of Technology])
AM_INIT_AUTOMAKE

# Checks for C compiler
AC_PROG_CC
AM_PROG_CC_C_O
AC_PROG_INSTALL
AC_PROG_RANLIB
# AC_PROG_AR is defined locally on top of AC_CHECK_TOOL (AR becomes ':' when
# no ar is found); the AN_* lines register it with autoscan.
AN_MAKEVAR([AR], [AC_PROG_AR])
AN_PROGRAM([ar], [AC_PROG_AR])
AC_DEFUN([AC_PROG_AR], [AC_CHECK_TOOL(AR, ar, :)])
AC_PROG_AR


# Checks for libraries.
AC_CHECK_LIB([m], [log])


# Checks for header files.
AC_HEADER_STDC
AC_CHECK_HEADERS([stdlib.h string.h])


# Checks for typedefs, structures, and compiler characteristics.
AC_C_CONST
AC_TYPE_SIZE_T

# Checks for library functions.
AC_FUNC_VPRINTF
AC_CHECK_FUNCS([sqrt strchr strrchr strstr])


# Checks for embedded device compile
AC_ARG_ENABLE([embedded], [ --enable-embedded turn on compiling for embedded devices (default=no)], [], [enable_embedded=no])
AC_MSG_CHECKING([whether to enable compiling for embedded devices])
# The operand is quoted so that an empty or multi-word value cannot break `test`.
if test "x$enable_embedded" = xyes; then
AC_MSG_RESULT([yes])
AC_DEFINE([HTS_EMBEDDED])
else
AC_MSG_RESULT([no])
fi


# Checks for using festival
AC_ARG_ENABLE([festival], [ --enable-festival use memory allocation/free functions of speech tools (default=no)], [], [enable_festival=no])
AC_MSG_CHECKING([whether to use memory allocation/free functions of speech tools])
if test "x$enable_festival" = xyes; then
AC_MSG_RESULT([yes])
AC_DEFINE([FESTIVAL])
else
AC_MSG_RESULT([no])
fi


AC_CANONICAL_HOST
AC_C_BIGENDIAN


# Checks library for windows audio devices
# AC_CHECK_LIB with the symbol `main` is the documented replacement for the
# obsolete AC_HAVE_LIBRARY; configuration still aborts when winmm is missing.
case "$host_os" in
*win32* | *wince* | *cygwin* | *mingw* )
AC_CHECK_LIB([winmm], [main], [], [AC_MSG_ERROR([No winmm])])
;;
*)
;;
esac


AC_CONFIG_FILES([Makefile bin/Makefile lib/Makefile])

AC_OUTPUT
80 |
--------------------------------------------------------------------------------
/src/hts_engine_API.pc.in:
--------------------------------------------------------------------------------
# pkg-config template; the @...@ placeholders are substituted by CMake.
# exec_prefix and libdir are derived from ${prefix} so that overriding prefix
# (e.g. pkg-config --define-variable=prefix=...) relocates every path.
prefix=@CMAKE_INSTALL_PREFIX@
exec_prefix=${prefix}
libdir=${exec_prefix}/lib
includedir=${prefix}/include

Name: @CMAKE_PROJECT_NAME@
Description: hts_engine API - a library to synthesize speech waveform from HMMs trained by the HMM-based speech synthesis system (HTS).
Version: @CMAKE_PROJECT_VERSION@
Cflags: -I${includedir}
Libs: -L${libdir} -l@CMAKE_PROJECT_NAME@
11 |
--------------------------------------------------------------------------------
/src/include/HTS_engine.h:
--------------------------------------------------------------------------------
1 | /* ----------------------------------------------------------------- */
2 | /* The HMM-Based Speech Synthesis Engine "hts_engine API" */
3 | /* developed by HTS Working Group */
4 | /* http://hts-engine.sourceforge.net/ */
5 | /* ----------------------------------------------------------------- */
6 | /* */
7 | /* Copyright (c) 2001-2014 Nagoya Institute of Technology */
8 | /* Department of Computer Science */
9 | /* */
10 | /* 2001-2008 Tokyo Institute of Technology */
11 | /* Interdisciplinary Graduate School of */
12 | /* Science and Engineering */
13 | /* */
14 | /* All rights reserved. */
15 | /* */
16 | /* Redistribution and use in source and binary forms, with or */
17 | /* without modification, are permitted provided that the following */
18 | /* conditions are met: */
19 | /* */
20 | /* - Redistributions of source code must retain the above copyright */
21 | /* notice, this list of conditions and the following disclaimer. */
22 | /* - Redistributions in binary form must reproduce the above */
23 | /* copyright notice, this list of conditions and the following */
24 | /* disclaimer in the documentation and/or other materials provided */
25 | /* with the distribution. */
26 | /* - Neither the name of the HTS working group nor the names of its */
27 | /* contributors may be used to endorse or promote products derived */
28 | /* from this software without specific prior written permission. */
29 | /* */
30 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
31 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
32 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
33 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
34 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
35 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */
36 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
37 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
38 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
39 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */
40 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */
41 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
42 | /* POSSIBILITY OF SUCH DAMAGE. */
43 | /* ----------------------------------------------------------------- */
44 |
45 | #ifndef HTS_ENGINE_H
46 | #define HTS_ENGINE_H
47 |
48 | #ifdef __cplusplus
49 | #define HTS_ENGINE_H_START extern "C" {
50 | #define HTS_ENGINE_H_END }
51 | #else
52 | #define HTS_ENGINE_H_START
53 | #define HTS_ENGINE_H_END
54 | #endif /* __cplusplus */
55 |
56 | HTS_ENGINE_H_START;
57 |
58 | #include <stdio.h>
59 |
60 | #ifndef HTS_ENGINE_DLLEXPORT_H_
61 | #define HTS_ENGINE_DLLEXPORT_H_
62 |
63 | #ifndef DLLEXPORT
64 | # ifdef _WIN32
65 | # define DLLEXPORT __declspec(dllexport)
66 | # else
67 | # define DLLEXPORT
68 | # endif /* _WIN32 */
69 | #endif /* !DLLEXPORT */
70 | #endif /* !HTS_ENGINE_DLLEXPORT_H_ */
71 |
72 | /* common ---------------------------------------------------------- */
73 |
74 | typedef char HTS_Boolean; /* boolean type; carries TRUE or FALSE */
75 |
76 | #ifndef TRUE
77 | #define TRUE 1
78 | #endif /* !TRUE */
79 |
80 | #ifndef FALSE
81 | #define FALSE 0
82 | #endif /* !FALSE */
83 |
84 | #ifndef HTS_NODATA
85 | #define HTS_NODATA (-1.0e+10) /* presumably a "no data" marker value -- confirm against its users */
86 | #endif /* !HTS_NODATA */
87 |
88 | /* copyright ------------------------------------------------------- */
89 |
90 | #define HTS_COPYRIGHT "The HMM-Based Speech Synthesis Engine \"hts_engine API\"\nVersion 1.09 (http://hts-engine.sourceforge.net/)\nCopyright (C) 2001-2014 Nagoya Institute of Technology\n 2001-2008 Tokyo Institute of Technology\nAll rights reserved.\n"
91 |
92 | /* audio ----------------------------------------------------------- */
93 |
94 | /* HTS_Audio: audio output wrapper (buffer state plus an opaque interface pointer) */
95 | typedef struct _HTS_Audio {
96 | size_t sampling_frequency; /* sampling frequency */
97 | size_t max_buff_size; /* buffer size for audio output interface */
98 | short *buff; /* current buffer (samples held as short) */
99 | size_t buff_size; /* current buffer size */
100 | void *audio_interface; /* audio interface specified in compile step (untyped pointer) */
101 | } HTS_Audio;
102 |
103 | /* model ----------------------------------------------------------- */
104 |
105 | /* HTS_Window: window coefficients to calculate dynamic features. */
106 | typedef struct _HTS_Window {
107 | size_t size; /* # of windows (static + deltas) */
108 | int *l_width; /* left width of windows (presumably one entry per window -- confirm) */
109 | int *r_width; /* right width of windows (presumably one entry per window -- confirm) */
110 | double **coefficient; /* window coefficients (layout not visible in this header) */
111 | size_t max_width; /* maximum width of windows */
112 | } HTS_Window;
113 |
114 | /* HTS_Pattern: list of patterns in a question and a tree (entries chained through next). */
115 | typedef struct _HTS_Pattern {
116 | char *string; /* pattern string */
117 | struct _HTS_Pattern *next; /* pointer to the next pattern in the list */
118 | } HTS_Pattern;
119 |
120 | /* HTS_Question: list of questions in a tree (entries chained through next). */
121 | typedef struct _HTS_Question {
122 | char *string; /* name of this question */
123 | HTS_Pattern *head; /* pointer to the head of this question's pattern list */
124 | struct _HTS_Question *next; /* pointer to the next question in the list */
125 | } HTS_Question;
126 |
127 | /* HTS_Node: list of tree nodes in a tree (chained through next; children reached through yes/no). */
128 | typedef struct _HTS_Node {
129 | int index; /* index of this node */
130 | size_t pdf; /* index of PDF for this node (leaf node only) */
131 | struct _HTS_Node *yes; /* pointer to its child node (yes) */
132 | struct _HTS_Node *no; /* pointer to its child node (no) */
133 | struct _HTS_Node *next; /* pointer to the next node in the list */
134 | HTS_Question *quest; /* question applied at this node */
135 | } HTS_Node;
136 |
137 | /* HTS_Tree: list of decision trees in a model (entries chained through next). */
138 | typedef struct _HTS_Tree {
139 | HTS_Pattern *head; /* pointer to the head of pattern list for this tree */
140 | struct _HTS_Tree *next; /* pointer to the next tree in the list */
141 | HTS_Node *root; /* root node of this tree */
142 | size_t state; /* state index of this tree */
143 | } HTS_Tree;
144 |
145 | /* HTS_Model: set of PDFs, decision trees and questions. */
146 | typedef struct _HTS_Model {
147 | size_t vector_length; /* vector length (static features only) */
148 | size_t num_windows; /* # of windows for delta */
149 | HTS_Boolean is_msd; /* flag for MSD (TRUE/FALSE) */
150 | size_t ntree; /* # of trees */
151 | size_t *npdf; /* # of PDFs at each tree */
152 | float ***pdf; /* PDFs (stored as float; index order not visible in this header) */
153 | HTS_Tree *tree; /* pointer to the head of the list of trees */
154 | HTS_Question *question; /* pointer to the head of the list of questions */
155 | } HTS_Model;
156 |
157 | /* HTS_ModelSet: set of duration models, HMMs and GV models. */
158 | typedef struct _HTS_ModelSet {
159 | char *hts_voice_version; /* version of HTS voice format */
160 | size_t sampling_frequency; /* sampling frequency */
161 | size_t frame_period; /* frame period */
162 | size_t num_voices; /* # of HTS voices */
163 | size_t num_states; /* # of HMM states */
164 | size_t num_streams; /* # of streams */
165 | char *stream_type; /* stream type */
166 | char *fullcontext_format; /* fullcontext label format */
167 | char *fullcontext_version; /* version of fullcontext label */
168 | HTS_Question *gv_off_context; /* GV switch (held as an HTS_Question) */
169 | char **option; /* options for each stream */
170 | HTS_Model *duration; /* duration PDFs and trees */
171 | HTS_Window *window; /* window coefficients for delta */
172 | HTS_Model **stream; /* parameter PDFs and trees (index order not visible in this header) */
173 | HTS_Model **gv; /* GV PDFs and trees (index order not visible in this header) */
174 | } HTS_ModelSet;
175 |
176 | /* label ----------------------------------------------------------- */
177 |
178 | /* HTS_LabelString: individual label string with time information (entries chained through next) */
179 | typedef struct _HTS_LabelString {
180 | struct _HTS_LabelString *next; /* pointer to next label string */
181 | char *name; /* label string */
182 | double start; /* start frame specified in the given label */
183 | double end; /* end frame specified in the given label */
184 | } HTS_LabelString;
185 |
186 | /* HTS_Label: list of label strings (head pointer plus element count) */
187 | typedef struct _HTS_Label {
188 | HTS_LabelString *head; /* pointer to the head of the label string list */
189 | size_t size; /* # of label strings */
190 | } HTS_Label;
191 |
192 | /* sstream --------------------------------------------------------- */
193 |
194 | /* HTS_SStream: individual state stream */
195 | typedef struct _HTS_SStream {
196 | size_t vector_length; /* vector length (static features only) */
197 | double **mean; /* mean vector sequence */
198 | double **vari; /* variance vector sequence */
199 | double *msd; /* MSD parameter sequence */
200 | size_t win_size; /* # of windows (static + deltas) */
201 | int *win_l_width; /* left width of windows */
202 | int *win_r_width; /* right width of windows */
203 | double **win_coefficient; /* window coefficients */
204 | size_t win_max_width; /* maximum width of windows */
205 | double *gv_mean; /* mean vector of GV */
206 | double *gv_vari; /* variance vector of GV */
207 | HTS_Boolean *gv_switch; /* GV flag sequence */
208 | } HTS_SStream;
209 |
210 | /* HTS_SStreamSet: set of state streams */
211 | typedef struct _HTS_SStreamSet {
212 | HTS_SStream *sstream; /* state streams (presumably nstream entries -- confirm) */
213 | size_t nstream; /* # of streams */
214 | size_t nstate; /* # of states */
215 | size_t *duration; /* duration sequence */
216 | size_t total_state; /* total # of states */
217 | size_t total_frame; /* total # of frames */
218 | } HTS_SStreamSet;
219 |
220 | /* pstream --------------------------------------------------------- */
221 |
222 | /* HTS_SMatrices: matrices/vectors used in the speech parameter generation algorithm. */
223 | typedef struct _HTS_SMatrices {
224 | double **mean; /* mean vector sequence */
225 | double **ivar; /* inverse diagonal variance sequence */
226 | double *g; /* vector used in the forward substitution */
227 | double **wuw; /* W' U^-1 W */
228 | double *wum; /* W' U^-1 mu */
229 | } HTS_SMatrices;
230 |
231 | /* HTS_PStream: individual PDF stream. */
232 | typedef struct _HTS_PStream {
233 | size_t vector_length; /* vector length (static features only) */
234 | size_t length; /* stream length */
235 | size_t width; /* width of dynamic window */
236 | double **par; /* output parameter vectors */
237 | HTS_SMatrices sm; /* matrices for parameter generation (embedded by value) */
238 | size_t win_size; /* # of windows (static + deltas) */
239 | int *win_l_width; /* left width of windows */
240 | int *win_r_width; /* right width of windows */
241 | double **win_coefficient; /* window coefficients */
242 | HTS_Boolean *msd_flag; /* Boolean sequence for MSD */
243 | double *gv_mean; /* mean vector of GV */
244 | double *gv_vari; /* variance vector of GV */
245 | HTS_Boolean *gv_switch; /* GV flag sequence */
246 | size_t gv_length; /* frame length for GV calculation */
247 | } HTS_PStream;
248 |
249 | /* HTS_PStreamSet: set of PDF streams. */
250 | typedef struct _HTS_PStreamSet {
251 | HTS_PStream *pstream; /* PDF streams (presumably nstream entries -- confirm) */
252 | size_t nstream; /* # of PDF streams */
253 | size_t total_frame; /* total # of frames */
254 | } HTS_PStreamSet;
255 |
256 | /* gstream --------------------------------------------------------- */
257 |
258 | /* HTS_GStream: generated parameter stream. */
259 | typedef struct _HTS_GStream {
260 | size_t vector_length; /* vector length (static features only) */
261 | double **par; /* generated parameters (index order not visible in this header) */
262 | } HTS_GStream;
263 |
264 | /* HTS_GStreamSet: set of generated parameter streams. */
265 | typedef struct _HTS_GStreamSet {
266 | size_t total_nsample; /* total # of samples */
267 | size_t total_frame; /* total # of frames */
268 | size_t nstream; /* # of streams */
269 | HTS_GStream *gstream; /* generated parameter streams */
270 | double *gspeech; /* generated speech (samples held as double) */
271 | } HTS_GStreamSet;
272 |
273 | /* engine ---------------------------------------------------------- */
274 |
275 | /* HTS_Condition: synthesis condition (the settings behind the HTS_Engine_set_*/get_* functions) */
276 | typedef struct _HTS_Condition {
277 | /* global */
278 | size_t sampling_frequency; /* sampling frequency */
279 | size_t fperiod; /* frame period */
280 | size_t audio_buff_size; /* audio buffer size (for audio device) */
281 | HTS_Boolean stop; /* stop flag */
282 | double volume; /* volume */
283 | double *msd_threshold; /* MSD thresholds (set per stream through HTS_Engine_set_msd_threshold) */
284 | double *gv_weight; /* GV weights (set per stream through HTS_Engine_set_gv_weight) */
285 |
286 | /* duration */
287 | HTS_Boolean phoneme_alignment_flag; /* flag for using phoneme alignment in label */
288 | double speed; /* speech speed */
289 |
290 | /* spectrum */
291 | size_t stage; /* if stage=0 then gamma=0 else gamma=-1/stage */
292 | HTS_Boolean use_log_gain; /* log gain flag (for LSP) */
293 | double alpha; /* all-pass constant */
294 | double beta; /* postfiltering coefficient */
295 |
296 | /* log F0 */
297 | double additional_half_tone; /* additional half tone */
298 |
299 | /* interpolation weights */
300 | double *duration_iw; /* weights for duration interpolation (set per voice) */
301 | double **parameter_iw; /* weights for parameter interpolation (set per voice and stream) */
302 | double **gv_iw; /* weights for GV interpolation (set per voice and stream) */
303 | } HTS_Condition;
304 |
305 | /* HTS_Engine: Engine itself; the object every HTS_Engine_* function takes as its first argument. */
306 | typedef struct _HTS_Engine {
307 | HTS_Condition condition; /* synthesis condition */
308 | HTS_Audio audio; /* audio output */
309 | HTS_ModelSet ms; /* set of duration models, HMMs and GV models */
310 | HTS_Label label; /* label */
311 | HTS_SStreamSet sss; /* set of state streams */
312 | HTS_PStreamSet pss; /* set of PDF streams */
313 | HTS_GStreamSet gss; /* set of generated parameter streams */
314 | } HTS_Engine;
315 |
316 | /* engine method --------------------------------------------------- */
317 |
318 | /* HTS_Engine_initialize: initialize engine */
319 | DLLEXPORT void HTS_Engine_initialize(HTS_Engine * engine);
320 |
321 | /* HTS_Engine_load: load HTS voices */
322 | DLLEXPORT HTS_Boolean HTS_Engine_load(HTS_Engine * engine, char **voices, size_t num_voices);
323 |
324 | /* HTS_Engine_set_sampling_frequency: set sampling frequency */
325 | DLLEXPORT void HTS_Engine_set_sampling_frequency(HTS_Engine * engine, size_t i);
326 |
327 | /* HTS_Engine_get_sampling_frequency: get sampling frequency */
328 | DLLEXPORT size_t HTS_Engine_get_sampling_frequency(HTS_Engine * engine);
329 |
330 | /* HTS_Engine_set_fperiod: set frame period */
331 | DLLEXPORT void HTS_Engine_set_fperiod(HTS_Engine * engine, size_t i);
332 |
333 | /* HTS_Engine_get_fperiod: get frame period */
334 | DLLEXPORT size_t HTS_Engine_get_fperiod(HTS_Engine * engine);
335 |
336 | /* HTS_Engine_set_audio_buff_size: set audio buffer size */
337 | DLLEXPORT void HTS_Engine_set_audio_buff_size(HTS_Engine * engine, size_t i);
338 |
339 | /* HTS_Engine_get_audio_buff_size: get audio buffer size */
340 | DLLEXPORT size_t HTS_Engine_get_audio_buff_size(HTS_Engine * engine);
341 |
342 | /* HTS_Engine_set_stop_flag: set stop flag */
343 | DLLEXPORT void HTS_Engine_set_stop_flag(HTS_Engine * engine, HTS_Boolean b);
344 |
345 | /* HTS_Engine_get_stop_flag: get stop flag */
346 | DLLEXPORT HTS_Boolean HTS_Engine_get_stop_flag(HTS_Engine * engine);
347 |
348 | /* HTS_Engine_set_volume: set volume in db */
349 | DLLEXPORT void HTS_Engine_set_volume(HTS_Engine * engine, double f);
350 |
351 | /* HTS_Engine_get_volume: get volume in db */
352 | DLLEXPORT double HTS_Engine_get_volume(HTS_Engine * engine);
353 |
354 | /* HTS_Engine_set_msd_threshold: set MSD threshold */
355 | DLLEXPORT void HTS_Engine_set_msd_threshold(HTS_Engine * engine, size_t stream_index, double f);
356 |
357 | /* HTS_Engine_get_msd_threshold: get MSD threshold */
358 | DLLEXPORT double HTS_Engine_get_msd_threshold(HTS_Engine * engine, size_t stream_index);
359 |
360 | /* HTS_Engine_set_gv_weight: set GV weight */
361 | DLLEXPORT void HTS_Engine_set_gv_weight(HTS_Engine * engine, size_t stream_index, double f);
362 |
363 | /* HTS_Engine_get_gv_weight: get GV weight */
364 | DLLEXPORT double HTS_Engine_get_gv_weight(HTS_Engine * engine, size_t stream_index);
365 |
366 | /* HTS_Engine_set_speed: set speech speed */
367 | DLLEXPORT void HTS_Engine_set_speed(HTS_Engine * engine, double f);
368 |
369 | /* HTS_Engine_set_phoneme_alignment_flag: set flag for using phoneme alignment in label */
370 | DLLEXPORT void HTS_Engine_set_phoneme_alignment_flag(HTS_Engine * engine, HTS_Boolean b);
371 |
372 | /* HTS_Engine_set_alpha: set alpha */
373 | DLLEXPORT void HTS_Engine_set_alpha(HTS_Engine * engine, double f);
374 |
375 | /* HTS_Engine_get_alpha: get alpha */
376 | DLLEXPORT double HTS_Engine_get_alpha(HTS_Engine * engine);
377 |
378 | /* HTS_Engine_set_beta: set beta */
379 | DLLEXPORT void HTS_Engine_set_beta(HTS_Engine * engine, double f);
380 |
381 | /* HTS_Engine_get_beta: get beta */
382 | DLLEXPORT double HTS_Engine_get_beta(HTS_Engine * engine);
383 |
384 | /* HTS_Engine_add_half_tone: add half tone */
385 | DLLEXPORT void HTS_Engine_add_half_tone(HTS_Engine * engine, double f);
386 |
387 | /* HTS_Engine_set_duration_interpolation_weight: set interpolation weight for duration */
388 | DLLEXPORT void HTS_Engine_set_duration_interpolation_weight(HTS_Engine * engine, size_t voice_index, double f);
389 |
390 | /* HTS_Engine_get_duration_interpolation_weight: get interpolation weight for duration */
391 | DLLEXPORT double HTS_Engine_get_duration_interpolation_weight(HTS_Engine * engine, size_t voice_index);
392 |
393 | /* HTS_Engine_set_parameter_interpolation_weight: set interpolation weight for parameter */
394 | DLLEXPORT void HTS_Engine_set_parameter_interpolation_weight(HTS_Engine * engine, size_t voice_index, size_t stream_index, double f);
395 |
396 | /* HTS_Engine_get_parameter_interpolation_weight: get interpolation weight for parameter */
397 | DLLEXPORT double HTS_Engine_get_parameter_interpolation_weight(HTS_Engine * engine, size_t voice_index, size_t stream_index);
398 |
399 | /* HTS_Engine_set_gv_interpolation_weight: set interpolation weight for GV */
400 | DLLEXPORT void HTS_Engine_set_gv_interpolation_weight(HTS_Engine * engine, size_t voice_index, size_t stream_index, double f);
401 |
402 | /* HTS_Engine_get_gv_interpolation_weight: get interpolation weight for GV */
403 | DLLEXPORT double HTS_Engine_get_gv_interpolation_weight(HTS_Engine * engine, size_t voice_index, size_t stream_index);
404 |
405 | /* HTS_Engine_get_total_state: get total number of state */
406 | DLLEXPORT size_t HTS_Engine_get_total_state(HTS_Engine * engine);
407 |
408 | /* HTS_Engine_set_state_mean: set mean value of state */
409 | DLLEXPORT void HTS_Engine_set_state_mean(HTS_Engine * engine, size_t stream_index, size_t state_index, size_t vector_index, double f);
410 |
411 | /* HTS_Engine_get_state_mean: get mean value of state */
412 | DLLEXPORT double HTS_Engine_get_state_mean(HTS_Engine * engine, size_t stream_index, size_t state_index, size_t vector_index);
413 |
414 | /* HTS_Engine_get_state_duration: get state duration */
415 | DLLEXPORT size_t HTS_Engine_get_state_duration(HTS_Engine * engine, size_t state_index);
416 |
417 | /* HTS_Engine_get_nvoices: get number of voices */
418 | DLLEXPORT size_t HTS_Engine_get_nvoices(HTS_Engine * engine);
419 |
420 | /* HTS_Engine_get_nstream: get number of stream */
421 | DLLEXPORT size_t HTS_Engine_get_nstream(HTS_Engine * engine);
422 |
423 | /* HTS_Engine_get_nstate: get number of state */
424 | DLLEXPORT size_t HTS_Engine_get_nstate(HTS_Engine * engine);
425 |
426 | /* HTS_Engine_get_fullcontext_label_format: get full context label format */
427 | DLLEXPORT const char *HTS_Engine_get_fullcontext_label_format(HTS_Engine * engine);
428 |
429 | /* HTS_Engine_get_fullcontext_label_version: get full context label version */
430 | DLLEXPORT const char *HTS_Engine_get_fullcontext_label_version(HTS_Engine * engine);
431 |
432 | /* HTS_Engine_get_total_frame: get total number of frame */
433 | DLLEXPORT size_t HTS_Engine_get_total_frame(HTS_Engine * engine);
434 |
435 | /* HTS_Engine_get_nsamples: get number of samples */
436 | DLLEXPORT size_t HTS_Engine_get_nsamples(HTS_Engine * engine);
437 |
438 | /* HTS_Engine_get_generated_parameter: output generated parameter */
439 | DLLEXPORT double HTS_Engine_get_generated_parameter(HTS_Engine * engine, size_t stream_index, size_t frame_index, size_t vector_index);
440 |
441 | /* HTS_Engine_get_generated_speech: output generated speech */
442 | DLLEXPORT double HTS_Engine_get_generated_speech(HTS_Engine * engine, size_t index);
443 |
444 | /* HTS_Engine_synthesize_from_fn: synthesize speech from file name */
445 | DLLEXPORT HTS_Boolean HTS_Engine_synthesize_from_fn(HTS_Engine * engine, const char *fn);
446 |
447 | /* HTS_Engine_synthesize_from_strings: synthesize speech from string list */
448 | DLLEXPORT HTS_Boolean HTS_Engine_synthesize_from_strings(HTS_Engine * engine, char **lines, size_t num_lines);
449 |
450 | /* HTS_Engine_generate_state_sequence_from_fn: generate state sequence from file name (1st synthesis step) */
451 | DLLEXPORT HTS_Boolean HTS_Engine_generate_state_sequence_from_fn(HTS_Engine * engine, const char *fn);
452 |
453 | /* HTS_Engine_generate_state_sequence_from_strings: generate state sequence from string list (1st synthesis step) */
454 | DLLEXPORT HTS_Boolean HTS_Engine_generate_state_sequence_from_strings(HTS_Engine * engine, char **lines, size_t num_lines);
455 |
456 | /* HTS_Engine_generate_parameter_sequence: generate parameter sequence (2nd synthesis step) */
457 | DLLEXPORT HTS_Boolean HTS_Engine_generate_parameter_sequence(HTS_Engine * engine);
458 |
459 | /* HTS_Engine_generate_sample_sequence: generate sample sequence (3rd synthesis step) */
460 | DLLEXPORT HTS_Boolean HTS_Engine_generate_sample_sequence(HTS_Engine * engine);
461 |
462 | /* HTS_Engine_save_information: save trace information */
463 | DLLEXPORT void HTS_Engine_save_information(HTS_Engine * engine, FILE * fp);
464 |
465 | /* HTS_Engine_save_label: save label with time */
466 | DLLEXPORT void HTS_Engine_save_label(HTS_Engine * engine, FILE * fp);
467 |
468 | /* HTS_Engine_save_generated_parameter: save generated parameter */
469 | DLLEXPORT void HTS_Engine_save_generated_parameter(HTS_Engine * engine, size_t stream_index, FILE * fp);
470 |
471 | /* HTS_Engine_save_generated_speech: save generated speech */
472 | DLLEXPORT void HTS_Engine_save_generated_speech(HTS_Engine * engine, FILE * fp);
473 |
474 | /* HTS_Engine_save_riff: save RIFF format file */
475 | DLLEXPORT void HTS_Engine_save_riff(HTS_Engine * engine, FILE * fp);
476 |
477 | /* HTS_Engine_refresh: free memory allocated for one synthesis run */
478 | DLLEXPORT void HTS_Engine_refresh(HTS_Engine * engine);
479 |
480 | /* HTS_Engine_clear: free engine */
481 | DLLEXPORT void HTS_Engine_clear(HTS_Engine * engine);
482 |
483 | HTS_ENGINE_H_END;
484 |
485 | #endif /* !HTS_ENGINE_H */
486 |
--------------------------------------------------------------------------------
/src/lib/.cvsignore:
--------------------------------------------------------------------------------
1 | Makefile
2 | Makefile.in
3 | libHTSEngine.a
4 | .deps
5 |
--------------------------------------------------------------------------------
/src/lib/HTS_audio.c:
--------------------------------------------------------------------------------
1 | /* ----------------------------------------------------------------- */
2 | /* The HMM-Based Speech Synthesis Engine "hts_engine API" */
3 | /* developed by HTS Working Group */
4 | /* http://hts-engine.sourceforge.net/ */
5 | /* ----------------------------------------------------------------- */
6 | /* */
7 | /* Copyright (c) 2001-2014 Nagoya Institute of Technology */
8 | /* Department of Computer Science */
9 | /* */
10 | /* 2001-2008 Tokyo Institute of Technology */
11 | /* Interdisciplinary Graduate School of */
12 | /* Science and Engineering */
13 | /* */
14 | /* All rights reserved. */
15 | /* */
16 | /* Redistribution and use in source and binary forms, with or */
17 | /* without modification, are permitted provided that the following */
18 | /* conditions are met: */
19 | /* */
20 | /* - Redistributions of source code must retain the above copyright */
21 | /* notice, this list of conditions and the following disclaimer. */
22 | /* - Redistributions in binary form must reproduce the above */
23 | /* copyright notice, this list of conditions and the following */
24 | /* disclaimer in the documentation and/or other materials provided */
25 | /* with the distribution. */
26 | /* - Neither the name of the HTS working group nor the names of its */
27 | /* contributors may be used to endorse or promote products derived */
28 | /* from this software without specific prior written permission. */
29 | /* */
30 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
31 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
32 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
33 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
34 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
35 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */
36 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
37 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
38 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
39 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */
40 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */
41 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
42 | /* POSSIBILITY OF SUCH DAMAGE. */
43 | /* ----------------------------------------------------------------- */
44 |
45 | #ifndef HTS_AUDIO_C
46 | #define HTS_AUDIO_C
47 |
48 | #ifdef __cplusplus
49 | #define HTS_AUDIO_C_START extern "C" {
50 | #define HTS_AUDIO_C_END }
51 | #else
52 | #define HTS_AUDIO_C_START
53 | #define HTS_AUDIO_C_END
54 | #endif /* __CPLUSPLUS */
55 |
56 | HTS_AUDIO_C_START;
57 |
58 | #if !defined(AUDIO_PLAY_WIN32) && !defined(AUDIO_PLAY_PORTAUDIO) && !defined(AUDIO_PLAY_NONE)
59 | #if defined(__WINCE__) || defined(_WINCE) || defined(_WINCE) || defined(__WINCE) || defined(__WIN32__) || defined(__WIN32) || defined(_WIN32) || defined(WIN32) || defined(__CYGWIN__) || defined(__MINGW32__)
60 | #define AUDIO_PLAY_WIN32
61 | #else
62 | #define AUDIO_PLAY_NONE
63 | #endif /* __WINCE__ || _WINCE || _WINCE || __WINCE || __WIN32__ || __WIN32 || _WIN32 || WIN32 || __CYGWIN__ || __MINGW32__ */
64 | #endif /* !AUDIO_PLAY_WIN32 && !AUDIO_PLAY_PORTAUDIO && !AUDIO_PLAY_NONE */
65 |
66 | /* hts_engine libraries */
67 | #include "HTS_hidden.h"
68 |
69 | #ifdef AUDIO_PLAY_WIN32
70 |
71 | #include <windows.h>
72 | #include <mmsystem.h>
73 | #define AUDIO_WAIT_BUFF_MS 10 /* polling interval while waiting for a free buffer (0.01 sec) */
74 | #define AUDIO_CHANNEL 1 /* monaural */
75 | #if defined(_WIN64) || defined(_M_X64) /* every 64-bit Windows target, not only x64 */
76 | #define AUDIO_POINTER_TYPE DWORD_PTR /* pointer-sized integer, needed to carry pointers through the waveOut callback */
77 | #else
78 | #define AUDIO_POINTER_TYPE DWORD
79 | #endif
80 |
81 | /* HTS_AudioInterface: audio interface for Windows (waveOut playback through two alternating buffers) */
82 | typedef struct _HTS_AudioInterface {
83 | HWAVEOUT hwaveout; /* audio device handle */
84 | WAVEFORMATEX waveformatex; /* wave format passed to waveOutOpen */
85 | unsigned char which_buff; /* buffer the next write goes to: 1 = buff_1, otherwise buff_2 */
86 | HTS_Boolean now_buff_1; /* TRUE from the moment buff_1 is handed to the device until the callback sees it done */
87 | HTS_Boolean now_buff_2; /* same flag for buff_2 */
88 | WAVEHDR buff_1; /* first playback buffer header; lpData is allocated in HTS_AudioInterface_open */
89 | WAVEHDR buff_2; /* second playback buffer header */
90 | } HTS_AudioInterface;
91 |
92 | /* HTS_AudioInterface_callback_function: waveOut callback; clears the busy flag of a buffer the device has finished */
93 | static void CALLBACK HTS_AudioInterface_callback_function(HWAVEOUT hwaveout, UINT msg, AUDIO_POINTER_TYPE user_data, AUDIO_POINTER_TYPE param1, AUDIO_POINTER_TYPE param2)
94 | {
95 |    HTS_AudioInterface *self = (HTS_AudioInterface *) user_data;
96 |    WAVEHDR *done = (WAVEHDR *) param1;
97 |
98 |    /* only a playback-done message that carries a finished header is of interest */
99 |    if (msg != MM_WOM_DONE || done == NULL || !(done->dwFlags & WHDR_DONE))
100 |       return;
101 |    if (self->now_buff_1 == TRUE && done == &(self->buff_1))
102 |       self->now_buff_1 = FALSE;
103 |    else if (self->now_buff_2 == TRUE && done == &(self->buff_2))
104 |       self->now_buff_2 = FALSE;
105 | }
106 |
107 | /* HTS_AudioInterface_write: copy samples into the buffer whose turn it is and queue it on the audio device */
108 | static HTS_Boolean HTS_AudioInterface_write(HTS_AudioInterface * audio_interface, const short *buff, size_t buff_size)
109 | {
110 |    WAVEHDR *target;             /* header of the buffer used for this write */
111 |    HTS_Boolean *in_use;         /* busy flag that belongs to target */
112 |    unsigned char next_buff;     /* buffer to use on the following call */
113 |    MMRESULT status;
114 |
115 |    if (audio_interface->which_buff == 1) {
116 |       target = &(audio_interface->buff_1);
117 |       in_use = &(audio_interface->now_buff_1);
118 |       next_buff = 2;
119 |    } else {
120 |       target = &(audio_interface->buff_2);
121 |       in_use = &(audio_interface->now_buff_2);
122 |       next_buff = 1;
123 |    }
124 |    while (*in_use == TRUE)      /* the callback clears the flag once the device is done with the buffer */
125 |       Sleep(AUDIO_WAIT_BUFF_MS);
126 |    *in_use = TRUE;
127 |    audio_interface->which_buff = next_buff;
128 |    memcpy(target->lpData, buff, buff_size * sizeof(short));
129 |    target->dwBufferLength = (DWORD) buff_size * sizeof(short);
130 |    status = waveOutWrite(audio_interface->hwaveout, target, sizeof(WAVEHDR));
131 |    if (status != MMSYSERR_NOERROR)
132 |       HTS_error(0, "hts_engine: Cannot send datablocks to your output audio device to play waveform.\n");
133 |    return (status == MMSYSERR_NOERROR) ? TRUE : FALSE;
134 | }
135 |
136 | /* HTS_AudioInterface_close: stop playback, unprepare and free both buffers, close the device and free the interface */
137 | static void HTS_AudioInterface_close(HTS_AudioInterface * audio_interface)
138 | {
139 |    WAVEHDR *headers[2];
140 |    size_t n;
141 |
142 |    /* both buffers receive the same treatment, so they are walked through a small table */
143 |    headers[0] = &(audio_interface->buff_1);
144 |    headers[1] = &(audio_interface->buff_2);
145 |
146 |    /* stop audio */
147 |    if (waveOutReset(audio_interface->hwaveout) != MMSYSERR_NOERROR)
148 |       HTS_error(0, "hts_engine: Cannot stop and reset your output audio device.\n");
149 |    /* unprepare buff_1, then buff_2 */
150 |    for (n = 0; n < 2; n++)
151 |       if (waveOutUnprepareHeader(audio_interface->hwaveout, headers[n], sizeof(WAVEHDR)) != MMSYSERR_NOERROR)
152 |          HTS_error(0, "hts_engine: Cannot cleanup the audio datablocks to play waveform.\n");
153 |    /* close */
154 |    if (waveOutClose(audio_interface->hwaveout) != MMSYSERR_NOERROR)
155 |       HTS_error(0, "hts_engine: Failed to close your output audio device.\n");
156 |    /* release the sample storage of each buffer */
157 |    for (n = 0; n < 2; n++)
158 |       if (headers[n]->lpData != NULL)
159 |          HTS_free(headers[n]->lpData);
160 |
161 |    HTS_free(audio_interface);
162 | }
163 |
164 | /* HTS_AudioInterface_prepare_buffer: allocate storage for one playback buffer and prepare its header for the device */
165 | static MMRESULT HTS_AudioInterface_prepare_buffer(HWAVEOUT hwaveout, WAVEHDR * wavehdr, size_t max_buff_size)
166 | {
167 |    wavehdr->lpData = (LPSTR) HTS_calloc(max_buff_size, sizeof(short));
168 |    wavehdr->dwBufferLength = (DWORD) max_buff_size * sizeof(short);
169 |    wavehdr->dwFlags = WHDR_BEGINLOOP | WHDR_ENDLOOP;
170 |    wavehdr->dwLoops = 1;
171 |    wavehdr->lpNext = 0;
172 |    wavehdr->reserved = 0;
173 |    return waveOutPrepareHeader(hwaveout, wavehdr, sizeof(WAVEHDR));
174 | }
175 |
176 | /* HTS_AudioInterface_open: open the WAVE_MAPPER device for 16-bit monaural PCM with two prepared buffers; returns NULL on failure after releasing everything acquired so far */
177 | static HTS_AudioInterface *HTS_AudioInterface_open(size_t sampling_frequency, size_t max_buff_size)
178 | {
179 |    HTS_AudioInterface *audio_interface;
180 |    MMRESULT result;
181 |
182 |    audio_interface = (HTS_AudioInterface *) HTS_calloc(1, sizeof(HTS_AudioInterface));
183 |    audio_interface->hwaveout = 0;
184 |    audio_interface->which_buff = 1;
185 |    audio_interface->now_buff_1 = FALSE;
186 |    audio_interface->now_buff_2 = FALSE;
187 |    /* format */
188 |    audio_interface->waveformatex.wFormatTag = WAVE_FORMAT_PCM;
189 |    audio_interface->waveformatex.nChannels = AUDIO_CHANNEL;
190 |    audio_interface->waveformatex.nSamplesPerSec = (DWORD) sampling_frequency;
191 |    audio_interface->waveformatex.wBitsPerSample = sizeof(short) * 8;
192 |    audio_interface->waveformatex.nBlockAlign = AUDIO_CHANNEL * audio_interface->waveformatex.wBitsPerSample / 8;
193 |    audio_interface->waveformatex.nAvgBytesPerSec = (DWORD) sampling_frequency * audio_interface->waveformatex.nBlockAlign;
194 |    /* open */
195 |    result = waveOutOpen(&audio_interface->hwaveout, WAVE_MAPPER, &audio_interface->waveformatex, (AUDIO_POINTER_TYPE) HTS_AudioInterface_callback_function, (AUDIO_POINTER_TYPE) audio_interface, CALLBACK_FUNCTION);
196 |    if (result != MMSYSERR_NOERROR) {
197 |       HTS_error(0, "hts_engine: Failed to open your output audio_interface device to play waveform.\n");
198 |       HTS_free(audio_interface);
199 |       return NULL;
200 |    }
201 |    /* prepare the first buffer; on failure the already opened device must be closed as well */
202 |    result = HTS_AudioInterface_prepare_buffer(audio_interface->hwaveout, &(audio_interface->buff_1), max_buff_size);
203 |    if (result != MMSYSERR_NOERROR) {
204 |       HTS_error(0, "hts_engine: Cannot initialize audio_interface datablocks to play waveform.\n");
205 |       HTS_free(audio_interface->buff_1.lpData);
206 |       waveOutClose(audio_interface->hwaveout);
207 |       HTS_free(audio_interface);
208 |       return NULL;
209 |    }
210 |    /* prepare the second buffer; on failure the first header is unprepared before its storage is freed */
211 |    result = HTS_AudioInterface_prepare_buffer(audio_interface->hwaveout, &(audio_interface->buff_2), max_buff_size);
212 |    if (result != MMSYSERR_NOERROR) {
213 |       HTS_error(0, "hts_engine: Cannot initialize audio_interface datablocks to play waveform.\n");
214 |       waveOutUnprepareHeader(audio_interface->hwaveout, &(audio_interface->buff_1), sizeof(WAVEHDR));
215 |       HTS_free(audio_interface->buff_1.lpData);
216 |       HTS_free(audio_interface->buff_2.lpData);
217 |       waveOutClose(audio_interface->hwaveout);
218 |       HTS_free(audio_interface);
219 |       return NULL;
220 |    }
221 |    return audio_interface;
222 | }
223 |
224 | /* HTS_Audio_initialize: reset every field of audio to its empty state (no device, no buffer); nothing is freed */
225 | void HTS_Audio_initialize(HTS_Audio * audio)
226 | {
227 |    /* a NULL audio is silently ignored */
228 |    if (audio != NULL) {
229 |       audio->audio_interface = NULL;
230 |       audio->buff = NULL;
231 |       audio->buff_size = 0;
232 |       audio->max_buff_size = 0;
233 |       audio->sampling_frequency = 0;
234 |    }
235 | }
236 |
237 | /* HTS_Audio_set_parameter: (re)open the audio device for the given sampling frequency and buffer size */
238 | void HTS_Audio_set_parameter(HTS_Audio * audio, size_t sampling_frequency, size_t max_buff_size)
239 | {
240 |    if (audio == NULL)
241 |       return;
242 |
243 |    /* keep the current device when the requested settings are already in effect */
244 |    if (audio->sampling_frequency == sampling_frequency && audio->max_buff_size == max_buff_size)
245 |       return;
246 |
247 |    /* drop the old device; with a zero frequency or size no new device is opened */
248 |    HTS_Audio_clear(audio);
249 |    if (sampling_frequency != 0 && max_buff_size != 0) {
250 |       /* the settings and the sample buffer are recorded only when the device opened */
251 |       audio->audio_interface = HTS_AudioInterface_open(sampling_frequency, max_buff_size);
252 |       if (audio->audio_interface != NULL) {
253 |          audio->buff_size = 0;
254 |          audio->buff = (short *) HTS_calloc(max_buff_size, sizeof(short));
255 |          audio->max_buff_size = max_buff_size;
256 |          audio->sampling_frequency = sampling_frequency;
257 |       }
258 |    }
259 | }
260 |
261 | /* HTS_Audio_write: append one sample and hand the buffer to the audio device once it is full */
262 | void HTS_Audio_write(HTS_Audio * audio, short data)
263 | {
264 |    if (audio == NULL || audio->audio_interface == NULL)
265 |       return;
266 |
267 |    audio->buff[audio->buff_size] = data;
268 |    audio->buff_size++;
269 |    if (audio->buff_size < audio->max_buff_size)
270 |       return;
271 |    /* buffer is full: play it, or tear the audio down when the device rejects it */
272 |    if (HTS_AudioInterface_write((HTS_AudioInterface *) audio->audio_interface, audio->buff, audio->buff_size) == TRUE)
273 |       audio->buff_size = 0;
274 |    else
275 |       HTS_Audio_clear(audio);
276 | }
277 |
278 | /* HTS_Audio_flush: play the samples still buffered and wait until the device has finished both buffers */
279 | void HTS_Audio_flush(HTS_Audio * audio)
280 | {
281 |    HTS_AudioInterface *device;
282 |
283 |    if (audio == NULL || audio->audio_interface == NULL)
284 |       return;
285 |    device = (HTS_AudioInterface *) audio->audio_interface;
286 |
287 |    /* send the partially filled buffer, if there is one */
288 |    if (audio->buff_size != 0 && HTS_AudioInterface_write(device, audio->buff, audio->buff_size) != TRUE) {
289 |       HTS_Audio_clear(audio);   /* the interface is closed here, so there is nothing left to wait for */
290 |       return;
291 |    }
292 |    audio->buff_size = 0;
293 |    /* the callback clears these flags as each buffer finishes playing */
294 |    while (device->now_buff_1 == TRUE || device->now_buff_2 == TRUE)
295 |       Sleep(AUDIO_WAIT_BUFF_MS);
296 | }
297 |
298 | /* HTS_Audio_clear: close the audio device, release the sample buffer and reset audio */
299 | void HTS_Audio_clear(HTS_Audio * audio)
300 | {
301 |    HTS_AudioInterface *audio_interface;
302 |
303 |    if (audio == NULL || audio->audio_interface == NULL)
304 |       return;                   /* no device was opened, so there is nothing to release */
305 |
306 |    audio_interface = (HTS_AudioInterface *) audio->audio_interface;
307 |    HTS_AudioInterface_close(audio_interface);
308 |    if (audio->buff != NULL)
309 |       HTS_free(audio->buff);    /* allocated with HTS_calloc, so it is released through HTS_free */
310 |    HTS_Audio_initialize(audio);
311 | }
312 |
313 | #endif /* AUDIO_PLAY_WIN32 */
314 |
315 | #ifdef AUDIO_PLAY_PORTAUDIO
316 |
317 | #include "portaudio.h"
318 |
319 | /* HTS_AudioInterface: audio output for PortAudio */
320 | typedef struct _HTS_AudioInterface {
321 | PaStreamParameters parameters; /* output stream parameters (device, channel count, sample format, latency) */
322 | PaStream *stream; /* output stream; NULL until Pa_OpenStream has filled it in */
323 | } HTS_AudioInterface;
324 |
325 | /* HTS_AudioInterface_write: send buff_size samples to the PortAudio output stream */
326 | static void HTS_AudioInterface_write(HTS_AudioInterface * audio_interface, const short *buff, size_t buff_size)
327 | {
328 |    const PaError status = Pa_WriteStream(audio_interface->stream, buff, buff_size);
329 |    /* an output underflow is tolerated; any other failure is reported */
330 |    if (status == paNoError || status == paOutputUnderflowed)
331 |       return;
332 |    HTS_error(0, "hts_engine: Cannot send datablocks to your output audio device to play waveform.\n");
333 | }
334 |
335 | /* HTS_AudioInterface_close: stop and close the output stream, terminate PortAudio and free the interface */
336 | static void HTS_AudioInterface_close(HTS_AudioInterface * audio_interface)
337 | {
338 |    PaStream *stream = audio_interface->stream;
339 |
340 |    /* each failure is reported through HTS_error before the next teardown step */
341 |    if (Pa_StopStream(stream) != paNoError)
342 |       HTS_error(0, "hts_engine: Cannot stop your output audio device.\n");
343 |    if (Pa_CloseStream(stream) != paNoError)
344 |       HTS_error(0, "hts_engine: Failed to close your output audio device.\n");
345 |
346 |    /* counterpart of the Pa_Initialize call made in HTS_AudioInterface_open */
347 |    Pa_Terminate();
348 |    HTS_free(audio_interface);
349 | }
350 |
351 | /* HTS_AudioInterface_open: initialize PortAudio and start a 16-bit monaural output stream on the default device; returns NULL on failure */
352 | static HTS_AudioInterface *HTS_AudioInterface_open(size_t sampling_frequency, size_t max_buff_size)
353 | {
354 |    HTS_AudioInterface *audio_interface;
355 |    const PaDeviceInfo *device_info;
356 |    audio_interface = (HTS_AudioInterface *) HTS_calloc(1, sizeof(HTS_AudioInterface));
357 |    audio_interface->stream = NULL;
358 |    if (Pa_Initialize() != paNoError) {
359 |       HTS_error(0, "hts_engine: Failed to initialize your output audio device to play waveform.\n");
360 |       HTS_free(audio_interface);
361 |       return NULL;
362 |    }
363 |    /* without a default output device Pa_GetDeviceInfo yields NULL, which must not be dereferenced */
364 |    audio_interface->parameters.device = Pa_GetDefaultOutputDevice();
365 |    device_info = Pa_GetDeviceInfo(audio_interface->parameters.device);
366 |    if (device_info == NULL) {
367 |       HTS_error(0, "hts_engine: Cannot find a default output audio device to play waveform.\n");
368 |       Pa_Terminate();
369 |       HTS_free(audio_interface);
370 |       return NULL;
371 |    }
372 |    audio_interface->parameters.channelCount = 1;
373 |    audio_interface->parameters.sampleFormat = paInt16;
374 |    audio_interface->parameters.suggestedLatency = device_info->defaultLowOutputLatency;
375 |    audio_interface->parameters.hostApiSpecificStreamInfo = NULL;
376 |    if (Pa_OpenStream(&audio_interface->stream, NULL, &audio_interface->parameters, sampling_frequency, max_buff_size, paClipOff, NULL, NULL) != paNoError) {
377 |       HTS_error(0, "hts_engine: Failed to open your output audio device to play waveform.\n");
378 |       Pa_Terminate();
379 |       HTS_free(audio_interface);
380 |       return NULL;
381 |    }
382 |    if (Pa_StartStream(audio_interface->stream) != paNoError) {
383 |       HTS_error(0, "hts_engine: Failed to start your output audio device to play waveform.\n");
384 |       Pa_CloseStream(audio_interface->stream);
385 |       Pa_Terminate();
386 |       HTS_free(audio_interface);
387 |       return NULL;
388 |    }
389 |    return audio_interface;
390 | }
391 |
392 | /* HTS_Audio_initialize: reset every field of audio to its empty state (no device, no buffer); nothing is freed */
393 | void HTS_Audio_initialize(HTS_Audio * audio)
394 | {
395 |    /* a NULL audio is silently ignored */
396 |    if (audio != NULL) {
397 |       audio->audio_interface = NULL;
398 |       audio->buff = NULL;
399 |       audio->buff_size = 0;
400 |       audio->max_buff_size = 0;
401 |       audio->sampling_frequency = 0;
402 |    }
403 | }
404 |
405 | /* HTS_Audio_set_parameter: (re)open the audio device for the given sampling frequency and buffer size */
406 | void HTS_Audio_set_parameter(HTS_Audio * audio, size_t sampling_frequency, size_t max_buff_size)
407 | {
408 |    if (audio == NULL)
409 |       return;
410 |
411 |    /* keep the current device when the requested settings are already in effect */
412 |    if (audio->sampling_frequency == sampling_frequency && audio->max_buff_size == max_buff_size)
413 |       return;
414 |
415 |    /* drop the old device; with a zero frequency or size no new device is opened */
416 |    HTS_Audio_clear(audio);
417 |    if (sampling_frequency != 0 && max_buff_size != 0) {
418 |       /* the settings and the sample buffer are recorded only when the device opened */
419 |       audio->audio_interface = HTS_AudioInterface_open(sampling_frequency, max_buff_size);
420 |       if (audio->audio_interface != NULL) {
421 |          audio->buff_size = 0;
422 |          audio->buff = (short *) HTS_calloc(max_buff_size, sizeof(short));
423 |          audio->max_buff_size = max_buff_size;
424 |          audio->sampling_frequency = sampling_frequency;
425 |       }
426 |    }
427 | }
428 |
429 | /* HTS_Audio_write: append one sample and send the buffer to the audio device once it is full */
430 | void HTS_Audio_write(HTS_Audio * audio, short data)
431 | {
432 |    /* buff is only allocated after a device was opened; storing a sample without it would dereference NULL */
433 |    if (audio == NULL || audio->audio_interface == NULL || audio->buff == NULL)
434 |       return;
435 |
436 |    audio->buff[audio->buff_size++] = data;
437 |
438 |    if (audio->buff_size >= audio->max_buff_size) {
439 |       HTS_AudioInterface_write((HTS_AudioInterface *) audio->audio_interface, audio->buff, audio->max_buff_size);
440 |       audio->buff_size = 0;
441 |    }
442 | }
443 |
444 | /* HTS_Audio_flush: send the samples still waiting in the buffer to the audio device */
445 | void HTS_Audio_flush(HTS_Audio * audio)
446 | {
447 |    if (audio == NULL || audio->audio_interface == NULL)
448 |       return;
449 |
450 |    /* an empty buffer needs no write */
451 |    if (audio->buff_size == 0)
452 |       return;
453 |
454 |    /* write exactly the buffered samples, then mark the buffer empty */
455 |    HTS_AudioInterface_write((HTS_AudioInterface *) audio->audio_interface, audio->buff, audio->buff_size);
456 |    audio->buff_size = 0;
457 | }
458 |
459 | /* HTS_Audio_clear: flush pending samples, close the audio device, release the buffer and reset audio */
460 | void HTS_Audio_clear(HTS_Audio * audio)
461 | {
462 |    HTS_AudioInterface *device;
463 |    if (audio == NULL)
464 |       return;
465 |    device = (HTS_AudioInterface *) audio->audio_interface;
466 |    if (device == NULL)
467 |       return;
468 |    HTS_Audio_flush(audio);      /* write out what is still buffered before the stream is closed */
469 |    HTS_AudioInterface_close(device);
470 |    if (audio->buff != NULL)
471 |       HTS_free(audio->buff);
472 |    HTS_Audio_initialize(audio);
473 | }
474 |
475 | #endif /* AUDIO_PLAY_PORTAUDIO */
476 |
477 | #ifdef AUDIO_PLAY_NONE
478 |
479 | /* HTS_Audio_initialize: initialize audio (AUDIO_PLAY_NONE build: does nothing, audio is left untouched) */
480 | void HTS_Audio_initialize(HTS_Audio * audio)
481 | {
482 | }
483 |
484 | /* HTS_Audio_set_parameter: set parameters for audio (AUDIO_PLAY_NONE build: both values are ignored) */
485 | void HTS_Audio_set_parameter(HTS_Audio * audio, size_t sampling_frequeny, size_t max_buff_size)
486 | {
487 | }
488 |
489 | /* HTS_Audio_write: send data to audio (AUDIO_PLAY_NONE build: the sample is discarded) */
490 | void HTS_Audio_write(HTS_Audio * audio, short data)
491 | {
492 | }
493 |
494 | /* HTS_Audio_flush: flush remaining data (AUDIO_PLAY_NONE build: does nothing) */
495 | void HTS_Audio_flush(HTS_Audio * audio)
496 | {
497 | }
498 |
499 | /* HTS_Audio_clear: free audio (AUDIO_PLAY_NONE build: does nothing, as nothing is ever allocated here) */
500 | void HTS_Audio_clear(HTS_Audio * audio)
501 | {
502 | }
503 |
504 | #endif /* AUDIO_PLAY_NONE */
505 |
506 | HTS_AUDIO_C_END;
507 |
508 | #endif /* !HTS_AUDIO_C */
509 |
--------------------------------------------------------------------------------
/src/lib/HTS_gstream.c:
--------------------------------------------------------------------------------
1 | /* ----------------------------------------------------------------- */
2 | /* The HMM-Based Speech Synthesis Engine "hts_engine API" */
3 | /* developed by HTS Working Group */
4 | /* http://hts-engine.sourceforge.net/ */
5 | /* ----------------------------------------------------------------- */
6 | /* */
7 | /* Copyright (c) 2001-2014 Nagoya Institute of Technology */
8 | /* Department of Computer Science */
9 | /* */
10 | /* 2001-2008 Tokyo Institute of Technology */
11 | /* Interdisciplinary Graduate School of */
12 | /* Science and Engineering */
13 | /* */
14 | /* All rights reserved. */
15 | /* */
16 | /* Redistribution and use in source and binary forms, with or */
17 | /* without modification, are permitted provided that the following */
18 | /* conditions are met: */
19 | /* */
20 | /* - Redistributions of source code must retain the above copyright */
21 | /* notice, this list of conditions and the following disclaimer. */
22 | /* - Redistributions in binary form must reproduce the above */
23 | /* copyright notice, this list of conditions and the following */
24 | /* disclaimer in the documentation and/or other materials provided */
25 | /* with the distribution. */
26 | /* - Neither the name of the HTS working group nor the names of its */
27 | /* contributors may be used to endorse or promote products derived */
28 | /* from this software without specific prior written permission. */
29 | /* */
30 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
31 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
32 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
33 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
34 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
35 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */
36 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
37 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
38 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
39 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */
40 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */
41 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
42 | /* POSSIBILITY OF SUCH DAMAGE. */
43 | /* ----------------------------------------------------------------- */
44 |
45 | #ifndef HTS_GSTREAM_C
46 | #define HTS_GSTREAM_C
47 |
48 | #ifdef __cplusplus
49 | #define HTS_GSTREAM_C_START extern "C" {
50 | #define HTS_GSTREAM_C_END }
51 | #else
52 | #define HTS_GSTREAM_C_START
53 | #define HTS_GSTREAM_C_END
54 | #endif /* __CPLUSPLUS */
55 |
56 | HTS_GSTREAM_C_START;
57 |
58 | /* hts_engine libraries */
59 | #include "HTS_hidden.h"
60 |
61 | /* HTS_GStreamSet_initialize: put a generated parameter stream set into its empty state (counters zero, pointers NULL; nothing is freed) */
62 | void HTS_GStreamSet_initialize(HTS_GStreamSet * gss)
63 | {
64 |    gss->gspeech = NULL;
65 |    gss->gstream = NULL;
66 |    gss->total_nsample = 0;
67 |    gss->total_frame = 0;
68 |    gss->nstream = 0;
69 | }
70 |
71 | /* HTS_GStreamSet_create: copy the generated parameters out of pss and synthesize the speech waveform; returns FALSE when gss is already in use or the stream layout is unsupported */
72 | HTS_Boolean HTS_GStreamSet_create(HTS_GStreamSet * gss, HTS_PStreamSet * pss, size_t stage, HTS_Boolean use_log_gain, size_t sampling_rate, size_t fperiod, double alpha, double beta, HTS_Boolean * stop, double volume, HTS_Audio * audio)
73 | {
74 |    size_t i, j, k;
75 |    size_t msd_frame, nstream;
76 |    HTS_Vocoder v;
77 |    size_t nlpf = 0;
78 |    double *lpf = NULL;
79 |
80 |    /* check */
81 |    if (gss->gstream || gss->gspeech) {
82 |       HTS_error(1, "HTS_GStreamSet_create: HTS_GStreamSet is not initialized.\n");
83 |       return FALSE;
84 |    }
85 |
86 |    /* validate the stream layout first, so that an unsupported model costs no allocation or copying */
87 |    nstream = HTS_PStreamSet_get_nstream(pss);
88 |    if (nstream != 2 && nstream != 3) {
89 |       HTS_error(1, "HTS_GStreamSet_create: The number of streams should be 2 or 3.\n");
90 |       HTS_GStreamSet_clear(gss);
91 |       return FALSE;
92 |    }
93 |    if (HTS_PStreamSet_get_vector_length(pss, 1) != 1) {
94 |       HTS_error(1, "HTS_GStreamSet_create: The size of lf0 static vector should be 1.\n");
95 |       HTS_GStreamSet_clear(gss);
96 |       return FALSE;
97 |    }
98 |    if (nstream >= 3 && HTS_PStreamSet_get_vector_length(pss, 2) % 2 == 0) {
99 |       HTS_error(1, "HTS_GStreamSet_create: The number of low-pass filter coefficient should be odd numbers.\n");
100 |       HTS_GStreamSet_clear(gss);
101 |       return FALSE;
102 |    }
103 |
104 |    /* initialize */
105 |    gss->nstream = nstream;
106 |    gss->total_frame = HTS_PStreamSet_get_total_frame(pss);
107 |    gss->total_nsample = fperiod * gss->total_frame;
108 |    gss->gstream = (HTS_GStream *) HTS_calloc(gss->nstream, sizeof(HTS_GStream));
109 |    for (i = 0; i < gss->nstream; i++) {
110 |       gss->gstream[i].vector_length = HTS_PStreamSet_get_vector_length(pss, i);
111 |       gss->gstream[i].par = (double **) HTS_calloc(gss->total_frame, sizeof(double *));
112 |       for (j = 0; j < gss->total_frame; j++)
113 |          gss->gstream[i].par[j] = (double *) HTS_calloc(gss->gstream[i].vector_length, sizeof(double));
114 |    }
115 |    gss->gspeech = (double *) HTS_calloc(gss->total_nsample, sizeof(double));
116 |
117 |    /* copy generated parameter */
118 |    for (i = 0; i < gss->nstream; i++) {
119 |       if (HTS_PStreamSet_is_msd(pss, i)) {      /* for MSD: pss holds values only for flagged frames, indexed by msd_frame */
120 |          for (j = 0, msd_frame = 0; j < gss->total_frame; j++)
121 |             if (HTS_PStreamSet_get_msd_flag(pss, i, j)) {
122 |                for (k = 0; k < gss->gstream[i].vector_length; k++)
123 |                   gss->gstream[i].par[j][k] = HTS_PStreamSet_get_parameter(pss, i, msd_frame, k);
124 |                msd_frame++;
125 |             } else
126 |                for (k = 0; k < gss->gstream[i].vector_length; k++)
127 |                   gss->gstream[i].par[j][k] = HTS_NODATA;
128 |       } else {                                  /* for non MSD */
129 |          for (j = 0; j < gss->total_frame; j++)
130 |             for (k = 0; k < gss->gstream[i].vector_length; k++)
131 |                gss->gstream[i].par[j][k] = HTS_PStreamSet_get_parameter(pss, i, j, k);
132 |       }
133 |    }
134 |
135 |    /* synthesize speech waveform: one call per frame, each writing at offset i * fperiod of gspeech */
136 |    HTS_Vocoder_initialize(&v, gss->gstream[0].vector_length - 1, stage, use_log_gain, sampling_rate, fperiod);
137 |    if (gss->nstream >= 3)
138 |       nlpf = gss->gstream[2].vector_length;
139 |    for (i = 0; i < gss->total_frame && (*stop) == FALSE; i++) {
140 |       j = i * fperiod;
141 |       if (gss->nstream >= 3)
142 |          lpf = &gss->gstream[2].par[i][0];
143 |       HTS_Vocoder_synthesize(&v, gss->gstream[0].vector_length - 1, gss->gstream[1].par[i][0], &gss->gstream[0].par[i][0], nlpf, lpf, alpha, beta, volume, &gss->gspeech[j], audio);
144 |    }
145 |    HTS_Vocoder_clear(&v);
146 |    if (audio)
147 |       HTS_Audio_flush(audio);
148 |    return TRUE;
149 | }
150 |
151 | /* HTS_GStreamSet_get_total_nsamples: get total number of samples (fperiod * total_frame as computed by HTS_GStreamSet_create) */
152 | size_t HTS_GStreamSet_get_total_nsamples(HTS_GStreamSet * gss)
153 | {
154 | return gss->total_nsample;
155 | }
156 |
157 | /* HTS_GStreamSet_get_total_frame: get total number of frames (0 right after HTS_GStreamSet_initialize) */
158 | size_t HTS_GStreamSet_get_total_frame(HTS_GStreamSet * gss)
159 | {
160 | return gss->total_frame;
161 | }
162 |
163 | /* HTS_GStreamSet_get_vector_length: get vector length of one stream; stream_index is not range-checked */
164 | size_t HTS_GStreamSet_get_vector_length(HTS_GStreamSet * gss, size_t stream_index)
165 | {
166 | return gss->gstream[stream_index].vector_length;
167 | }
168 |
169 | /* HTS_GStreamSet_get_speech: get one sample of the synthesized speech; sample_index is not range-checked */
170 | double HTS_GStreamSet_get_speech(HTS_GStreamSet * gss, size_t sample_index)
171 | {
172 | return gss->gspeech[sample_index];
173 | }
174 |
175 | /* HTS_GStreamSet_get_parameter: get one generated parameter value by stream, frame and vector index; no index is range-checked */
176 | double HTS_GStreamSet_get_parameter(HTS_GStreamSet * gss, size_t stream_index, size_t frame_index, size_t vector_index)
177 | {
178 | return gss->gstream[stream_index].par[frame_index][vector_index];
179 | }
180 |
181 | /* HTS_GStreamSet_clear: release every buffer owned by the stream set and reset it to the empty state */
182 | void HTS_GStreamSet_clear(HTS_GStreamSet * gss)
183 | {
184 |    size_t stream, frame;
185 |    if (gss->gstream != NULL) {
186 |       for (stream = 0; stream < gss->nstream; stream++) {
187 |          double **rows = gss->gstream[stream].par;
188 |          if (rows == NULL)
189 |             continue;
190 |          for (frame = 0; frame < gss->total_frame; frame++)
191 |             HTS_free(rows[frame]);
192 |          HTS_free(rows);
193 |       }
194 |       HTS_free(gss->gstream);
195 |    }
196 |    if (gss->gspeech != NULL)
197 |       HTS_free(gss->gspeech);
198 |    HTS_GStreamSet_initialize(gss);
199 | }
200 |
201 | HTS_GSTREAM_C_END;
202 |
203 | #endif /* !HTS_GSTREAM_C */
204 |
--------------------------------------------------------------------------------
/src/lib/HTS_hidden.h:
--------------------------------------------------------------------------------
1 | /* ----------------------------------------------------------------- */
2 | /* The HMM-Based Speech Synthesis Engine "hts_engine API" */
3 | /* developed by HTS Working Group */
4 | /* http://hts-engine.sourceforge.net/ */
5 | /* ----------------------------------------------------------------- */
6 | /* */
7 | /* Copyright (c) 2001-2014 Nagoya Institute of Technology */
8 | /* Department of Computer Science */
9 | /* */
10 | /* 2001-2008 Tokyo Institute of Technology */
11 | /* Interdisciplinary Graduate School of */
12 | /* Science and Engineering */
13 | /* */
14 | /* All rights reserved. */
15 | /* */
16 | /* Redistribution and use in source and binary forms, with or */
17 | /* without modification, are permitted provided that the following */
18 | /* conditions are met: */
19 | /* */
20 | /* - Redistributions of source code must retain the above copyright */
21 | /* notice, this list of conditions and the following disclaimer. */
22 | /* - Redistributions in binary form must reproduce the above */
23 | /* copyright notice, this list of conditions and the following */
24 | /* disclaimer in the documentation and/or other materials provided */
25 | /* with the distribution. */
26 | /* - Neither the name of the HTS working group nor the names of its */
27 | /* contributors may be used to endorse or promote products derived */
28 | /* from this software without specific prior written permission. */
29 | /* */
30 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
31 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
32 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
33 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
34 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
35 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */
36 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
37 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
38 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
39 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */
40 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */
41 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
42 | /* POSSIBILITY OF SUCH DAMAGE. */
43 | /* ----------------------------------------------------------------- */
44 |
45 | #ifndef HTS_HIDDEN_H
46 | #define HTS_HIDDEN_H
47 |
48 | #ifdef __cplusplus
49 | #define HTS_HIDDEN_H_START extern "C" {
50 | #define HTS_HIDDEN_H_END }
51 | #else
52 | #define HTS_HIDDEN_H_START
53 | #define HTS_HIDDEN_H_END
54 | #endif /* __cplusplus */
55 |
56 | HTS_HIDDEN_H_START;
57 |
58 | /* hts_engine libraries */
59 | #include "HTS_engine.h"
60 |
61 | /* common ---------------------------------------------------------- */
62 |
63 | #define HTS_MAXBUFLEN 1024
64 |
65 | #if !defined(WORDS_BIGENDIAN) && !defined(WORDS_LITTLEENDIAN)
66 | #define WORDS_LITTLEENDIAN
67 | #endif /* !WORDS_BIGENDIAN && !WORDS_LITTLEENDIAN */
68 | #if defined(WORDS_BIGENDIAN) && defined(WORDS_LITTLEENDIAN)
69 | #undef WORDS_BIGENDIAN
70 | #endif /* WORDS_BIGENDIAN && WORDS_LITTLEENDIAN */
71 |
72 | #define MAX_F0 20000.0
73 | #define MIN_F0 20.0
74 | #define MAX_LF0 9.9034875525361280454891979401956 /* log(20000.0) */
75 | #define MIN_LF0 2.9957322735539909934352235761425 /* log(20.0) */
76 | #define HALF_TONE 0.05776226504666210911810267678818 /* log(2.0) / 12.0 */
77 | #define DB 0.11512925464970228420089957273422 /* log(10.0) / 20.0 */
78 |
79 | /* misc ------------------------------------------------------------ */
80 |
81 | typedef struct _HTS_File { /* handle taken by the HTS_f* wrapper functions declared below */
82 | unsigned char type; /* NOTE(review): presumably tags what kind of object 'pointer' refers to -- confirm in HTS_misc.c */
83 | void *pointer; /* untyped reference to the underlying object; its real type is not visible in this header */
84 | } HTS_File;
85 |
86 | /* HTS_fopen_from_fn: wrapper for fopen */
87 | HTS_File *HTS_fopen_from_fn(const char *name, const char *opt);
88 |
89 | /* HTS_fopen_from_fp: wrapper for fopen */
90 | HTS_File *HTS_fopen_from_fp(HTS_File * fp, size_t size);
91 |
92 | /* HTS_fopen_from_data: wrapper for fopen */
93 | HTS_File *HTS_fopen_from_data(void *data, size_t size);
94 |
95 | /* HTS_fclose: wrapper for fclose */
96 | void HTS_fclose(HTS_File * fp);
97 |
98 | /* HTS_fgetc: wrapper for fgetc */
99 | int HTS_fgetc(HTS_File * fp);
100 |
101 | /* HTS_feof: wrapper for feof */
102 | int HTS_feof(HTS_File * fp);
103 |
104 | /* HTS_fseek: wrapper for fseek */
105 | int HTS_fseek(HTS_File * fp, long offset, int origin);
106 |
107 | /* HTS_ftell: wrapper for ftell */
108 | size_t HTS_ftell(HTS_File * fp);
109 |
110 | /* HTS_fread_big_endian: fread with byteswap */
111 | size_t HTS_fread_big_endian(void *buf, size_t size, size_t n, HTS_File * fp);
112 |
113 | /* HTS_fread_little_endian: fread with byteswap */
114 | size_t HTS_fread_little_endian(void *buf, size_t size, size_t n, HTS_File * fp);
115 |
116 | /* HTS_fwrite_little_endian: fwrite with byteswap */
117 | size_t HTS_fwrite_little_endian(const void *buf, size_t size, size_t n, FILE * fp);
118 |
119 | /* HTS_get_pattern_token: get pattern token (single/double quote can be used) */
120 | HTS_Boolean HTS_get_pattern_token(HTS_File * fp, char *buff);
121 |
122 | /* HTS_get_token_from_fp: get token from file pointer (separators are space, tab, and line break) */
123 | HTS_Boolean HTS_get_token_from_fp(HTS_File * fp, char *buff);
124 |
125 | /* HTS_get_token_from_fp_with_separator: get token from file pointer with specified separator */
126 | HTS_Boolean HTS_get_token_from_fp_with_separator(HTS_File * fp, char *buff, char separator);
127 |
128 | /* HTS_get_token_from_string: get token from string (separators are space, tab, and line break) */
129 | HTS_Boolean HTS_get_token_from_string(const char *string, size_t * index, char *buff);
130 |
131 | /* HTS_get_token_from_string_with_separator: get token from string with specified separator */
132 | HTS_Boolean HTS_get_token_from_string_with_separator(const char *str, size_t * index, char *buff, char separator);
133 |
134 | /* HTS_calloc: wrapper for calloc */
135 | void *HTS_calloc(const size_t num, const size_t size);
136 |
137 | /* HTS_strdup: wrapper for strdup */
138 | char *HTS_strdup(const char *string);
139 |
140 | /* HTS_alloc_matrix: allocate double matrix */
141 | double **HTS_alloc_matrix(size_t x, size_t y);
142 |
143 | /* HTS_free_matrix: free double matrix */
144 | void HTS_free_matrix(double **p, size_t x);
145 |
146 | /* HTS_free: wrapper for free */
147 | void HTS_free(void *p);
148 |
149 | /* HTS_error: output error message */
150 | void HTS_error(int error, const char *message, ...);
151 |
152 | /* audio ----------------------------------------------------------- */
153 |
154 | /* HTS_Audio_initialize: initialize audio */
155 | void HTS_Audio_initialize(HTS_Audio * audio);
156 |
157 | /* HTS_Audio_set_parameter: set parameters for audio */
158 | void HTS_Audio_set_parameter(HTS_Audio * audio, size_t sampling_frequency, size_t max_buff_size);
159 |
160 | /* HTS_Audio_write: send data to audio */
161 | void HTS_Audio_write(HTS_Audio * audio, short data);
162 |
163 | /* HTS_Audio_flush: flush remaining data */
164 | void HTS_Audio_flush(HTS_Audio * audio);
165 |
166 | /* HTS_Audio_clear: free audio */
167 | void HTS_Audio_clear(HTS_Audio * audio);
168 |
169 | /* model ----------------------------------------------------------- */
170 |
171 | /* HTS_ModelSet_initialize: initialize model set */
172 | void HTS_ModelSet_initialize(HTS_ModelSet * ms);
173 |
174 | /* HTS_ModelSet_load: load HTS voices */
175 | HTS_Boolean HTS_ModelSet_load(HTS_ModelSet * ms, char **voices, size_t num_voices);
176 |
177 | /* HTS_ModelSet_get_sampling_frequency: get sampling frequency of HTS voices */
178 | size_t HTS_ModelSet_get_sampling_frequency(HTS_ModelSet * ms);
179 |
180 | /* HTS_ModelSet_get_fperiod: get frame period of HTS voices */
181 | size_t HTS_ModelSet_get_fperiod(HTS_ModelSet * ms);
182 |
183 | /* HTS_ModelSet_get_option: get stream option */
184 | const char *HTS_ModelSet_get_option(HTS_ModelSet * ms, size_t stream_index);
185 |
186 | /* HTS_ModelSet_get_gv_flag: get GV flag */
187 | HTS_Boolean HTS_ModelSet_get_gv_flag(HTS_ModelSet * ms, const char *string);
188 |
189 | /* HTS_ModelSet_get_nstate: get number of state */
190 | size_t HTS_ModelSet_get_nstate(HTS_ModelSet * ms);
191 |
192 | /* HTS_ModelSet_get_fullcontext_label_format: get full-context label format */
193 | const char *HTS_ModelSet_get_fullcontext_label_format(HTS_ModelSet * ms);
194 |
195 | /* HTS_ModelSet_get_fullcontext_label_version: get full-context label version */
196 | const char *HTS_ModelSet_get_fullcontext_label_version(HTS_ModelSet * ms);
197 |
198 | /* HTS_ModelSet_get_nstream: get number of stream */
199 | size_t HTS_ModelSet_get_nstream(HTS_ModelSet * ms);
200 |
201 | /* HTS_ModelSet_get_nvoices: get number of HTS voices */
202 | size_t HTS_ModelSet_get_nvoices(HTS_ModelSet * ms);
203 |
204 | /* HTS_ModelSet_get_vector_length: get vector length */
205 | size_t HTS_ModelSet_get_vector_length(HTS_ModelSet * ms, size_t stream_index);
206 |
207 | /* HTS_ModelSet_is_msd: get MSD flag */
208 | HTS_Boolean HTS_ModelSet_is_msd(HTS_ModelSet * ms, size_t stream_index);
209 |
210 | /* HTS_ModelSet_get_window_size: get dynamic window size */
211 | size_t HTS_ModelSet_get_window_size(HTS_ModelSet * ms, size_t stream_index);
212 |
213 | /* HTS_ModelSet_get_window_left_width: get left width of dynamic window */
214 | int HTS_ModelSet_get_window_left_width(HTS_ModelSet * ms, size_t stream_index, size_t window_index);
215 |
216 | /* HTS_ModelSet_get_window_right_width: get right width of dynamic window */
217 | int HTS_ModelSet_get_window_right_width(HTS_ModelSet * ms, size_t stream_index, size_t window_index);
218 |
219 | /* HTS_ModelSet_get_window_coefficient: get coefficient of dynamic window */
220 | double HTS_ModelSet_get_window_coefficient(HTS_ModelSet * ms, size_t stream_index, size_t window_index, size_t coefficient_index);
221 |
222 | /* HTS_ModelSet_get_window_max_width: get max width of dynamic window */
223 | size_t HTS_ModelSet_get_window_max_width(HTS_ModelSet * ms, size_t stream_index);
224 |
225 | /* HTS_ModelSet_use_gv: get GV flag */
226 | HTS_Boolean HTS_ModelSet_use_gv(HTS_ModelSet * ms, size_t stream_index);
227 |
228 | /* HTS_ModelSet_get_duration_index: get index of duration tree and PDF */
229 | void HTS_ModelSet_get_duration_index(HTS_ModelSet * ms, size_t voice_index, const char *string, size_t * tree_index, size_t * pdf_index);
230 |
231 | /* HTS_ModelSet_get_duration: get duration using interpolation weight */
232 | void HTS_ModelSet_get_duration(HTS_ModelSet * ms, const char *string, const double *iw, double *mean, double *vari);
233 |
234 | /* HTS_ModelSet_get_parameter_index: get index of parameter tree and PDF */
235 | void HTS_ModelSet_get_parameter_index(HTS_ModelSet * ms, size_t voice_index, size_t stream_index, size_t state_index, const char *string, size_t * tree_index, size_t * pdf_index);
236 |
237 | /* HTS_ModelSet_get_parameter: get parameter using interpolation weight */
238 | void HTS_ModelSet_get_parameter(HTS_ModelSet * ms, size_t stream_index, size_t state_index, const char *string, const double *const *iw, double *mean, double *vari, double *msd);
239 |
240 | void HTS_ModelSet_get_gv_index(HTS_ModelSet * ms, size_t voice_index, size_t stream_index, const char *string, size_t * tree_index, size_t * pdf_index);
241 |
242 | /* HTS_ModelSet_get_gv: get GV using interpolation weight */
243 | void HTS_ModelSet_get_gv(HTS_ModelSet * ms, size_t stream_index, const char *string, const double *const *iw, double *mean, double *vari);
244 |
245 | /* HTS_ModelSet_clear: free model set */
246 | void HTS_ModelSet_clear(HTS_ModelSet * ms);
247 |
248 | /* label ----------------------------------------------------------- */
249 |
250 | /* HTS_Label_initialize: initialize label */
251 | void HTS_Label_initialize(HTS_Label * label);
252 |
253 | /* HTS_Label_load_from_fn: load label from file name */
254 | void HTS_Label_load_from_fn(HTS_Label * label, size_t sampling_rate, size_t fperiod, const char *fn);
255 |
256 | /* HTS_Label_load_from_strings: load label list from string list */
257 | void HTS_Label_load_from_strings(HTS_Label * label, size_t sampling_rate, size_t fperiod, char **lines, size_t num_lines);
258 |
259 | /* HTS_Label_get_size: get number of label string */
260 | size_t HTS_Label_get_size(HTS_Label * label);
261 |
262 | /* HTS_Label_get_string: get label string */
263 | const char *HTS_Label_get_string(HTS_Label * label, size_t index);
264 |
265 | /* HTS_Label_get_start_frame: get start frame */
266 | double HTS_Label_get_start_frame(HTS_Label * label, size_t index);
267 |
268 | /* HTS_Label_get_end_frame: get end frame */
269 | double HTS_Label_get_end_frame(HTS_Label * label, size_t index);
270 |
271 | /* HTS_Label_clear: free label */
272 | void HTS_Label_clear(HTS_Label * label);
273 |
274 | /* sstream --------------------------------------------------------- */
275 |
276 | /* HTS_SStreamSet_initialize: initialize state stream set */
277 | void HTS_SStreamSet_initialize(HTS_SStreamSet * sss);
278 |
279 | /* HTS_SStreamSet_create: parse label and determine state duration */
280 | HTS_Boolean HTS_SStreamSet_create(HTS_SStreamSet * sss, HTS_ModelSet * ms, HTS_Label * label, HTS_Boolean phoneme_alignment_flag, double speed, double *duration_iw, double **parameter_iw, double **gv_iw);
281 |
282 | /* HTS_SStreamSet_get_nstream: get number of stream */
283 | size_t HTS_SStreamSet_get_nstream(HTS_SStreamSet * sss);
284 |
285 | /* HTS_SStreamSet_get_vector_length: get vector length */
286 | size_t HTS_SStreamSet_get_vector_length(HTS_SStreamSet * sss, size_t stream_index);
287 |
288 | /* HTS_SStreamSet_is_msd: get MSD flag */
289 | HTS_Boolean HTS_SStreamSet_is_msd(HTS_SStreamSet * sss, size_t stream_index);
290 |
291 | /* HTS_SStreamSet_get_total_state: get total number of state */
292 | size_t HTS_SStreamSet_get_total_state(HTS_SStreamSet * sss);
293 |
294 | /* HTS_SStreamSet_get_total_frame: get total number of frame */
295 | size_t HTS_SStreamSet_get_total_frame(HTS_SStreamSet * sss);
296 |
297 | /* HTS_SStreamSet_get_msd: get msd parameter */
298 | double HTS_SStreamSet_get_msd(HTS_SStreamSet * sss, size_t stream_index, size_t state_index);
299 |
300 | /* HTS_SStreamSet_get_window_size: get dynamic window size */
301 | size_t HTS_SStreamSet_get_window_size(HTS_SStreamSet * sss, size_t stream_index);
302 |
303 | /* HTS_SStreamSet_get_window_left_width: get left width of dynamic window */
304 | int HTS_SStreamSet_get_window_left_width(HTS_SStreamSet * sss, size_t stream_index, size_t window_index);
305 |
306 | /* HTS_SStreamSet_get_window_right_width: get right width of dynamic window */
307 | int HTS_SStreamSet_get_window_right_width(HTS_SStreamSet * sss, size_t stream_index, size_t window_index);
308 |
309 | /* HTS_SStreamSet_get_window_coefficient: get coefficient of dynamic window */
310 | double HTS_SStreamSet_get_window_coefficient(HTS_SStreamSet * sss, size_t stream_index, size_t window_index, int coefficient_index);
311 |
312 | /* HTS_SStreamSet_get_window_max_width: get max width of dynamic window */
313 | size_t HTS_SStreamSet_get_window_max_width(HTS_SStreamSet * sss, size_t stream_index);
314 |
315 | /* HTS_SStreamSet_use_gv: get GV flag */
316 | HTS_Boolean HTS_SStreamSet_use_gv(HTS_SStreamSet * sss, size_t stream_index);
317 |
318 | /* HTS_SStreamSet_get_duration: get state duration */
319 | size_t HTS_SStreamSet_get_duration(HTS_SStreamSet * sss, size_t state_index);
320 |
321 | /* HTS_SStreamSet_get_mean: get mean parameter */
322 | double HTS_SStreamSet_get_mean(HTS_SStreamSet * sss, size_t stream_index, size_t state_index, size_t vector_index);
323 |
324 | /* HTS_SStreamSet_set_mean: set mean parameter */
325 | void HTS_SStreamSet_set_mean(HTS_SStreamSet * sss, size_t stream_index, size_t state_index, size_t vector_index, double f);
326 |
327 | /* HTS_SStreamSet_get_vari: get variance parameter */
328 | double HTS_SStreamSet_get_vari(HTS_SStreamSet * sss, size_t stream_index, size_t state_index, size_t vector_index);
329 |
330 | /* HTS_SStreamSet_set_vari: set variance parameter */
331 | void HTS_SStreamSet_set_vari(HTS_SStreamSet * sss, size_t stream_index, size_t state_index, size_t vector_index, double f);
332 |
333 | /* HTS_SStreamSet_get_gv_mean: get GV mean parameter */
334 | double HTS_SStreamSet_get_gv_mean(HTS_SStreamSet * sss, size_t stream_index, size_t vector_index);
335 |
336 | /* HTS_SStreamSet_get_gv_vari: get GV variance parameter */
337 | double HTS_SStreamSet_get_gv_vari(HTS_SStreamSet * sss, size_t stream_index, size_t vector_index);
338 |
339 | /* HTS_SStreamSet_set_gv_switch: set GV switch */
340 | void HTS_SStreamSet_set_gv_switch(HTS_SStreamSet * sss, size_t stream_index, size_t state_index, HTS_Boolean i);
341 |
342 | /* HTS_SStreamSet_get_gv_switch: get GV switch */
343 | HTS_Boolean HTS_SStreamSet_get_gv_switch(HTS_SStreamSet * sss, size_t stream_index, size_t state_index);
344 |
345 | /* HTS_SStreamSet_clear: free state stream set */
346 | void HTS_SStreamSet_clear(HTS_SStreamSet * sss);
347 |
348 | /* pstream --------------------------------------------------------- */
349 |
350 | /* check variance in finv() */
351 | #define INFTY ((double) 1.0e+38)
352 | #define INFTY2 ((double) 1.0e+19)
353 | #define INVINF ((double) 1.0e-38)
354 | #define INVINF2 ((double) 1.0e-19)
355 |
356 | /* GV */
357 | #define STEPINIT 0.1
358 | #define STEPDEC 0.5
359 | #define STEPINC 1.2
360 | #define W1 1.0
361 | #define W2 1.0
362 | #define GV_MAX_ITERATION 5
363 |
364 | /* HTS_PStreamSet_initialize: initialize parameter stream set */
365 | void HTS_PStreamSet_initialize(HTS_PStreamSet * pss);
366 |
367 | /* HTS_PStreamSet_create: parameter generation using GV weight */
368 | HTS_Boolean HTS_PStreamSet_create(HTS_PStreamSet * pss, HTS_SStreamSet * sss, double *msd_threshold, double *gv_weight);
369 |
370 | /* HTS_PStreamSet_get_nstream: get number of stream */
371 | size_t HTS_PStreamSet_get_nstream(HTS_PStreamSet * pss);
372 |
373 | /* HTS_PStreamSet_get_vector_length: get features length */
374 | size_t HTS_PStreamSet_get_vector_length(HTS_PStreamSet * pss, size_t stream_index);
375 |
376 | /* HTS_PStreamSet_get_total_frame: get total number of frame */
377 | size_t HTS_PStreamSet_get_total_frame(HTS_PStreamSet * pss);
378 |
379 | /* HTS_PStreamSet_get_parameter: get parameter */
380 | double HTS_PStreamSet_get_parameter(HTS_PStreamSet * pss, size_t stream_index, size_t frame_index, size_t vector_index);
381 |
382 | /* HTS_PStreamSet_get_parameter_vector: get parameter vector */
383 | double *HTS_PStreamSet_get_parameter_vector(HTS_PStreamSet * pss, size_t stream_index, size_t frame_index);
384 |
385 | /* HTS_PStreamSet_get_msd_flag: get generated MSD flag per frame */
386 | HTS_Boolean HTS_PStreamSet_get_msd_flag(HTS_PStreamSet * pss, size_t stream_index, size_t frame_index);
387 |
388 | /* HTS_PStreamSet_is_msd: get MSD flag */
389 | HTS_Boolean HTS_PStreamSet_is_msd(HTS_PStreamSet * pss, size_t stream_index);
390 |
391 | /* HTS_PStreamSet_clear: free parameter stream set */
392 | void HTS_PStreamSet_clear(HTS_PStreamSet * pss);
393 |
394 | /* gstream --------------------------------------------------------- */
395 |
396 | /* HTS_GStreamSet_initialize: initialize generated parameter stream set */
397 | void HTS_GStreamSet_initialize(HTS_GStreamSet * gss);
398 |
399 | /* HTS_GStreamSet_create: generate speech */
400 | HTS_Boolean HTS_GStreamSet_create(HTS_GStreamSet * gss, HTS_PStreamSet * pss, size_t stage, HTS_Boolean use_log_gain, size_t sampling_rate, size_t fperiod, double alpha, double beta, HTS_Boolean * stop, double volume, HTS_Audio * audio);
401 |
402 | /* HTS_GStreamSet_get_total_nsamples: get total number of sample */
403 | size_t HTS_GStreamSet_get_total_nsamples(HTS_GStreamSet * gss);
404 |
405 | /* HTS_GStreamSet_get_total_frame: get total number of frame */
406 | size_t HTS_GStreamSet_get_total_frame(HTS_GStreamSet * gss);
407 |
408 | /* HTS_GStreamSet_get_vector_length: get features length */
409 | size_t HTS_GStreamSet_get_vector_length(HTS_GStreamSet * gss, size_t stream_index);
410 |
411 | /* HTS_GStreamSet_get_speech: get synthesized speech parameter */
412 | double HTS_GStreamSet_get_speech(HTS_GStreamSet * gss, size_t sample_index);
413 |
414 | /* HTS_GStreamSet_get_parameter: get generated parameter */
415 | double HTS_GStreamSet_get_parameter(HTS_GStreamSet * gss, size_t stream_index, size_t frame_index, size_t vector_index);
416 |
417 | /* HTS_GStreamSet_clear: free generated parameter stream set */
418 | void HTS_GStreamSet_clear(HTS_GStreamSet * gss);
419 |
420 | /* vocoder --------------------------------------------------------- */
421 |
422 | #ifndef LZERO
423 | #define LZERO (-1.0e+10) /* ~log(0) */
424 | #endif /* !LZERO */
425 |
426 | #ifndef ZERO
427 | #define ZERO (1.0e-10) /* ~(0) */
428 | #endif /* !ZERO */
429 |
430 | #ifndef PI
431 | #define PI 3.14159265358979323846
432 | #endif /* !PI */
433 |
434 | #ifndef PI2
435 | #define PI2 6.28318530717958647692
436 | #endif /* !PI2 */
437 |
438 | #define RANDMAX 32767
439 |
440 | #define SEED 1
441 | #define B0 0x00000001
442 | #define B28 0x10000000
443 | #define B31 0x80000000
444 | #define B31_ 0x7fffffff
445 | #define Z 0x00000000
446 |
447 | #ifdef HTS_EMBEDDED
448 | #define GAUSS FALSE
449 | #define PADEORDER 4 /* pade order (for MLSA filter) */
450 | #define IRLENG 384 /* length of impulse response */
451 | #else
452 | #define GAUSS TRUE
453 | #define PADEORDER 5
454 | #define IRLENG 576
455 | #endif /* HTS_EMBEDDED */
456 |
457 | #define CHECK_LSP_STABILITY_MIN 0.25
458 | #define CHECK_LSP_STABILITY_NUM 4
459 |
460 | /* for MGLSA filter */
461 | #define NORMFLG1 TRUE
462 | #define NORMFLG2 FALSE
463 | #define MULGFLG1 TRUE
464 | #define MULGFLG2 FALSE
465 | #define NGAIN FALSE
466 |
467 | /* HTS_Vocoder: vocoder settings together with the working buffers used during synthesis */
468 | typedef struct _HTS_Vocoder {
469 | HTS_Boolean is_first; /* NOTE(review): presumably TRUE until the first synthesis call -- confirm in HTS_vocoder.c */
470 | size_t stage; /* Gamma=-1/stage: if stage=0 then Gamma=0 */
471 | double gamma; /* Gamma */
472 | HTS_Boolean use_log_gain; /* log gain flag (for LSP) */
473 | size_t fprd; /* frame shift */
474 | unsigned long next; /* temporary variable for random generator */
475 | HTS_Boolean gauss; /* flag to use Gaussian noise */
476 | double rate; /* sampling rate */
477 | double pitch_of_curr_point; /* used in excitation generation */
478 | double pitch_counter; /* used in excitation generation */
479 | double pitch_inc_per_point; /* used in excitation generation */
480 | double *excite_ring_buff; /* used in excitation generation */
481 | size_t excite_buff_size; /* used in excitation generation */
482 | size_t excite_buff_index; /* used in excitation generation */
483 | unsigned char sw; /* switch used in random generator */
484 | int x; /* excitation signal */
485 | double *freqt_buff; /* used in freqt */
486 | size_t freqt_size; /* buffer size for freqt */
487 | double *spectrum2en_buff; /* used in spectrum2en */
488 | size_t spectrum2en_size; /* buffer size for spectrum2en */
489 | double r1, r2, s; /* used in random generator */
490 | double *postfilter_buff; /* used in postfiltering */
491 | size_t postfilter_size; /* buffer size for postfiltering */
492 | double *c, *cc, *cinc, *d1; /* used in the MLSA/MGLSA filter */
493 | double *lsp2lpc_buff; /* used in lsp2lpc */
494 | size_t lsp2lpc_size; /* buffer size of lsp2lpc */
495 | double *gc2gc_buff; /* used in gc2gc */
496 | size_t gc2gc_size; /* buffer size for gc2gc */
497 | } HTS_Vocoder;
498 |
499 | /* HTS_Vocoder_initialize: initialize vocoder */
500 | void HTS_Vocoder_initialize(HTS_Vocoder * v, size_t m, size_t stage, HTS_Boolean use_log_gain, size_t rate, size_t fperiod);
501 |
502 | /* HTS_Vocoder_synthesize: pulse/noise excitation and MLSA/MGLSA filter based waveform synthesis */
503 | void HTS_Vocoder_synthesize(HTS_Vocoder * v, size_t m, double lf0, double *spectrum, size_t nlpf, double *lpf, double alpha, double beta, double volume, double *rawdata, HTS_Audio * audio);
504 |
505 | /* HTS_Vocoder_clear: clear vocoder */
506 | void HTS_Vocoder_clear(HTS_Vocoder * v);
507 |
508 | HTS_HIDDEN_H_END;
509 |
510 | #endif /* !HTS_HIDDEN_H */
511 |
--------------------------------------------------------------------------------
/src/lib/HTS_label.c:
--------------------------------------------------------------------------------
1 | /* ----------------------------------------------------------------- */
2 | /* The HMM-Based Speech Synthesis Engine "hts_engine API" */
3 | /* developed by HTS Working Group */
4 | /* http://hts-engine.sourceforge.net/ */
5 | /* ----------------------------------------------------------------- */
6 | /* */
7 | /* Copyright (c) 2001-2014 Nagoya Institute of Technology */
8 | /* Department of Computer Science */
9 | /* */
10 | /* 2001-2008 Tokyo Institute of Technology */
11 | /* Interdisciplinary Graduate School of */
12 | /* Science and Engineering */
13 | /* */
14 | /* All rights reserved. */
15 | /* */
16 | /* Redistribution and use in source and binary forms, with or */
17 | /* without modification, are permitted provided that the following */
18 | /* conditions are met: */
19 | /* */
20 | /* - Redistributions of source code must retain the above copyright */
21 | /* notice, this list of conditions and the following disclaimer. */
22 | /* - Redistributions in binary form must reproduce the above */
23 | /* copyright notice, this list of conditions and the following */
24 | /* disclaimer in the documentation and/or other materials provided */
25 | /* with the distribution. */
26 | /* - Neither the name of the HTS working group nor the names of its */
27 | /* contributors may be used to endorse or promote products derived */
28 | /* from this software without specific prior written permission. */
29 | /* */
30 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
31 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
32 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
33 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
34 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
35 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */
36 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
37 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
38 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
39 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */
40 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */
41 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
42 | /* POSSIBILITY OF SUCH DAMAGE. */
43 | /* ----------------------------------------------------------------- */
44 |
45 | #ifndef HTS_LABEL_C
46 | #define HTS_LABEL_C
47 |
48 | #ifdef __cplusplus
49 | #define HTS_LABEL_C_START extern "C" {
50 | #define HTS_LABEL_C_END }
51 | #else
52 | #define HTS_LABEL_C_START
53 | #define HTS_LABEL_C_END
54 | #endif /* __cplusplus */
55 |
56 | HTS_LABEL_C_START;
57 |
58 | #include <stdlib.h> /* for atof() */
59 | #include <ctype.h> /* for isgraph(),isdigit(),isspace() */
60 |
61 | /* hts_engine libraries */
62 | #include "HTS_hidden.h"
63 |
64 | /* isdigit_string: TRUE when str starts (after optional white space and one sign) with a decimal digit; the text is only inspected, never converted, so a long time stamp cannot overflow an int as it could with sscanf("%d") */
65 | static HTS_Boolean isdigit_string(char *str)
66 | {
67 |    const unsigned char *p = (const unsigned char *) str;   /* unsigned so the <ctype.h> calls are well defined */
68 |    if (p == NULL) return FALSE;                            /* no string, no number */
69 |    while (isspace(*p)) p++;                                /* "%d" skipped leading white space */
70 |    if (*p == '+' || *p == '-') p++;                        /* ... accepted one optional sign */
71 |    return isdigit(*p) ? TRUE : FALSE;                      /* ... and required at least one digit */
72 | }
73 |
74 | /* HTS_Label_initialize: put label into the empty state (no entries, zero count) */
75 | void HTS_Label_initialize(HTS_Label * label)
76 | {
77 |    label->size = 0;
78 |    label->head = NULL;
79 | }
80 |
81 | /* HTS_Label_check_time: propagate known boundary times between neighbouring entries */
82 | static void HTS_Label_check_time(HTS_Label * label)
83 | {
84 |    HTS_LabelString *cur, *succ;
85 |
86 |    if (label->head == NULL)
87 |       return;
88 |    label->head->start = 0.0;            /* the first entry always starts at frame 0 */
89 |
90 |    /* visit each adjacent pair; the final entry has no successor, so it is never 'cur' */
91 |    for (cur = label->head; (succ = cur->next) != NULL; cur = succ) {
92 |       if (cur->end < 0.0 && succ->start >= 0.0)
93 |          cur->end = succ->start;        /* missing end: take the successor's start */
94 |       else if (cur->end >= 0.0 && succ->start < 0.0)
95 |          succ->start = cur->end;        /* missing successor start: inherit this end */
96 |
97 |       /* whatever is still negative is set to exactly -1.0 */
98 |       if (cur->start < 0.0)
99 |          cur->start = -1.0;
100 |       if (cur->end < 0.0)
101 |          cur->end = -1.0;
102 |    }
103 | }
104 |
105 | /* HTS_Label_load: read label entries from fp into an empty (initialized) label */
106 | static void HTS_Label_load(HTS_Label * label, size_t sampling_rate, size_t fperiod, HTS_File * fp)
107 | {
108 |    char buff[HTS_MAXBUFLEN];
109 |    HTS_LabelString *lstring = NULL;     /* tail of the list built so far */
110 |    double start, end;
111 |    const double rate = (double) sampling_rate / ((double) fperiod * 1e+7);     /* factor applied to label times to obtain frame positions */
112 |
113 |    if (label->head || label->size != 0) {
114 |       HTS_error(1, "HTS_Label_load: label is not initialized.\n");
115 |       return;
116 |    }
117 |
118 |    /* parse label file: an entry is either "name" or "start end name" */
119 |    while (HTS_get_token_from_fp(fp, buff)) {
120 |       if (!isgraph((unsigned char) buff[0]))    /* unsigned char: a negative char value is undefined for isgraph() */
121 |          break;
122 |       label->size++;
123 |
124 |       if (lstring) {
125 |          lstring->next = (HTS_LabelString *) HTS_calloc(1, sizeof(HTS_LabelString));
126 |          lstring = lstring->next;
127 |       } else {                  /* first time */
128 |          lstring = (HTS_LabelString *) HTS_calloc(1, sizeof(HTS_LabelString));
129 |          label->head = lstring;
130 |       }
131 |       if (isdigit_string(buff)) {       /* has frame information */
132 |          start = atof(buff);
133 |          HTS_get_token_from_fp(fp, buff);       /* NOTE(review): return value is not checked, so a truncated entry goes undetected */
134 |          end = atof(buff);
135 |          HTS_get_token_from_fp(fp, buff);       /* third token is the entry name */
136 |          lstring->start = rate * start;
137 |          lstring->end = rate * end;
138 |       } else {
139 |          lstring->start = -1.0;         /* negative times are filled in later by HTS_Label_check_time */
140 |          lstring->end = -1.0;
141 |       }
142 |       lstring->next = NULL;
143 |       lstring->name = HTS_strdup(buff);
144 |    }
145 |    HTS_Label_check_time(label);
146 | }
147 |
148 | /* HTS_Label_load_from_fn: open the file named fn, load its label entries, then close it */
149 | void HTS_Label_load_from_fn(HTS_Label * label, size_t sampling_rate, size_t fperiod, const char *fn)
150 | {
151 |    HTS_File *label_file = HTS_fopen_from_fn(fn, "r");
152 |    HTS_Label_load(label, sampling_rate, fperiod, label_file);   /* does all parsing */
153 |    HTS_fclose(label_file);
154 | }
155 |
156 | /* HTS_Label_load_from_strings: build an empty (initialized) label from num_lines text lines */
157 | void HTS_Label_load_from_strings(HTS_Label * label, size_t sampling_rate, size_t fperiod, char **lines, size_t num_lines)
158 | {
159 |    char buff[HTS_MAXBUFLEN];
160 |    HTS_LabelString *lstring = NULL;     /* tail of the list built so far */
161 |    size_t i;
162 |    size_t data_index;                   /* read position inside the current line */
163 |    double start, end;
164 |    const double rate = (double) sampling_rate / ((double) fperiod * 1e+7);     /* factor applied to label times to obtain frame positions */
165 |
166 |    if (label->head || label->size != 0) {
167 |       HTS_error(1, "HTS_Label_load_from_strings: label list is not initialized.\n");
168 |       return;
169 |    }
170 |    /* copy label: stop at the first missing line or line not starting with a printable character */
171 |    for (i = 0; i < num_lines; i++) {
172 |       if (lines[i] == NULL || !isgraph((unsigned char) lines[i][0]))    /* unsigned char: a negative char value is undefined for isgraph() */
173 |          break;
174 |       label->size++;
175 |
176 |       if (lstring) {
177 |          lstring->next = (HTS_LabelString *) HTS_calloc(1, sizeof(HTS_LabelString));
178 |          lstring = lstring->next;
179 |       } else {                  /* first time */
180 |          lstring = (HTS_LabelString *) HTS_calloc(1, sizeof(HTS_LabelString));
181 |          label->head = lstring;
182 |       }
183 |       data_index = 0;
184 |       if (isdigit_string(lines[i])) {   /* has frame information */
185 |          HTS_get_token_from_string(lines[i], &data_index, buff);
186 |          start = atof(buff);
187 |          HTS_get_token_from_string(lines[i], &data_index, buff);        /* NOTE(review): return value is not checked, so a short line goes undetected */
188 |          end = atof(buff);
189 |          HTS_get_token_from_string(lines[i], &data_index, buff);        /* third token is the entry name */
190 |          lstring->name = HTS_strdup(buff);
191 |          lstring->start = rate * start;
192 |          lstring->end = rate * end;
193 |       } else {
194 |          lstring->start = -1.0;         /* negative times are filled in later by HTS_Label_check_time */
195 |          lstring->end = -1.0;
196 |          lstring->name = HTS_strdup(lines[i]);
197 |       }
198 |       lstring->next = NULL;
199 |    }
200 |    HTS_Label_check_time(label);
201 | }
202 |
203 | /* HTS_Label_get_size: return the entry count stored in label->size */
204 | size_t HTS_Label_get_size(HTS_Label * label)
205 | {
206 | return label->size; /* the loaders in this file increment it once per entry they add */
207 | }
208 |
209 | /* HTS_Label_get_string: name of the index-th entry (0-based), or NULL when index is past the end */
210 | const char *HTS_Label_get_string(HTS_Label * label, size_t index)
211 | {
212 |    HTS_LabelString *node = label->head;
213 |    size_t remaining = index;
214 |
215 |    while (remaining > 0 && node != NULL) {      /* advance index entries, or until the list ends */
216 |       node = node->next;
217 |       remaining--;
218 |    }
219 |    return node != NULL ? node->name : NULL;
220 | }
221 |
222 | /* HTS_Label_get_start_frame: start frame of the index-th entry (0-based), or -1.0 when index is past the end */
223 | double HTS_Label_get_start_frame(HTS_Label * label, size_t index)
224 | {
225 |    HTS_LabelString *node = label->head;
226 |    size_t remaining = index;
227 |
228 |    while (remaining > 0 && node != NULL) {      /* advance index entries, or until the list ends */
229 |       node = node->next;
230 |       remaining--;
231 |    }
232 |    return node != NULL ? node->start : -1.0;
233 | }
234 |
235 | /* HTS_Label_get_end_frame: end frame of the index-th entry (0-based), or -1.0 when index is past the end */
236 | double HTS_Label_get_end_frame(HTS_Label * label, size_t index)
237 | {
238 |    HTS_LabelString *node = label->head;
239 |    size_t remaining = index;
240 |
241 |    while (remaining > 0 && node != NULL) {      /* advance index entries, or until the list ends */
242 |       node = node->next;
243 |       remaining--;
244 |    }
245 |    return node != NULL ? node->end : -1.0;
246 | }
247 |
248 | /* HTS_Label_clear: release every entry together with its name, then reset label to empty */
249 | void HTS_Label_clear(HTS_Label * label)
250 | {
251 |    HTS_LabelString *node = label->head;
252 |    while (node != NULL) {
253 |       HTS_LabelString *doomed = node;
254 |       node = node->next;                /* read the link before the entry is freed */
255 |       HTS_free(doomed->name);
256 |       HTS_free(doomed);
257 |    }
258 |    HTS_Label_initialize(label);
259 | }
260 |
261 | HTS_LABEL_C_END;
262 |
263 | #endif /* !HTS_LABEL_C */
264 |
--------------------------------------------------------------------------------
/src/lib/HTS_misc.c:
--------------------------------------------------------------------------------
1 | /* ----------------------------------------------------------------- */
2 | /* The HMM-Based Speech Synthesis Engine "hts_engine API" */
3 | /* developed by HTS Working Group */
4 | /* http://hts-engine.sourceforge.net/ */
5 | /* ----------------------------------------------------------------- */
6 | /* */
7 | /* Copyright (c) 2001-2014 Nagoya Institute of Technology */
8 | /* Department of Computer Science */
9 | /* */
10 | /* 2001-2008 Tokyo Institute of Technology */
11 | /* Interdisciplinary Graduate School of */
12 | /* Science and Engineering */
13 | /* */
14 | /* All rights reserved. */
15 | /* */
16 | /* Redistribution and use in source and binary forms, with or */
17 | /* without modification, are permitted provided that the following */
18 | /* conditions are met: */
19 | /* */
20 | /* - Redistributions of source code must retain the above copyright */
21 | /* notice, this list of conditions and the following disclaimer. */
22 | /* - Redistributions in binary form must reproduce the above */
23 | /* copyright notice, this list of conditions and the following */
24 | /* disclaimer in the documentation and/or other materials provided */
25 | /* with the distribution. */
26 | /* - Neither the name of the HTS working group nor the names of its */
27 | /* contributors may be used to endorse or promote products derived */
28 | /* from this software without specific prior written permission. */
29 | /* */
30 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
31 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
32 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
33 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
34 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
35 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */
36 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
37 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
38 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
39 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */
40 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */
41 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
42 | /* POSSIBILITY OF SUCH DAMAGE. */
43 | /* ----------------------------------------------------------------- */
44 |
45 | #ifndef HTS_MISC_C
46 | #define HTS_MISC_C
47 |
48 | #ifdef __cplusplus
49 | #define HTS_MISC_C_START extern "C" {
50 | #define HTS_MISC_C_END }
51 | #else
52 | #define HTS_MISC_C_START
53 | #define HTS_MISC_C_END
54 | #endif /* __CPLUSPLUS */
55 |
56 | HTS_MISC_C_START;
57 |
#include <stdarg.h>             /* for va_list */
#include <stdio.h>              /* for FILE, fopen(), ftell(), fprintf() */
#include <stdlib.h>             /* for exit(),calloc(),free() */
#include <string.h>             /* for strcpy(),strlen() */
61 |
62 | /* hts_engine libraries */
63 | #include "HTS_hidden.h"
64 |
65 | #ifdef FESTIVAL
66 | #include "EST_walloc.h"
67 | #endif /* FESTIVAL */
68 |
/* Tags stored in the `type` field of HTS_File by the HTS_fopen_* functions: */
/* HTS_FILE: `pointer` holds a FILE * obtained from fopen() */
#define HTS_FILE 0
/* HTS_DATA: `pointer` holds an HTS_Data * (in-memory stream) */
#define HTS_DATA 1

/* HTS_Data: in-memory stream used as the payload of an HTS_DATA-type HTS_File */
typedef struct _HTS_Data {
   unsigned char *data;         /* byte buffer; allocated by the HTS_fopen_* functions and released in HTS_fclose() */
   size_t size;                 /* number of bytes in data */
   size_t index;                /* current read position; reads report end-of-data once index >= size */
} HTS_Data;
77 |
78 | /* HTS_fopen_from_fn: wrapper for fopen */
79 | HTS_File *HTS_fopen_from_fn(const char *name, const char *opt)
80 | {
81 | HTS_File *fp = (HTS_File *) HTS_calloc(1, sizeof(HTS_File));
82 |
83 | fp->type = HTS_FILE;
84 | fp->pointer = (void *) fopen(name, opt);
85 |
86 | if (fp->pointer == NULL) {
87 | HTS_error(0, "HTS_fopen: Cannot open %s.\n", name);
88 | HTS_free(fp);
89 | return NULL;
90 | }
91 |
92 | return fp;
93 | }
94 |
95 | /* HTS_fopen_from_fp: wrapper for fopen */
96 | HTS_File *HTS_fopen_from_fp(HTS_File * fp, size_t size)
97 | {
98 | if (fp == NULL || size == 0)
99 | return NULL;
100 | else if (fp->type == HTS_FILE) {
101 | HTS_Data *d;
102 | HTS_File *f;
103 | d = (HTS_Data *) HTS_calloc(1, sizeof(HTS_Data));
104 | d->data = (unsigned char *) HTS_calloc(size, sizeof(unsigned char));
105 | d->size = size;
106 | d->index = 0;
107 | if (fread(d->data, sizeof(unsigned char), size, (FILE *) fp->pointer) != size) {
108 | free(d->data);
109 | free(d);
110 | return NULL;
111 | }
112 | f = (HTS_File *) HTS_calloc(1, sizeof(HTS_File));
113 | f->type = HTS_DATA;
114 | f->pointer = (void *) d;
115 | return f;
116 | } else if (fp->type == HTS_DATA) {
117 | HTS_File *f;
118 | HTS_Data *tmp1, *tmp2;
119 | tmp1 = (HTS_Data *) fp->pointer;
120 | if (tmp1->index + size > tmp1->size)
121 | return NULL;
122 | tmp2 = (HTS_Data *) HTS_calloc(1, sizeof(HTS_Data));
123 | tmp2->data = (unsigned char *) HTS_calloc(size, sizeof(unsigned char));
124 | tmp2->size = size;
125 | tmp2->index = 0;
126 | memcpy(tmp2->data, &tmp1->data[tmp1->index], size);
127 | tmp1->index += size;
128 | f = (HTS_File *) HTS_calloc(1, sizeof(HTS_File));
129 | f->type = HTS_DATA;
130 | f->pointer = (void *) tmp2;
131 | return f;
132 | }
133 |
134 | HTS_error(0, "HTS_fopen_from_fp: Unknown file type.\n");
135 | return NULL;
136 | }
137 |
138 | /* HTS_fopen_from_data: wrapper for fopen */
139 | HTS_File *HTS_fopen_from_data(void *data, size_t size)
140 | {
141 | HTS_Data *d;
142 | HTS_File *f;
143 |
144 | if (data == NULL || size == 0)
145 | return NULL;
146 |
147 | d = (HTS_Data *) HTS_calloc(1, sizeof(HTS_Data));
148 | d->data = (unsigned char *) HTS_calloc(size, sizeof(unsigned char));
149 | d->size = size;
150 | d->index = 0;
151 |
152 | memcpy(d->data, data, size);
153 |
154 | f = (HTS_File *) HTS_calloc(1, sizeof(HTS_File));
155 | f->type = HTS_DATA;
156 | f->pointer = (void *) d;
157 |
158 | return f;
159 | }
160 |
161 | /* HTS_fclose: wrapper for fclose */
162 | void HTS_fclose(HTS_File * fp)
163 | {
164 | if (fp == NULL) {
165 | return;
166 | } else if (fp->type == HTS_FILE) {
167 | if (fp->pointer != NULL)
168 | fclose((FILE *) fp->pointer);
169 | HTS_free(fp);
170 | return;
171 | } else if (fp->type == HTS_DATA) {
172 | if (fp->pointer != NULL) {
173 | HTS_Data *d = (HTS_Data *) fp->pointer;
174 | if (d->data != NULL)
175 | HTS_free(d->data);
176 | HTS_free(d);
177 | }
178 | HTS_free(fp);
179 | return;
180 | }
181 | HTS_error(0, "HTS_fclose: Unknown file type.\n");
182 | }
183 |
184 | /* HTS_fgetc: wrapper for fgetc */
185 | int HTS_fgetc(HTS_File * fp)
186 | {
187 | if (fp == NULL) {
188 | return EOF;
189 | } else if (fp->type == HTS_FILE) {
190 | return fgetc((FILE *) fp->pointer);
191 | } else if (fp->type == HTS_DATA) {
192 | HTS_Data *d = (HTS_Data *) fp->pointer;
193 | if (d->size <= d->index)
194 | return EOF;
195 | return (int) d->data[d->index++];
196 | }
197 | HTS_error(0, "HTS_fgetc: Unknown file type.\n");
198 | return EOF;
199 | }
200 |
201 | /* HTS_feof: wrapper for feof */
202 | int HTS_feof(HTS_File * fp)
203 | {
204 | if (fp == NULL) {
205 | return 1;
206 | } else if (fp->type == HTS_FILE) {
207 | return feof((FILE *) fp->pointer);
208 | } else if (fp->type == HTS_DATA) {
209 | HTS_Data *d = (HTS_Data *) fp->pointer;
210 | return d->size <= d->index ? 1 : 0;
211 | }
212 | HTS_error(0, "HTS_feof: Unknown file type.\n");
213 | return 1;
214 | }
215 |
216 | /* HTS_fseek: wrapper for fseek */
217 | int HTS_fseek(HTS_File * fp, long offset, int origin)
218 | {
219 | if (fp == NULL) {
220 | return 1;
221 | } else if (fp->type == HTS_FILE) {
222 | return fseek((FILE *) fp->pointer, offset, origin);
223 | } else if (fp->type == HTS_DATA) {
224 | HTS_Data *d = (HTS_Data *) fp->pointer;
225 | if (origin == SEEK_SET) {
226 | d->index = (size_t) offset;
227 | } else if (origin == SEEK_CUR) {
228 | d->index += offset;
229 | } else if (origin == SEEK_END) {
230 | d->index = d->size + offset;
231 | } else {
232 | return 1;
233 | }
234 | return 0;
235 | }
236 | HTS_error(0, "HTS_fseek: Unknown file type.\n");
237 | return 1;
238 | }
239 |
240 | /* HTS_ftell: rapper for ftell */
241 | size_t HTS_ftell(HTS_File * fp)
242 | {
243 | if (fp == NULL) {
244 | return 0;
245 | } else if (fp->type == HTS_FILE) {
246 | fpos_t pos;
247 | fgetpos((FILE *) fp->pointer, &pos);
248 | #if defined(_WIN32) || defined(__CYGWIN__) || defined(__APPLE__) || defined(__ANDROID__)
249 | return (size_t) pos;
250 | #else
251 | return (size_t) pos.__pos;
252 | #endif /* _WIN32 || __CYGWIN__ || __APPLE__ || __ANDROID__ */
253 | } else if (fp->type == HTS_DATA) {
254 | HTS_Data *d = (HTS_Data *) fp->pointer;
255 | return d->index;
256 | }
257 | HTS_error(0, "HTS_ftell: Unknown file type.\n");
258 | return 0;
259 | }
260 |
261 | /* HTS_fread: wrapper for fread */
262 | static size_t HTS_fread(void *buf, size_t size, size_t n, HTS_File * fp)
263 | {
264 | if (fp == NULL || size == 0 || n == 0) {
265 | return 0;
266 | }
267 | if (fp->type == HTS_FILE) {
268 | return fread(buf, size, n, (FILE *) fp->pointer);
269 | } else if (fp->type == HTS_DATA) {
270 | HTS_Data *d = (HTS_Data *) fp->pointer;
271 | size_t i, length = size * n;
272 | unsigned char *c = (unsigned char *) buf;
273 | for (i = 0; i < length; i++) {
274 | if (d->index < d->size)
275 | c[i] = d->data[d->index++];
276 | else
277 | break;
278 | }
279 | if (i == 0)
280 | return 0;
281 | else
282 | return i / size;
283 | }
284 | HTS_error(0, "HTS_fread: Unknown file type.\n");
285 | return 0;
286 | }
287 |
/* HTS_byte_swap: reverse the byte order of `block` consecutive elements of `size` bytes each, in place */
static void HTS_byte_swap(void *p, size_t size, size_t block)
{
   char *element = (char *) p;
   size_t remaining;

   for (remaining = block; remaining > 0; remaining--) {
      size_t lo = 0;
      size_t hi = size;

      /* exchange bytes from both ends towards the middle; the middle byte of an odd size stays put */
      while (lo + 1 < hi) {
         char held;

         hi--;
         held = element[lo];
         element[lo] = element[hi];
         element[hi] = held;
         lo++;
      }
      element += size;
   }
}
305 |
306 | /* HTS_fread_big_endian: fread with byteswap */
307 | size_t HTS_fread_big_endian(void *buf, size_t size, size_t n, HTS_File * fp)
308 | {
309 | size_t block = HTS_fread(buf, size, n, fp);
310 |
311 | #ifdef WORDS_LITTLEENDIAN
312 | HTS_byte_swap(buf, size, block);
313 | #endif /* WORDS_LITTLEENDIAN */
314 |
315 | return block;
316 | }
317 |
318 | /* HTS_fread_little_endian: fread with byteswap */
319 | size_t HTS_fread_little_endian(void *buf, size_t size, size_t n, HTS_File * fp)
320 | {
321 | size_t block = HTS_fread(buf, size, n, fp);
322 |
323 | #ifdef WORDS_BIGENDIAN
324 | HTS_byte_swap(buf, size, block);
325 | #endif /* WORDS_BIGENDIAN */
326 |
327 | return block;
328 | }
329 |
/* HTS_fwrite_little_endian: fwrite with byteswap */
/*   buf  : n elements of `size` bytes each in host byte order (never modified) */
/*   fp   : destination stdio stream */
/*   returns the number of elements written, as reported by fwrite() */
/*   When WORDS_BIGENDIAN is defined the elements are byte-reversed in a temporary copy before */
/*   writing: swapping in place would modify the caller's const data, and the swap must cover */
/*   n elements (not n * size elements, which runs past the end of the buffer). */
size_t HTS_fwrite_little_endian(const void *buf, size_t size, size_t n, FILE * fp)
{
#ifdef WORDS_BIGENDIAN
   void *swapped;
   size_t written;

   /* nothing to write, or a byte count that does not fit in size_t */
   if (size == 0 || n == 0 || n > (size_t) -1 / size)
      return 0;

   swapped = HTS_calloc(n, size);
   memcpy(swapped, buf, n * size);
   HTS_byte_swap(swapped, size, n);
   written = fwrite(swapped, size, n, fp);
   HTS_free(swapped);

   return written;
#else
   return fwrite(buf, size, n, fp);
#endif                          /* WORDS_BIGENDIAN */
}
338 |
339 | /* HTS_get_pattern_token: get pattern token (single/double quote can be used) */
340 | HTS_Boolean HTS_get_pattern_token(HTS_File * fp, char *buff)
341 | {
342 | char c;
343 | size_t i;
344 | HTS_Boolean squote = FALSE, dquote = FALSE;
345 |
346 | if (fp == NULL || HTS_feof(fp))
347 | return FALSE;
348 | c = HTS_fgetc(fp);
349 |
350 | while (c == ' ' || c == '\n') {
351 | if (HTS_feof(fp))
352 | return FALSE;
353 | c = HTS_fgetc(fp);
354 | }
355 |
356 | if (c == '\'') { /* single quote case */
357 | if (HTS_feof(fp))
358 | return FALSE;
359 | c = HTS_fgetc(fp);
360 | squote = TRUE;
361 | }
362 |
363 | if (c == '\"') { /*double quote case */
364 | if (HTS_feof(fp))
365 | return FALSE;
366 | c = HTS_fgetc(fp);
367 | dquote = TRUE;
368 | }
369 |
370 | if (c == ',') { /*special character ',' */
371 | strcpy(buff, ",");
372 | return TRUE;
373 | }
374 |
375 | i = 0;
376 | while (1) {
377 | buff[i++] = c;
378 | c = HTS_fgetc(fp);
379 | if (squote && c == '\'')
380 | break;
381 | if (dquote && c == '\"')
382 | break;
383 | if (!squote && !dquote) {
384 | if (c == ' ')
385 | break;
386 | if (c == '\n')
387 | break;
388 | if (HTS_feof(fp))
389 | break;
390 | }
391 | }
392 |
393 | buff[i] = '\0';
394 | return TRUE;
395 | }
396 |
397 | /* HTS_get_token: get token from file pointer (separators are space, tab, and line break) */
398 | HTS_Boolean HTS_get_token_from_fp(HTS_File * fp, char *buff)
399 | {
400 | char c;
401 | size_t i;
402 |
403 | if (fp == NULL || HTS_feof(fp))
404 | return FALSE;
405 | c = HTS_fgetc(fp);
406 | while (c == ' ' || c == '\n' || c == '\t') {
407 | if (HTS_feof(fp))
408 | return FALSE;
409 | c = HTS_fgetc(fp);
410 | if (c == EOF)
411 | return FALSE;
412 | }
413 |
414 | for (i = 0; c != ' ' && c != '\n' && c != '\t';) {
415 | buff[i++] = c;
416 | if (HTS_feof(fp))
417 | break;
418 | c = HTS_fgetc(fp);
419 | if (c == EOF)
420 | break;
421 | }
422 |
423 | buff[i] = '\0';
424 | return TRUE;
425 | }
426 |
427 | /* HTS_get_token_with_separator: get token from file pointer with specified separator */
428 | HTS_Boolean HTS_get_token_from_fp_with_separator(HTS_File * fp, char *buff, char separator)
429 | {
430 | char c;
431 | size_t i;
432 |
433 | if (fp == NULL || HTS_feof(fp))
434 | return FALSE;
435 | c = HTS_fgetc(fp);
436 | while (c == separator) {
437 | if (HTS_feof(fp))
438 | return FALSE;
439 | c = HTS_fgetc(fp);
440 | if (c == EOF)
441 | return FALSE;
442 | }
443 |
444 | for (i = 0; c != separator;) {
445 | buff[i++] = c;
446 | if (HTS_feof(fp))
447 | break;
448 | c = HTS_fgetc(fp);
449 | if (c == EOF)
450 | break;
451 | }
452 |
453 | buff[i] = '\0';
454 | return TRUE;
455 | }
456 |
457 | /* HTS_get_token_from_string: get token from string (separators are space, tab, and line break) */
458 | HTS_Boolean HTS_get_token_from_string(const char *string, size_t * index, char *buff)
459 | {
460 | char c;
461 | size_t i;
462 |
463 | c = string[(*index)];
464 | if (c == '\0')
465 | return FALSE;
466 | c = string[(*index)++];
467 | if (c == '\0')
468 | return FALSE;
469 | while (c == ' ' || c == '\n' || c == '\t') {
470 | if (c == '\0')
471 | return FALSE;
472 | c = string[(*index)++];
473 | }
474 | for (i = 0; c != ' ' && c != '\n' && c != '\t' && c != '\0'; i++) {
475 | buff[i] = c;
476 | c = string[(*index)++];
477 | }
478 |
479 | buff[i] = '\0';
480 | return TRUE;
481 | }
482 |
483 | /* HTS_get_token_from_string_with_separator: get token from string with specified separator */
484 | HTS_Boolean HTS_get_token_from_string_with_separator(const char *str, size_t * index, char *buff, char separator)
485 | {
486 | char c;
487 | size_t len = 0;
488 |
489 | if (str == NULL)
490 | return FALSE;
491 |
492 | c = str[(*index)];
493 | if (c == '\0')
494 | return FALSE;
495 | while (c == separator) {
496 | if (c == '\0')
497 | return FALSE;
498 | (*index)++;
499 | c = str[(*index)];
500 | }
501 | while (c != separator && c != '\0') {
502 | buff[len++] = c;
503 | (*index)++;
504 | c = str[(*index)];
505 | }
506 | if (c != '\0')
507 | (*index)++;
508 |
509 | buff[len] = '\0';
510 |
511 | if (len > 0)
512 | return TRUE;
513 | else
514 | return FALSE;
515 | }
516 |
/* HTS_calloc: wrapper for calloc */
/*   num  : number of elements */
/*   size : size of one element in bytes */
/*   returns zero-initialized memory for num * size bytes, or NULL when that product is 0. */
/*   Allocation failure (including a byte count that does not fit in size_t) is reported */
/*   through HTS_error() with error code 1. */
void *HTS_calloc(const size_t num, const size_t size)
{
   void *mem;

   if (num == 0 || size == 0)
      return NULL;

   /* reject requests whose total size would wrap around before they reach the allocator */
   if (num > (size_t) -1 / size) {
      HTS_error(1, "HTS_calloc: Cannot allocate memory.\n");
      return NULL;
   }
#ifdef FESTIVAL
   mem = (void *) safe_wcalloc(num * size);
#else
   mem = calloc(num, size);
#endif                          /* FESTIVAL */

   /* the result must be checked before it is touched */
   if (mem == NULL) {
      HTS_error(1, "HTS_calloc: Cannot allocate memory.\n");
      return NULL;
   }
#ifdef FESTIVAL
   memset(mem, 0, num * size);
#endif                          /* FESTIVAL */

   return mem;
}
539 |
/* HTS_free: release memory obtained from HTS_calloc() with the deallocator matching the build */
void HTS_free(void *ptr)
{
#ifndef FESTIVAL
   free(ptr);
#else
   wfree(ptr);
#endif                          /* !FESTIVAL */
}
549 |
/* HTS_strdup: return a newly allocated copy of string (release it with HTS_free) */
char *HTS_strdup(const char *string)
{
#ifndef FESTIVAL
   const size_t bytes = strlen(string) + 1;     /* characters plus the terminating NUL */
   char *copy = (char *) HTS_calloc(bytes, sizeof(char));

   memcpy(copy, string, bytes);
   return copy;
#else
   return (wstrdup(string));
#endif                          /* !FESTIVAL */
}
561 |
/* HTS_alloc_matrix: allocate x rows of y zero-initialized doubles; NULL when either dimension is 0 */
double **HTS_alloc_matrix(size_t x, size_t y)
{
   double **rows;
   size_t r = 0;

   if (y == 0 || x == 0)
      return NULL;

   /* one array of row pointers, then one separately allocated array per row */
   rows = (double **) HTS_calloc(x, sizeof(double *));
   while (r < x) {
      rows[r] = (double *) HTS_calloc(y, sizeof(double));
      r++;
   }

   return rows;
}
577 |
/* HTS_free_matrix: free double matrix */
/*   p : matrix returned by HTS_alloc_matrix(); NULL is accepted and ignored, because */
/*       HTS_alloc_matrix() returns NULL when either dimension is 0 */
/*   x : number of rows that was passed to HTS_alloc_matrix() */
void HTS_free_matrix(double **p, size_t x)
{
   size_t i;

   if (p == NULL)
      return;

   for (i = 0; i < x; i++)
      HTS_free(p[i]);
   HTS_free(p);
}
587 |
/* HTS_error: print a printf-style message to stderr; error > 0 prints "Error" and exits with that code, */
/* anything else prints "Warning" and returns */
void HTS_error(int error, const char *message, ...)
{
   va_list args;
   const int fatal = (error > 0);

   /* flush pending output first so the message appears after it */
   fflush(stdout);
   fflush(stderr);

   fputs(fatal ? "\nError: " : "\nWarning: ", stderr);

   va_start(args, message);
   vfprintf(stderr, message, args);
   va_end(args);

   fflush(stderr);

   if (fatal)
      exit(error);
}
610 |
611 | HTS_MISC_C_END;
612 |
613 | #endif /* !HTS_MISC_C */
614 |
--------------------------------------------------------------------------------
/src/lib/HTS_pstream.c:
--------------------------------------------------------------------------------
1 | /* ----------------------------------------------------------------- */
2 | /* The HMM-Based Speech Synthesis Engine "hts_engine API" */
3 | /* developed by HTS Working Group */
4 | /* http://hts-engine.sourceforge.net/ */
5 | /* ----------------------------------------------------------------- */
6 | /* */
7 | /* Copyright (c) 2001-2014 Nagoya Institute of Technology */
8 | /* Department of Computer Science */
9 | /* */
10 | /* 2001-2008 Tokyo Institute of Technology */
11 | /* Interdisciplinary Graduate School of */
12 | /* Science and Engineering */
13 | /* */
14 | /* All rights reserved. */
15 | /* */
16 | /* Redistribution and use in source and binary forms, with or */
17 | /* without modification, are permitted provided that the following */
18 | /* conditions are met: */
19 | /* */
20 | /* - Redistributions of source code must retain the above copyright */
21 | /* notice, this list of conditions and the following disclaimer. */
22 | /* - Redistributions in binary form must reproduce the above */
23 | /* copyright notice, this list of conditions and the following */
24 | /* disclaimer in the documentation and/or other materials provided */
25 | /* with the distribution. */
26 | /* - Neither the name of the HTS working group nor the names of its */
27 | /* contributors may be used to endorse or promote products derived */
28 | /* from this software without specific prior written permission. */
29 | /* */
30 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
31 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
32 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
33 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
34 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
35 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */
36 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
37 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
38 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
39 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */
40 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */
41 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
42 | /* POSSIBILITY OF SUCH DAMAGE. */
43 | /* ----------------------------------------------------------------- */
44 |
45 | #ifndef HTS_PSTREAM_C
46 | #define HTS_PSTREAM_C
47 |
48 | #ifdef __cplusplus
49 | #define HTS_PSTREAM_C_START extern "C" {
50 | #define HTS_PSTREAM_C_END }
51 | #else
52 | #define HTS_PSTREAM_C_START
53 | #define HTS_PSTREAM_C_END
54 | #endif /* __CPLUSPLUS */
55 |
56 | HTS_PSTREAM_C_START;
57 |
58 | #include /* for sqrt() */
59 |
60 | /* hts_engine libraries */
61 | #include "HTS_hidden.h"
62 |
63 | /* HTS_finv: calculate 1.0/variance function */
64 | static double HTS_finv(const double x)
65 | {
66 | if (x >= INFTY2)
67 | return 0.0;
68 | if (x <= -INFTY2)
69 | return 0.0;
70 | if (x <= INVINF2 && x >= 0)
71 | return INFTY;
72 | if (x >= -INVINF2 && x < 0)
73 | return -INFTY;
74 |
75 | return (1.0 / x);
76 | }
77 |
/* HTS_PStream_calc_wuw_and_wum: calculate W'U^{-1}W and W'U^{-1}M */
/*   pst : parameter stream; reads sm.mean, sm.ivar and the window tables, */
/*         overwrites sm.wum[0..length-1] and sm.wuw[0..length-1][0..width-1] */
/*   m   : index of the vector dimension being processed */
/*   For every frame t, only entries wuw[t][j] with t + j < length are accumulated, */
/*   i.e. row t holds the band starting at the diagonal (j = 0). */
static void HTS_PStream_calc_wuw_and_wum(HTS_PStream * pst, size_t m)
{
   size_t t, i, j;
   int shift;
   double wu;

   for (t = 0; t < pst->length; t++) {
      /* initialize */
      pst->sm.wum[t] = 0.0;
      for (i = 0; i < pst->width; i++)
         pst->sm.wuw[t][i] = 0.0;

      /* calc WUW & WUM */
      /* loop over every window i and every shift covered by that window; */
      /* frames t + shift outside [0, length) and zero coefficients contribute nothing */
      for (i = 0; i < pst->win_size; i++)
         for (shift = pst->win_l_width[i]; shift <= pst->win_r_width[i]; shift++)
            if (((int) t + shift >= 0) && ((int) t + shift < pst->length) && (pst->win_coefficient[i][-shift] != 0.0)) {
               /* wu: window coefficient times inverse variance of window i, dimension m, at frame t + shift */
               wu = pst->win_coefficient[i][-shift] * pst->sm.ivar[t + shift][i * pst->vector_length + m];
               pst->sm.wum[t] += wu * pst->sm.mean[t + shift][i * pst->vector_length + m];
               /* j - shift must not exceed the right width of window i */
               for (j = 0; (j < pst->width) && (t + j < pst->length); j++)
                  if (((int) j <= pst->win_r_width[i] + shift) && (pst->win_coefficient[i][j - shift] != 0.0))
                     pst->sm.wuw[t][j] += wu * pst->win_coefficient[i][j - shift];
            }
   }
}
103 |
104 |
/* HTS_PStream_ldl_factorization: Factorize W'*U^{-1}*W to L*D*L' (L: lower triangular, D: diagonal) */
/*   Works in place on pst->sm.wuw: after the call wuw[t][0] holds the updated diagonal value */
/*   of row t and wuw[t][i] (i >= 1) holds the off-diagonal value divided by that diagonal. */
/*   Rows are processed in increasing t because row t uses the already updated rows t - 1, t - 2, ... */
static void HTS_PStream_ldl_factorization(HTS_PStream * pst)
{
   size_t t, i, j;

   for (t = 0; t < pst->length; t++) {
      /* diagonal: subtract the contributions of the previous (at most width - 1) rows */
      for (i = 1; (i < pst->width) && (t >= i); i++)
         pst->sm.wuw[t][0] -= pst->sm.wuw[t - i][i] * pst->sm.wuw[t - i][i] * pst->sm.wuw[t - i][0];

      /* off-diagonals: same correction, then normalize by the diagonal of this row */
      for (i = 1; i < pst->width; i++) {
         for (j = 1; (i + j < pst->width) && (t >= j); j++)
            pst->sm.wuw[t][i] -= pst->sm.wuw[t - j][j] * pst->sm.wuw[t - j][i + j] * pst->sm.wuw[t - j][0];
         pst->sm.wuw[t][i] /= pst->sm.wuw[t][0];
      }
   }
}
121 |
122 | /* HTS_PStream_forward_substitution: forward subtitution for mlpg */
123 | static void HTS_PStream_forward_substitution(HTS_PStream * pst)
124 | {
125 | size_t t, i;
126 |
127 | for (t = 0; t < pst->length; t++) {
128 | pst->sm.g[t] = pst->sm.wum[t];
129 | for (i = 1; (i < pst->width) && (t >= i); i++)
130 | pst->sm.g[t] -= pst->sm.wuw[t - i][i] * pst->sm.g[t - i];
131 | }
132 | }
133 |
134 | /* HTS_PStream_backward_substitution: backward subtitution for mlpg */
135 | static void HTS_PStream_backward_substitution(HTS_PStream * pst, size_t m)
136 | {
137 | size_t rev, t, i;
138 |
139 | for (rev = 0; rev < pst->length; rev++) {
140 | t = pst->length - 1 - rev;
141 | pst->par[t][m] = pst->sm.g[t] / pst->sm.wuw[t][0];
142 | for (i = 1; (i < pst->width) && (t + i < pst->length); i++)
143 | pst->par[t][m] -= pst->sm.wuw[t][i] * pst->par[t + i][m];
144 | }
145 | }
146 |
147 | /* HTS_PStream_calc_gv: subfunction for mlpg using GV */
148 | static void HTS_PStream_calc_gv(HTS_PStream * pst, size_t m, double *mean, double *vari)
149 | {
150 | size_t t;
151 |
152 | *mean = 0.0;
153 | for (t = 0; t < pst->length; t++)
154 | if (pst->gv_switch[t])
155 | *mean += pst->par[t][m];
156 | *mean /= pst->gv_length;
157 | *vari = 0.0;
158 | for (t = 0; t < pst->length; t++)
159 | if (pst->gv_switch[t])
160 | *vari += (pst->par[t][m] - *mean) * (pst->par[t][m] - *mean);
161 | *vari /= pst->gv_length;
162 | }
163 |
164 | /* HTS_PStream_conv_gv: subfunction for mlpg using GV */
165 | static void HTS_PStream_conv_gv(HTS_PStream * pst, size_t m)
166 | {
167 | size_t t;
168 | double ratio;
169 | double mean;
170 | double vari;
171 |
172 | HTS_PStream_calc_gv(pst, m, &mean, &vari);
173 | ratio = sqrt(pst->gv_mean[m] / vari);
174 | for (t = 0; t < pst->length; t++)
175 | if (pst->gv_switch[t])
176 | pst->par[t][m] = ratio * (pst->par[t][m] - mean) + mean;
177 | }
178 |
/* HTS_PStream_calc_derivative: subfunction for mlpg using GV */
/*   pst : parameter stream; reads par, sm.wuw, sm.wum and the GV tables, overwrites sm.g */
/*   m   : index of the vector dimension being processed */
/*   returns -(hmmobj + gvobj), the negated sum of the two objective terms computed below */
/*   On return sm.g[t] holds the per-frame update value that HTS_PStream_gv_parmgen scales by */
/*   its step size and adds to par[t][m]. */
static double HTS_PStream_calc_derivative(HTS_PStream * pst, size_t m)
{
   size_t t, i;
   double mean;
   double vari;
   double dv;
   double h;
   double gvobj;
   double hmmobj;
   /* weight applied together with W1: 1 / (number of windows * number of frames) */
   double w = 1.0 / (pst->win_size * pst->length);

   HTS_PStream_calc_gv(pst, m, &mean, &vari);
   /* GV term of the objective and the common factor of its per-frame derivative */
   gvobj = -0.5 * W2 * vari * pst->gv_vari[m] * (vari - 2.0 * pst->gv_mean[m]);
   dv = -2.0 * pst->gv_vari[m] * (vari - pst->gv_mean[m]) / pst->length;

   /* g[t] = row t of the banded matrix stored in wuw times the current trajectory: */
   /* wuw[t][i] supplies the entries towards later frames, wuw[t - i][i] those towards earlier frames */
   for (t = 0; t < pst->length; t++) {
      pst->sm.g[t] = pst->sm.wuw[t][0] * pst->par[t][m];
      for (i = 1; i < pst->width; i++) {
         if (t + i < pst->length)
            pst->sm.g[t] += pst->sm.wuw[t][i] * pst->par[t + i][m];
         if (t + 1 > i)         /* same as t >= i */
            pst->sm.g[t] += pst->sm.wuw[t - i][i] * pst->par[t - i][m];
      }
   }

   for (t = 0, hmmobj = 0.0; t < pst->length; t++) {
      hmmobj += W1 * w * pst->par[t][m] * (pst->sm.wum[t] - 0.5 * pst->sm.g[t]);
      /* h: per-frame scaling value; the update below is divided by it */
      h = -W1 * w * pst->sm.wuw[t][1 - 1] - W2 * 2.0 / (pst->length * pst->length) * ((pst->length - 1) * pst->gv_vari[m] * (vari - pst->gv_mean[m]) + 2.0 * pst->gv_vari[m] * (pst->par[t][m] - mean) * (pst->par[t][m] - mean));
      /* the GV part of the update is only applied on frames whose gv_switch is set */
      if (pst->gv_switch[t])
         pst->sm.g[t] = 1.0 / h * (W1 * w * (-pst->sm.g[t] + pst->sm.wum[t]) + W2 * dv * (pst->par[t][m] - mean));
      else
         pst->sm.g[t] = 1.0 / h * (W1 * w * (-pst->sm.g[t] + pst->sm.wum[t]));
   }

   return (-(hmmobj + gvobj));
}
216 |
217 | /* HTS_PStream_gv_parmgen: function for mlpg using GV */
218 | static void HTS_PStream_gv_parmgen(HTS_PStream * pst, size_t m)
219 | {
220 | size_t t, i;
221 | double step = STEPINIT;
222 | double prev = 0.0;
223 | double obj;
224 |
225 | if (pst->gv_length == 0)
226 | return;
227 |
228 | HTS_PStream_conv_gv(pst, m);
229 | if (GV_MAX_ITERATION > 0) {
230 | HTS_PStream_calc_wuw_and_wum(pst, m);
231 | for (i = 1; i <= GV_MAX_ITERATION; i++) {
232 | obj = HTS_PStream_calc_derivative(pst, m);
233 | if (i > 1) {
234 | if (obj > prev)
235 | step *= STEPDEC;
236 | if (obj < prev)
237 | step *= STEPINC;
238 | }
239 | for (t = 0; t < pst->length; t++)
240 | pst->par[t][m] += step * pst->sm.g[t];
241 | prev = obj;
242 | }
243 | }
244 | }
245 |
246 | /* HTS_PStream_mlpg: generate sequence of speech parameter vector maximizing its output probability for given pdf sequence */
247 | static void HTS_PStream_mlpg(HTS_PStream * pst)
248 | {
249 | size_t m;
250 |
251 | if (pst->length == 0)
252 | return;
253 |
254 | for (m = 0; m < pst->vector_length; m++) {
255 | HTS_PStream_calc_wuw_and_wum(pst, m);
256 | HTS_PStream_ldl_factorization(pst); /* LDL factorization */
257 | HTS_PStream_forward_substitution(pst); /* forward substitution */
258 | HTS_PStream_backward_substitution(pst, m); /* backward substitution */
259 | if (pst->gv_length > 0)
260 | HTS_PStream_gv_parmgen(pst, m);
261 | }
262 | }
263 |
264 | /* HTS_PStreamSet_initialize: initialize parameter stream set */
265 | void HTS_PStreamSet_initialize(HTS_PStreamSet * pss)
266 | {
267 | pss->pstream = NULL;
268 | pss->nstream = 0;
269 | pss->total_frame = 0;
270 | }
271 |
272 | /* HTS_PStreamSet_create: parameter generation using GV weight */
273 | HTS_Boolean HTS_PStreamSet_create(HTS_PStreamSet * pss, HTS_SStreamSet * sss, double *msd_threshold, double *gv_weight)
274 | {
275 | size_t i, j, k, l, m;
276 | int shift;
277 | size_t frame, msd_frame, state;
278 |
279 | HTS_PStream *pst;
280 | HTS_Boolean not_bound;
281 |
282 | if (pss->nstream != 0) {
283 | HTS_error(1, "HTS_PstreamSet_create: HTS_PStreamSet should be clear.\n");
284 | return FALSE;
285 | }
286 |
287 | /* initialize */
288 | pss->nstream = HTS_SStreamSet_get_nstream(sss);
289 | pss->pstream = (HTS_PStream *) HTS_calloc(pss->nstream, sizeof(HTS_PStream));
290 | pss->total_frame = HTS_SStreamSet_get_total_frame(sss);
291 |
292 | /* create */
293 | for (i = 0; i < pss->nstream; i++) {
294 | pst = &pss->pstream[i];
295 | if (HTS_SStreamSet_is_msd(sss, i)) { /* for MSD */
296 | pst->length = 0;
297 | for (state = 0; state < HTS_SStreamSet_get_total_state(sss); state++)
298 | if (HTS_SStreamSet_get_msd(sss, i, state) > msd_threshold[i])
299 | pst->length += HTS_SStreamSet_get_duration(sss, state);
300 | pst->msd_flag = (HTS_Boolean *) HTS_calloc(pss->total_frame, sizeof(HTS_Boolean));
301 | for (state = 0, frame = 0; state < HTS_SStreamSet_get_total_state(sss); state++)
302 | if (HTS_SStreamSet_get_msd(sss, i, state) > msd_threshold[i])
303 | for (j = 0; j < HTS_SStreamSet_get_duration(sss, state); j++) {
304 | pst->msd_flag[frame] = TRUE;
305 | frame++;
306 | } else
307 | for (j = 0; j < HTS_SStreamSet_get_duration(sss, state); j++) {
308 | pst->msd_flag[frame] = FALSE;
309 | frame++;
310 | }
311 | } else { /* for non MSD */
312 | pst->length = pss->total_frame;
313 | pst->msd_flag = NULL;
314 | }
315 | pst->vector_length = HTS_SStreamSet_get_vector_length(sss, i);
316 | pst->width = HTS_SStreamSet_get_window_max_width(sss, i) * 2 + 1; /* band width of R */
317 | pst->win_size = HTS_SStreamSet_get_window_size(sss, i);
318 | if (pst->length > 0) {
319 | pst->sm.mean = HTS_alloc_matrix(pst->length, pst->vector_length * pst->win_size);
320 | pst->sm.ivar = HTS_alloc_matrix(pst->length, pst->vector_length * pst->win_size);
321 | pst->sm.wum = (double *) HTS_calloc(pst->length, sizeof(double));
322 | pst->sm.wuw = HTS_alloc_matrix(pst->length, pst->width);
323 | pst->sm.g = (double *) HTS_calloc(pst->length, sizeof(double));
324 | pst->par = HTS_alloc_matrix(pst->length, pst->vector_length);
325 | }
326 | /* copy dynamic window */
327 | pst->win_l_width = (int *) HTS_calloc(pst->win_size, sizeof(int));
328 | pst->win_r_width = (int *) HTS_calloc(pst->win_size, sizeof(int));
329 | pst->win_coefficient = (double **) HTS_calloc(pst->win_size, sizeof(double));
330 | for (j = 0; j < pst->win_size; j++) {
331 | pst->win_l_width[j] = HTS_SStreamSet_get_window_left_width(sss, i, j);
332 | pst->win_r_width[j] = HTS_SStreamSet_get_window_right_width(sss, i, j);
333 | if (pst->win_l_width[j] + pst->win_r_width[j] == 0)
334 | pst->win_coefficient[j] = (double *)
335 | HTS_calloc(-2 * pst->win_l_width[j] + 1, sizeof(double));
336 | else
337 | pst->win_coefficient[j] = (double *)
338 | HTS_calloc(-2 * pst->win_l_width[j], sizeof(double));
339 | pst->win_coefficient[j] -= pst->win_l_width[j];
340 | for (shift = pst->win_l_width[j]; shift <= pst->win_r_width[j]; shift++)
341 | pst->win_coefficient[j][shift] = HTS_SStreamSet_get_window_coefficient(sss, i, j, shift);
342 | }
343 | /* copy GV */
344 | if (HTS_SStreamSet_use_gv(sss, i)) {
345 | pst->gv_mean = (double *) HTS_calloc(pst->vector_length, sizeof(double));
346 | pst->gv_vari = (double *) HTS_calloc(pst->vector_length, sizeof(double));
347 | for (j = 0; j < pst->vector_length; j++) {
348 | pst->gv_mean[j] = HTS_SStreamSet_get_gv_mean(sss, i, j) * gv_weight[i];
349 | pst->gv_vari[j] = HTS_SStreamSet_get_gv_vari(sss, i, j);
350 | }
351 | pst->gv_switch = (HTS_Boolean *) HTS_calloc(pst->length, sizeof(HTS_Boolean));
352 | if (HTS_SStreamSet_is_msd(sss, i)) { /* for MSD */
353 | for (state = 0, frame = 0, msd_frame = 0; state < HTS_SStreamSet_get_total_state(sss); state++)
354 | for (j = 0; j < HTS_SStreamSet_get_duration(sss, state); j++, frame++)
355 | if (pst->msd_flag[frame])
356 | pst->gv_switch[msd_frame++] = HTS_SStreamSet_get_gv_switch(sss, i, state);
357 | } else { /* for non MSD */
358 | for (state = 0, frame = 0; state < HTS_SStreamSet_get_total_state(sss); state++)
359 | for (j = 0; j < HTS_SStreamSet_get_duration(sss, state); j++)
360 | pst->gv_switch[frame++] = HTS_SStreamSet_get_gv_switch(sss, i, state);
361 | }
362 | for (j = 0, pst->gv_length = 0; j < pst->length; j++)
363 | if (pst->gv_switch[j])
364 | pst->gv_length++;
365 | } else {
366 | pst->gv_switch = NULL;
367 | pst->gv_length = 0;
368 | pst->gv_mean = NULL;
369 | pst->gv_vari = NULL;
370 | }
371 | /* copy pdfs */
372 | if (HTS_SStreamSet_is_msd(sss, i)) { /* for MSD */
373 | for (state = 0, frame = 0, msd_frame = 0; state < HTS_SStreamSet_get_total_state(sss); state++) {
374 | for (j = 0; j < HTS_SStreamSet_get_duration(sss, state); j++) {
375 | if (pst->msd_flag[frame]) {
376 | /* check current frame is MSD boundary or not */
377 | for (k = 0; k < pst->win_size; k++) {
378 | not_bound = TRUE;
379 | for (shift = pst->win_l_width[k]; shift <= pst->win_r_width[k]; shift++)
380 | if ((int) frame + shift < 0 || (int) pss->total_frame <= (int) frame + shift || !pst->msd_flag[frame + shift]) {
381 | not_bound = FALSE;
382 | break;
383 | }
384 | for (l = 0; l < pst->vector_length; l++) {
385 | m = pst->vector_length * k + l;
386 | pst->sm.mean[msd_frame][m] = HTS_SStreamSet_get_mean(sss, i, state, m);
387 | if (not_bound || k == 0)
388 | pst->sm.ivar[msd_frame][m] = HTS_finv(HTS_SStreamSet_get_vari(sss, i, state, m));
389 | else
390 | pst->sm.ivar[msd_frame][m] = 0.0;
391 | }
392 | }
393 | msd_frame++;
394 | }
395 | frame++;
396 | }
397 | }
398 | } else { /* for non MSD */
399 | for (state = 0, frame = 0; state < HTS_SStreamSet_get_total_state(sss); state++) {
400 | for (j = 0; j < HTS_SStreamSet_get_duration(sss, state); j++) {
401 | for (k = 0; k < pst->win_size; k++) {
402 | not_bound = TRUE;
403 | for (shift = pst->win_l_width[k]; shift <= pst->win_r_width[k]; shift++)
404 | if ((int) frame + shift < 0 || (int) pss->total_frame <= (int) frame + shift) {
405 | not_bound = FALSE;
406 | break;
407 | }
408 | for (l = 0; l < pst->vector_length; l++) {
409 | m = pst->vector_length * k + l;
410 | pst->sm.mean[frame][m] = HTS_SStreamSet_get_mean(sss, i, state, m);
411 | if (not_bound || k == 0)
412 | pst->sm.ivar[frame][m] = HTS_finv(HTS_SStreamSet_get_vari(sss, i, state, m));
413 | else
414 | pst->sm.ivar[frame][m] = 0.0;
415 | }
416 | }
417 | frame++;
418 | }
419 | }
420 | }
421 | /* parameter generation */
422 | HTS_PStream_mlpg(pst);
423 | }
424 |
425 | return TRUE;
426 | }
427 |
428 | /* HTS_PStreamSet_get_nstream: get number of stream */
429 | size_t HTS_PStreamSet_get_nstream(HTS_PStreamSet * pss)
430 | {
431 | return pss->nstream;
432 | }
433 |
434 | /* HTS_PStreamSet_get_vector_length: get feature length */
435 | size_t HTS_PStreamSet_get_vector_length(HTS_PStreamSet * pss, size_t stream_index)
436 | {
437 | return pss->pstream[stream_index].vector_length;
438 | }
439 |
440 | /* HTS_PStreamSet_get_total_frame: get total number of frame */
441 | size_t HTS_PStreamSet_get_total_frame(HTS_PStreamSet * pss)
442 | {
443 | return pss->total_frame;
444 | }
445 |
446 | /* HTS_PStreamSet_get_parameter: get parameter */
447 | double HTS_PStreamSet_get_parameter(HTS_PStreamSet * pss, size_t stream_index, size_t frame_index, size_t vector_index)
448 | {
449 | return pss->pstream[stream_index].par[frame_index][vector_index];
450 | }
451 |
452 | /* HTS_PStreamSet_get_parameter_vector: get parameter vector*/
453 | double *HTS_PStreamSet_get_parameter_vector(HTS_PStreamSet * pss, size_t stream_index, size_t frame_index)
454 | {
455 | return pss->pstream[stream_index].par[frame_index];
456 | }
457 |
458 | /* HTS_PStreamSet_get_msd_flag: get generated MSD flag per frame */
459 | HTS_Boolean HTS_PStreamSet_get_msd_flag(HTS_PStreamSet * pss, size_t stream_index, size_t frame_index)
460 | {
461 | return pss->pstream[stream_index].msd_flag[frame_index];
462 | }
463 |
464 | /* HTS_PStreamSet_is_msd: get MSD flag */
465 | HTS_Boolean HTS_PStreamSet_is_msd(HTS_PStreamSet * pss, size_t stream_index)
466 | {
467 | return pss->pstream[stream_index].msd_flag ? TRUE : FALSE;
468 | }
469 |
470 | /* HTS_PStreamSet_clear: free parameter stream set */
471 | void HTS_PStreamSet_clear(HTS_PStreamSet * pss)
472 | {
473 | size_t i, j;
474 | HTS_PStream *pstream;
475 |
476 | if (pss->pstream) {
477 | for (i = 0; i < pss->nstream; i++) {
478 | pstream = &pss->pstream[i];
479 | if (pstream->sm.wum)
480 | HTS_free(pstream->sm.wum);
481 | if (pstream->sm.g)
482 | HTS_free(pstream->sm.g);
483 | if (pstream->sm.wuw)
484 | HTS_free_matrix(pstream->sm.wuw, pstream->length);
485 | if (pstream->sm.ivar)
486 | HTS_free_matrix(pstream->sm.ivar, pstream->length);
487 | if (pstream->sm.mean)
488 | HTS_free_matrix(pstream->sm.mean, pstream->length);
489 | if (pstream->par)
490 | HTS_free_matrix(pstream->par, pstream->length);
491 | if (pstream->msd_flag)
492 | HTS_free(pstream->msd_flag);
493 | if (pstream->win_coefficient) {
494 | for (j = 0; j < pstream->win_size; j++) {
495 | pstream->win_coefficient[j] += pstream->win_l_width[j];
496 | HTS_free(pstream->win_coefficient[j]);
497 | }
498 | }
499 | if (pstream->gv_mean)
500 | HTS_free(pstream->gv_mean);
501 | if (pstream->gv_vari)
502 | HTS_free(pstream->gv_vari);
503 | if (pstream->win_coefficient)
504 | HTS_free(pstream->win_coefficient);
505 | if (pstream->win_l_width)
506 | HTS_free(pstream->win_l_width);
507 | if (pstream->win_r_width)
508 | HTS_free(pstream->win_r_width);
509 | if (pstream->gv_switch)
510 | HTS_free(pstream->gv_switch);
511 | }
512 | HTS_free(pss->pstream);
513 | }
514 | HTS_PStreamSet_initialize(pss);
515 | }
516 |
517 | HTS_PSTREAM_C_END;
518 |
519 | #endif /* !HTS_PSTREAM_C */
520 |
--------------------------------------------------------------------------------
/src/lib/HTS_sstream.c:
--------------------------------------------------------------------------------
1 | /* ----------------------------------------------------------------- */
2 | /* The HMM-Based Speech Synthesis Engine "hts_engine API" */
3 | /* developed by HTS Working Group */
4 | /* http://hts-engine.sourceforge.net/ */
5 | /* ----------------------------------------------------------------- */
6 | /* */
7 | /* Copyright (c) 2001-2014 Nagoya Institute of Technology */
8 | /* Department of Computer Science */
9 | /* */
10 | /* 2001-2008 Tokyo Institute of Technology */
11 | /* Interdisciplinary Graduate School of */
12 | /* Science and Engineering */
13 | /* */
14 | /* All rights reserved. */
15 | /* */
16 | /* Redistribution and use in source and binary forms, with or */
17 | /* without modification, are permitted provided that the following */
18 | /* conditions are met: */
19 | /* */
20 | /* - Redistributions of source code must retain the above copyright */
21 | /* notice, this list of conditions and the following disclaimer. */
22 | /* - Redistributions in binary form must reproduce the above */
23 | /* copyright notice, this list of conditions and the following */
24 | /* disclaimer in the documentation and/or other materials provided */
25 | /* with the distribution. */
26 | /* - Neither the name of the HTS working group nor the names of its */
27 | /* contributors may be used to endorse or promote products derived */
28 | /* from this software without specific prior written permission. */
29 | /* */
30 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
31 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
32 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
33 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
34 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
35 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */
36 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
37 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
38 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
39 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */
40 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */
41 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
42 | /* POSSIBILITY OF SUCH DAMAGE. */
43 | /* ----------------------------------------------------------------- */
44 |
45 | #ifndef HTS_SSTREAM_C
46 | #define HTS_SSTREAM_C
47 |
48 | #ifdef __cplusplus
49 | #define HTS_SSTREAM_C_START extern "C" {
50 | #define HTS_SSTREAM_C_END }
51 | #else
52 | #define HTS_SSTREAM_C_START
53 | #define HTS_SSTREAM_C_END
54 | #endif /* __CPLUSPLUS */
55 |
56 | HTS_SSTREAM_C_START;
57 |
58 | #include
59 | #include
60 |
61 | /* hts_engine libraries */
62 | #include "HTS_hidden.h"
63 |
/* HTS_set_default_duration: round each mean duration to the nearest frame count (never below 1) */
/*   duration : [out] frame count per state, size entries                                        */
/*   mean     : [in]  mean duration per state, size entries                                      */
/*   vari     : not read by this function                                                        */
/*   returns  : the sum of all assigned durations, as a double                                   */
static double HTS_set_default_duration(size_t * duration, double *mean, double *vari, size_t size)
{
   size_t state;
   size_t total = 0;
   double rounded;

   for (state = 0; state < size; state++) {
      /* adding 0.5 before truncation rounds to nearest */
      rounded = mean[state] + 0.5;
      duration[state] = (rounded < 1.0) ? 1 : (size_t) rounded;
      total += duration[state];
   }

   return (double) total;
}
82 |
/* HTS_set_specified_duration: set duration from state duration probability distribution and specified frame length */
/*   duration     : [out] frame count per state, size entries, each at least 1                                     */
/*   mean, vari   : [in]  duration mean / variance per state, size entries                                         */
/*   size         : number of states                                                                               */
/*   frame_length : requested total length in frames (rounded to nearest, at least 1)                              */
/*   returns      : number of frames distributed over the states, as a double                                      */
/*                  (size when the request is shorter than one frame per state, 0 when size is 0)                  */
static double HTS_set_specified_duration(size_t * duration, double *mean, double *vari, size_t size, double frame_length)
{
   size_t i;
   size_t best;                 /* index of the state whose duration is adjusted next */
   int found;                   /* non-zero once "best" holds a valid candidate */
   double temp1, temp2;
   double rho = 0.0;
   size_t sum = 0;
   size_t target_length;

   /* no state to distribute frames over: touching duration[] would be out of bounds */
   if (size == 0)
      return 0.0;

   /* get the target frame length */
   if (frame_length + 0.5 < 1.0)
      target_length = 1;
   else
      target_length = (size_t) (frame_length + 0.5);

   /* check the specified duration */
   if (target_length <= size) {
      if (target_length < size)
         HTS_error(-1, "HTS_set_specified_duration: Specified frame length is too short.\n");
      for (i = 0; i < size; i++)
         duration[i] = 1;
      return (double) size;
   }

   /* RHO calculation */
   temp1 = 0.0;
   temp2 = 0.0;
   for (i = 0; i < size; i++) {
      temp1 += mean[i];
      temp2 += vari[i];
   }
   /* with a zero variance sum rho would be inf/NaN and the cast to size_t below undefined; */
   /* keep rho = 0 so the first estimation falls back to the rounded means */
   if (temp2 != 0.0)
      rho = ((double) target_length - temp1) / temp2;

   /* first estimation */
   for (i = 0; i < size; i++) {
      temp1 = mean[i] + rho * vari[i] + 0.5;
      if (temp1 < 1.0)
         duration[i] = 1;
      else
         duration[i] = (size_t) temp1;
      sum += duration[i];
   }

   /* loop estimation: move one frame per pass until the total matches the target */
   while (target_length != sum) {
      /* search flexible state and modify its duration */
      found = 0;
      best = 0;
      if (target_length > sum) {
         for (i = 0; i < size; i++) {
            temp2 = fabs(rho - ((double) duration[i] + 1 - mean[i]) / vari[i]);
            if (!found || temp1 > temp2) {
               found = 1;
               best = i;
               temp1 = temp2;
            }
         }
         sum++;
         duration[best]++;
      } else {
         /* only states longer than one frame may shrink */
         for (i = 0; i < size; i++) {
            if (duration[i] > 1) {
               temp2 = fabs(rho - ((double) duration[i] - 1 - mean[i]) / vari[i]);
               if (!found || temp1 > temp2) {
                  found = 1;
                  best = i;
                  temp1 = temp2;
               }
            }
         }
         /* unreachable while sum > target_length > size, kept as a guard against indexing garbage */
         if (!found)
            break;
         sum--;
         duration[best]--;
      }
   }

   return (double) target_length;
}
159 |
160 | /* HTS_SStreamSet_initialize: initialize state stream set */
161 | void HTS_SStreamSet_initialize(HTS_SStreamSet * sss)
162 | {
163 | sss->nstream = 0;
164 | sss->nstate = 0;
165 | sss->sstream = NULL;
166 | sss->duration = NULL;
167 | sss->total_state = 0;
168 | sss->total_frame = 0;
169 | }
170 |
171 | /* HTS_SStreamSet_create: parse label and determine state duration */
172 | HTS_Boolean HTS_SStreamSet_create(HTS_SStreamSet * sss, HTS_ModelSet * ms, HTS_Label * label, HTS_Boolean phoneme_alignment_flag, double speed, double *duration_iw, double **parameter_iw, double **gv_iw)
173 | {
174 | size_t i, j, k;
175 | double temp;
176 | int shift;
177 | size_t state;
178 | HTS_SStream *sst;
179 | double *duration_mean, *duration_vari;
180 | double frame_length;
181 | size_t next_time;
182 | size_t next_state;
183 |
184 | /* check interpolation weights */
185 | for (i = 0, temp = 0.0; i < HTS_ModelSet_get_nvoices(ms); i++)
186 | temp += duration_iw[i];
187 | if (temp == 0.0) {
188 | return FALSE;
189 | } else if (temp != 1.0) {
190 | for (i = 0; i < HTS_ModelSet_get_nvoices(ms); i++)
191 | if (duration_iw[i] != 0.0)
192 | duration_iw[i] /= temp;
193 | }
194 |
195 | for (i = 0; i < HTS_ModelSet_get_nstream(ms); i++) {
196 | for (j = 0, temp = 0.0; j < HTS_ModelSet_get_nvoices(ms); j++)
197 | temp += parameter_iw[j][i];
198 | if (temp == 0.0) {
199 | return FALSE;
200 | } else if (temp != 1.0) {
201 | for (j = 0; j < HTS_ModelSet_get_nvoices(ms); j++)
202 | if (parameter_iw[j][i] != 0.0)
203 | parameter_iw[j][i] /= temp;
204 | }
205 | if (HTS_ModelSet_use_gv(ms, i)) {
206 | for (j = 0, temp = 0.0; j < HTS_ModelSet_get_nvoices(ms); j++)
207 | temp += gv_iw[j][i];
208 | if (temp == 0.0)
209 | return FALSE;
210 | else if (temp != 1.0)
211 | for (j = 0; j < HTS_ModelSet_get_nvoices(ms); j++)
212 | if (gv_iw[j][i] != 0.0)
213 | gv_iw[j][i] /= temp;
214 | }
215 | }
216 |
217 | /* initialize state sequence */
218 | sss->nstate = HTS_ModelSet_get_nstate(ms);
219 | sss->nstream = HTS_ModelSet_get_nstream(ms);
220 | sss->total_frame = 0;
221 | sss->total_state = HTS_Label_get_size(label) * sss->nstate;
222 | sss->duration = (size_t *) HTS_calloc(sss->total_state, sizeof(size_t));
223 | sss->sstream = (HTS_SStream *) HTS_calloc(sss->nstream, sizeof(HTS_SStream));
224 | for (i = 0; i < sss->nstream; i++) {
225 | sst = &sss->sstream[i];
226 | sst->vector_length = HTS_ModelSet_get_vector_length(ms, i);
227 | sst->mean = (double **) HTS_calloc(sss->total_state, sizeof(double *));
228 | sst->vari = (double **) HTS_calloc(sss->total_state, sizeof(double *));
229 | if (HTS_ModelSet_is_msd(ms, i))
230 | sst->msd = (double *) HTS_calloc(sss->total_state, sizeof(double));
231 | else
232 | sst->msd = NULL;
233 | for (j = 0; j < sss->total_state; j++) {
234 | sst->mean[j] = (double *) HTS_calloc(sst->vector_length * HTS_ModelSet_get_window_size(ms, i), sizeof(double));
235 | sst->vari[j] = (double *) HTS_calloc(sst->vector_length * HTS_ModelSet_get_window_size(ms, i), sizeof(double));
236 | }
237 | if (HTS_ModelSet_use_gv(ms, i)) {
238 | sst->gv_switch = (HTS_Boolean *) HTS_calloc(sss->total_state, sizeof(HTS_Boolean));
239 | for (j = 0; j < sss->total_state; j++)
240 | sst->gv_switch[j] = TRUE;
241 | } else {
242 | sst->gv_switch = NULL;
243 | }
244 | }
245 |
246 | /* determine state duration */
247 | duration_mean = (double *) HTS_calloc(sss->total_state, sizeof(double));
248 | duration_vari = (double *) HTS_calloc(sss->total_state, sizeof(double));
249 | for (i = 0; i < HTS_Label_get_size(label); i++)
250 | HTS_ModelSet_get_duration(ms, HTS_Label_get_string(label, i), duration_iw, &duration_mean[i * sss->nstate], &duration_vari[i * sss->nstate]);
251 | if (phoneme_alignment_flag == TRUE) {
252 | /* use duration set by user */
253 | next_time = 0;
254 | next_state = 0;
255 | state = 0;
256 | for (i = 0; i < HTS_Label_get_size(label); i++) {
257 | temp = HTS_Label_get_end_frame(label, i);
258 | if (temp >= 0) {
259 | next_time += (size_t) HTS_set_specified_duration(&sss->duration[next_state], &duration_mean[next_state], &duration_vari[next_state], state + sss->nstate - next_state, temp - next_time);
260 | next_state = state + sss->nstate;
261 | } else if (i + 1 == HTS_Label_get_size(label)) {
262 | HTS_error(-1, "HTS_SStreamSet_create: The time of final label is not specified.\n");
263 | HTS_set_default_duration(&sss->duration[next_state], &duration_mean[next_state], &duration_vari[next_state], state + sss->nstate - next_state);
264 | }
265 | state += sss->nstate;
266 | }
267 | } else {
268 | /* determine frame length */
269 | if (speed != 1.0) {
270 | temp = 0.0;
271 | for (i = 0; i < sss->total_state; i++) {
272 | temp += duration_mean[i];
273 | }
274 | frame_length = temp / speed;
275 | HTS_set_specified_duration(sss->duration, duration_mean, duration_vari, sss->total_state, frame_length);
276 | } else {
277 | HTS_set_default_duration(sss->duration, duration_mean, duration_vari, sss->total_state);
278 | }
279 | }
280 | HTS_free(duration_mean);
281 | HTS_free(duration_vari);
282 |
283 | /* get parameter */
284 | for (i = 0, state = 0; i < HTS_Label_get_size(label); i++) {
285 | for (j = 2; j <= sss->nstate + 1; j++) {
286 | sss->total_frame += sss->duration[state];
287 | for (k = 0; k < sss->nstream; k++) {
288 | sst = &sss->sstream[k];
289 | if (sst->msd)
290 | HTS_ModelSet_get_parameter(ms, k, j, HTS_Label_get_string(label, i), (const double *const *) parameter_iw, sst->mean[state], sst->vari[state], &sst->msd[state]);
291 | else
292 | HTS_ModelSet_get_parameter(ms, k, j, HTS_Label_get_string(label, i), (const double *const *) parameter_iw, sst->mean[state], sst->vari[state], NULL);
293 | }
294 | state++;
295 | }
296 | }
297 |
298 | /* copy dynamic window */
299 | for (i = 0; i < sss->nstream; i++) {
300 | sst = &sss->sstream[i];
301 | sst->win_size = HTS_ModelSet_get_window_size(ms, i);
302 | sst->win_max_width = HTS_ModelSet_get_window_max_width(ms, i);
303 | sst->win_l_width = (int *) HTS_calloc(sst->win_size, sizeof(int));
304 | sst->win_r_width = (int *) HTS_calloc(sst->win_size, sizeof(int));
305 | sst->win_coefficient = (double **) HTS_calloc(sst->win_size, sizeof(double));
306 | for (j = 0; j < sst->win_size; j++) {
307 | sst->win_l_width[j] = HTS_ModelSet_get_window_left_width(ms, i, j);
308 | sst->win_r_width[j] = HTS_ModelSet_get_window_right_width(ms, i, j);
309 | if (sst->win_l_width[j] + sst->win_r_width[j] == 0)
310 | sst->win_coefficient[j] = (double *) HTS_calloc(-2 * sst->win_l_width[j] + 1, sizeof(double));
311 | else
312 | sst->win_coefficient[j] = (double *) HTS_calloc(-2 * sst->win_l_width[j], sizeof(double));
313 | sst->win_coefficient[j] -= sst->win_l_width[j];
314 | for (shift = sst->win_l_width[j]; shift <= sst->win_r_width[j]; shift++)
315 | sst->win_coefficient[j][shift] = HTS_ModelSet_get_window_coefficient(ms, i, j, shift);
316 | }
317 | }
318 |
319 | /* determine GV */
320 | for (i = 0; i < sss->nstream; i++) {
321 | sst = &sss->sstream[i];
322 | if (HTS_ModelSet_use_gv(ms, i)) {
323 | sst->gv_mean = (double *) HTS_calloc(sst->vector_length, sizeof(double));
324 | sst->gv_vari = (double *) HTS_calloc(sst->vector_length, sizeof(double));
325 | HTS_ModelSet_get_gv(ms, i, HTS_Label_get_string(label, 0), (const double *const *) gv_iw, sst->gv_mean, sst->gv_vari);
326 | } else {
327 | sst->gv_mean = NULL;
328 | sst->gv_vari = NULL;
329 | }
330 | }
331 |
332 | for (i = 0; i < HTS_Label_get_size(label); i++)
333 | if (HTS_ModelSet_get_gv_flag(ms, HTS_Label_get_string(label, i)) == FALSE)
334 | for (j = 0; j < sss->nstream; j++)
335 | if (HTS_ModelSet_use_gv(ms, j) == TRUE)
336 | for (k = 0; k < sss->nstate; k++)
337 | sss->sstream[j].gv_switch[i * sss->nstate + k] = FALSE;
338 |
339 | return TRUE;
340 | }
341 |
342 | /* HTS_SStreamSet_get_nstream: get number of stream */
343 | size_t HTS_SStreamSet_get_nstream(HTS_SStreamSet * sss)
344 | {
345 | return sss->nstream;
346 | }
347 |
348 | /* HTS_SStreamSet_get_vector_length: get vector length */
349 | size_t HTS_SStreamSet_get_vector_length(HTS_SStreamSet * sss, size_t stream_index)
350 | {
351 | return sss->sstream[stream_index].vector_length;
352 | }
353 |
354 | /* HTS_SStreamSet_is_msd: get MSD flag */
355 | HTS_Boolean HTS_SStreamSet_is_msd(HTS_SStreamSet * sss, size_t stream_index)
356 | {
357 | return sss->sstream[stream_index].msd ? TRUE : FALSE;
358 | }
359 |
360 | /* HTS_SStreamSet_get_total_state: get total number of state */
361 | size_t HTS_SStreamSet_get_total_state(HTS_SStreamSet * sss)
362 | {
363 | return sss->total_state;
364 | }
365 |
366 | /* HTS_SStreamSet_get_total_frame: get total number of frame */
367 | size_t HTS_SStreamSet_get_total_frame(HTS_SStreamSet * sss)
368 | {
369 | return sss->total_frame;
370 | }
371 |
372 | /* HTS_SStreamSet_get_msd: get MSD parameter */
373 | double HTS_SStreamSet_get_msd(HTS_SStreamSet * sss, size_t stream_index, size_t state_index)
374 | {
375 | return sss->sstream[stream_index].msd[state_index];
376 | }
377 |
378 | /* HTS_SStreamSet_window_size: get dynamic window size */
379 | size_t HTS_SStreamSet_get_window_size(HTS_SStreamSet * sss, size_t stream_index)
380 | {
381 | return sss->sstream[stream_index].win_size;
382 | }
383 |
384 | /* HTS_SStreamSet_get_window_left_width: get left width of dynamic window */
385 | int HTS_SStreamSet_get_window_left_width(HTS_SStreamSet * sss, size_t stream_index, size_t window_index)
386 | {
387 | return sss->sstream[stream_index].win_l_width[window_index];
388 | }
389 |
390 | /* HTS_SStreamSet_get_winodow_right_width: get right width of dynamic window */
391 | int HTS_SStreamSet_get_window_right_width(HTS_SStreamSet * sss, size_t stream_index, size_t window_index)
392 | {
393 | return sss->sstream[stream_index].win_r_width[window_index];
394 | }
395 |
396 | /* HTS_SStreamSet_get_window_coefficient: get coefficient of dynamic window */
397 | double HTS_SStreamSet_get_window_coefficient(HTS_SStreamSet * sss, size_t stream_index, size_t window_index, int coefficient_index)
398 | {
399 | return sss->sstream[stream_index].win_coefficient[window_index][coefficient_index];
400 | }
401 |
402 | /* HTS_SStreamSet_get_window_max_width: get max width of dynamic window */
403 | size_t HTS_SStreamSet_get_window_max_width(HTS_SStreamSet * sss, size_t stream_index)
404 | {
405 | return sss->sstream[stream_index].win_max_width;
406 | }
407 |
408 | /* HTS_SStreamSet_use_gv: get GV flag */
409 | HTS_Boolean HTS_SStreamSet_use_gv(HTS_SStreamSet * sss, size_t stream_index)
410 | {
411 | return sss->sstream[stream_index].gv_mean ? TRUE : FALSE;
412 | }
413 |
414 | /* HTS_SStreamSet_get_duration: get state duration */
415 | size_t HTS_SStreamSet_get_duration(HTS_SStreamSet * sss, size_t state_index)
416 | {
417 | return sss->duration[state_index];
418 | }
419 |
420 | /* HTS_SStreamSet_get_mean: get mean parameter */
421 | double HTS_SStreamSet_get_mean(HTS_SStreamSet * sss, size_t stream_index, size_t state_index, size_t vector_index)
422 | {
423 | return sss->sstream[stream_index].mean[state_index][vector_index];
424 | }
425 |
426 | /* HTS_SStreamSet_set_mean: set mean parameter */
427 | void HTS_SStreamSet_set_mean(HTS_SStreamSet * sss, size_t stream_index, size_t state_index, size_t vector_index, double f)
428 | {
429 | sss->sstream[stream_index].mean[state_index][vector_index] = f;
430 | }
431 |
432 | /* HTS_SStreamSet_get_vari: get variance parameter */
433 | double HTS_SStreamSet_get_vari(HTS_SStreamSet * sss, size_t stream_index, size_t state_index, size_t vector_index)
434 | {
435 | return sss->sstream[stream_index].vari[state_index][vector_index];
436 | }
437 |
438 | /* HTS_SStreamSet_set_vari: set variance parameter */
439 | void HTS_SStreamSet_set_vari(HTS_SStreamSet * sss, size_t stream_index, size_t state_index, size_t vector_index, double f)
440 | {
441 | sss->sstream[stream_index].vari[state_index][vector_index] = f;
442 | }
443 |
444 | /* HTS_SStreamSet_get_gv_mean: get GV mean parameter */
445 | double HTS_SStreamSet_get_gv_mean(HTS_SStreamSet * sss, size_t stream_index, size_t vector_index)
446 | {
447 | return sss->sstream[stream_index].gv_mean[vector_index];
448 | }
449 |
450 | /* HTS_SStreamSet_get_gv_mean: get GV variance parameter */
451 | double HTS_SStreamSet_get_gv_vari(HTS_SStreamSet * sss, size_t stream_index, size_t vector_index)
452 | {
453 | return sss->sstream[stream_index].gv_vari[vector_index];
454 | }
455 |
456 | /* HTS_SStreamSet_set_gv_switch: set GV switch */
457 | void HTS_SStreamSet_set_gv_switch(HTS_SStreamSet * sss, size_t stream_index, size_t state_index, HTS_Boolean i)
458 | {
459 | sss->sstream[stream_index].gv_switch[state_index] = i;
460 | }
461 |
462 | /* HTS_SStreamSet_get_gv_switch: get GV switch */
463 | HTS_Boolean HTS_SStreamSet_get_gv_switch(HTS_SStreamSet * sss, size_t stream_index, size_t state_index)
464 | {
465 | return sss->sstream[stream_index].gv_switch[state_index];
466 | }
467 |
468 | /* HTS_SStreamSet_clear: free state stream set */
469 | void HTS_SStreamSet_clear(HTS_SStreamSet * sss)
470 | {
471 | size_t i, j;
472 | HTS_SStream *sst;
473 |
474 | if (sss->sstream) {
475 | for (i = 0; i < sss->nstream; i++) {
476 | sst = &sss->sstream[i];
477 | for (j = 0; j < sss->total_state; j++) {
478 | HTS_free(sst->mean[j]);
479 | HTS_free(sst->vari[j]);
480 | }
481 | if (sst->msd)
482 | HTS_free(sst->msd);
483 | HTS_free(sst->mean);
484 | HTS_free(sst->vari);
485 | for (j = 0; j < sst->win_size; j++) {
486 | sst->win_coefficient[j] += sst->win_l_width[j];
487 | HTS_free(sst->win_coefficient[j]);
488 | }
489 | HTS_free(sst->win_coefficient);
490 | HTS_free(sst->win_l_width);
491 | HTS_free(sst->win_r_width);
492 | if (sst->gv_mean)
493 | HTS_free(sst->gv_mean);
494 | if (sst->gv_vari)
495 | HTS_free(sst->gv_vari);
496 | if (sst->gv_switch)
497 | HTS_free(sst->gv_switch);
498 | }
499 | HTS_free(sss->sstream);
500 | }
501 | if (sss->duration)
502 | HTS_free(sss->duration);
503 |
504 | HTS_SStreamSet_initialize(sss);
505 | }
506 |
507 | HTS_SSTREAM_C_END;
508 |
509 | #endif /* !HTS_SSTREAM_C */
510 |
--------------------------------------------------------------------------------
/src/lib/HTS_vocoder.c:
--------------------------------------------------------------------------------
1 | /* ----------------------------------------------------------------- */
2 | /* The HMM-Based Speech Synthesis Engine "hts_engine API" */
3 | /* developed by HTS Working Group */
4 | /* http://hts-engine.sourceforge.net/ */
5 | /* ----------------------------------------------------------------- */
6 | /* */
7 | /* Copyright (c) 2001-2014 Nagoya Institute of Technology */
8 | /* Department of Computer Science */
9 | /* */
10 | /* 2001-2008 Tokyo Institute of Technology */
11 | /* Interdisciplinary Graduate School of */
12 | /* Science and Engineering */
13 | /* */
14 | /* All rights reserved. */
15 | /* */
16 | /* Redistribution and use in source and binary forms, with or */
17 | /* without modification, are permitted provided that the following */
18 | /* conditions are met: */
19 | /* */
20 | /* - Redistributions of source code must retain the above copyright */
21 | /* notice, this list of conditions and the following disclaimer. */
22 | /* - Redistributions in binary form must reproduce the above */
23 | /* copyright notice, this list of conditions and the following */
24 | /* disclaimer in the documentation and/or other materials provided */
25 | /* with the distribution. */
26 | /* - Neither the name of the HTS working group nor the names of its */
27 | /* contributors may be used to endorse or promote products derived */
28 | /* from this software without specific prior written permission. */
29 | /* */
30 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
31 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
32 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
33 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
34 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
35 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */
36 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
37 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
38 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
39 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */
40 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */
41 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
42 | /* POSSIBILITY OF SUCH DAMAGE. */
43 | /* ----------------------------------------------------------------- */
44 |
45 | #ifndef HTS_VOCODER_C
46 | #define HTS_VOCODER_C
47 |
48 | #ifdef __cplusplus
49 | #define HTS_VOCODER_C_START extern "C" {
50 | #define HTS_VOCODER_C_END }
51 | #else
52 | #define HTS_VOCODER_C_START
53 | #define HTS_VOCODER_C_END
54 | #endif /* __CPLUSPLUS */
55 |
56 | HTS_VOCODER_C_START;
57 |
58 | #include /* for sqrt(),log(),exp(),pow(),cos() */
59 |
60 | /* hts_engine libraries */
61 | #include "HTS_hidden.h"
62 |
/* HTS_pade: Pade approximation coefficients, packed as a triangular table.         */
/* The row for order pd starts at index pd * (pd + 1) / 2 and holds pd + 1 entries. */
static const double HTS_pade[21] = {
   /* order 0 */
   1.00000000000,
   /* order 1 */
   1.00000000000, 0.00000000000,
   /* order 2 */
   1.00000000000, 0.00000000000, 0.00000000000,
   /* order 3 */
   1.00000000000, 0.00000000000, 0.00000000000, 0.00000000000,
   /* order 4 */
   1.00000000000, 0.49992730000, 0.10670050000, 0.01170221000, 0.00056562790,
   /* order 5 */
   1.00000000000, 0.49993910000, 0.11070980000, 0.01369984000, 0.00095648530, 0.00003041721
};
86 |
/* HTS_movem: copy nitem doubles from a to b; the copy direction is chosen so that */
/* overlapping source and destination ranges are handled                           */
static void HTS_movem(double *a, double *b, const int nitem)
{
   long remaining = (long) nitem;
   double *src, *dst;

   if (a > b) {
      /* destination lies below the source: copy front to back */
      src = a;
      dst = b;
      for (; remaining != 0; remaining--)
         *dst++ = *src++;
      return;
   }

   /* destination lies at or above the source: copy back to front */
   src = a + remaining;
   dst = b + remaining;
   for (; remaining != 0; remaining--) {
      --dst;
      --src;
      *dst = *src;
   }
}
102 |
/* HTS_mlsafir: sub function of the MLSA filter.                                    */
/* Feeds x into the delay line d (indices 0 .. m + 1 are accessed), returns the sum */
/* of d[i] * b[i] for i = 2 .. m, then shifts d[1 .. m] up by one position.         */
static double HTS_mlsafir(const double x, const double *b, const int m, const double a, const double aa, double *d)
{
   double acc = 0.0;
   int k;

   d[0] = x;
   d[1] = aa * d[0] + a * d[1];

   /* d[k] is final once updated, so the weighted sum can be taken in the same pass */
   for (k = 2; k <= m; k++) {
      d[k] += a * (d[k + 1] - d[k - 1]);
      acc += d[k] * b[k];
   }

   /* shift the delay line, highest index first so nothing is overwritten early */
   for (k = m; k >= 1; k--)
      d[k + 1] = d[k];

   return (acc);
}
123 |
/* HTS_mlsadf1: sub function of the MLSA filter (stage driven by b[1]).            */
/* d holds two banks of pd + 1 values: d[0 .. pd] and, behind it, pt[0 .. pd].     */
/* m is accepted for signature symmetry with HTS_mlsadf2 and is not read.          */
static double HTS_mlsadf1(double x, const double *b, const int m, const double a, const double aa, const int pd, double *d, const double *ppade)
{
   double term;
   double out = 0.0;
   double *pt = d + pd + 1;
   int k;

   for (k = pd; k >= 1; k--) {
      d[k] = aa * pt[k - 1] + a * d[k];
      pt[k] = d[k] * b[1];
      term = pt[k] * ppade[k];

      /* odd-order terms are added to the feedback signal, even-order terms subtracted */
      if (k & 1)
         x += term;
      else
         x += -term;
      out += term;
   }

   pt[0] = x;
   out += x;

   return (out);
}
145 |
/* HTS_mlsadf2: sub function of the MLSA filter (stage driven by b[2 .. m]).        */
/* d holds pd delay lines of m + 2 values each, followed by pt[0 .. pd].            */
static double HTS_mlsadf2(double x, const double *b, const int m, const double a, const double aa, const int pd, double *d, const double *ppade)
{
   const int line_len = m + 2;
   double term;
   double out = 0.0;
   double *pt = d + pd * line_len;
   int k;

   for (k = pd; k >= 1; k--) {
      pt[k] = HTS_mlsafir(pt[k - 1], b, m, a, aa, d + (k - 1) * line_len);
      term = pt[k] * ppade[k];

      /* odd-order terms are added to the feedback signal, even-order terms subtracted */
      if (k & 1)
         x += term;
      else
         x += -term;
      out += term;
   }

   pt[0] = x;
   out += x;

   return (out);
}
167 |
168 | /* HTS_mlsadf: functions for MLSA filter */
169 | static double HTS_mlsadf(double x, const double *b, const int m, const double a, const int pd, double *d)
170 | {
171 | const double aa = 1 - a * a;
172 | const double *ppade = &(HTS_pade[pd * (pd + 1) / 2]);
173 |
174 | x = HTS_mlsadf1(x, b, m, a, aa, pd, d, ppade);
175 | x = HTS_mlsadf2(x, b, m, a, aa, pd, &d[2 * (pd + 1)], ppade);
176 |
177 | return (x);
178 | }
179 |
180 | /* HTS_rnd: functions for random noise generation */
181 | static double HTS_rnd(unsigned long *next)
182 | {
183 | double r;
184 |
185 | *next = *next * 1103515245L + 12345;
186 | r = (*next / 65536L) % 32768L;
187 |
188 | return (r / RANDMAX);
189 | }
190 |
191 | /* HTS_nrandom: functions for gaussian random noise generation */
192 | static double HTS_nrandom(HTS_Vocoder * v)
193 | {
194 | if (v->sw == 0) {
195 | v->sw = 1;
196 | do {
197 | v->r1 = 2 * HTS_rnd(&v->next) - 1;
198 | v->r2 = 2 * HTS_rnd(&v->next) - 1;
199 | v->s = v->r1 * v->r1 + v->r2 * v->r2;
200 | } while (v->s > 1 || v->s == 0);
201 | v->s = sqrt(-2 * log(v->s) / v->s);
202 | return (v->r1 * v->s);
203 | } else {
204 | v->sw = 0;
205 | return (v->r2 * v->s);
206 | }
207 | }
208 |
209 | /* HTS_mceq: function for M-sequence random noise generation */
210 | static int HTS_mseq(HTS_Vocoder * v)
211 | {
212 | int x0, x28;
213 |
214 | v->x >>= 1;
215 | if (v->x & B0)
216 | x0 = 1;
217 | else
218 | x0 = -1;
219 | if (v->x & B28)
220 | x28 = 1;
221 | else
222 | x28 = -1;
223 | if (x0 + x28)
224 | v->x &= B31_;
225 | else
226 | v->x |= B31;
227 |
228 | return (x0);
229 | }
230 |
/* HTS_mc2b: transform mel-cepstrum to MLSA digital filter coefficients.            */
/* Computes b[i] = mc[i] - a * b[i + 1] from i = m - 1 down to 0 with b[m] = mc[m]; */
/* mc and b may be the same array. With a == 0 the input is copied unchanged.       */
static void HTS_mc2b(double *mc, double *b, int m, const double a)
{
   int i;

   if (a == 0.0) {
      /* identity transform: only copy when the buffers differ */
      if (mc != b)
         HTS_movem(mc, b, m + 1);
      return;
   }

   if (mc != b) {
      b[m] = mc[m];
      for (i = m - 1; i >= 0; i--)
         b[i] = mc[i] - a * b[i + 1];
   } else {
      /* in place: b[m] already holds mc[m] */
      for (i = m - 1; i >= 0; i--)
         b[i] = b[i] - a * b[i + 1];
   }
}
245 |
/* HTS_b2mc: transform MLSA digital filter coefficients to mel-cepstrum.     */
/* Computes mc[m] = b[m] and mc[i] = b[i] + a * b[i + 1] for i < m.          */
/* b and mc may be the same array: each b[i] is read before mc[i] is stored. */
static void HTS_b2mc(const double *b, double *mc, int m, const double a)
{
   double upper, current;
   int i;

   upper = b[m];
   mc[m] = upper;

   for (i = m - 1; i >= 0; i--) {
      current = b[i];
      mc[i] = current + a * upper;
      upper = current;
   }
}
258 |
259 | /* HTS_freqt: frequency transformation */
260 | static void HTS_freqt(HTS_Vocoder * v, const double *c1, const int m1, double *c2, const int m2, const double a)
261 | {
262 | int i, j;
263 | const double b = 1 - a * a;
264 | double *g;
265 |
266 | if (m2 > v->freqt_size) {
267 | if (v->freqt_buff != NULL)
268 | HTS_free(v->freqt_buff);
269 | v->freqt_buff = (double *) HTS_calloc(m2 + m2 + 2, sizeof(double));
270 | v->freqt_size = m2;
271 | }
272 | g = v->freqt_buff + v->freqt_size + 1;
273 |
274 | for (i = 0; i < m2 + 1; i++)
275 | g[i] = 0.0;
276 |
277 | for (i = -m1; i <= 0; i++) {
278 | if (0 <= m2)
279 | g[0] = c1[-i] + a * (v->freqt_buff[0] = g[0]);
280 | if (1 <= m2)
281 | g[1] = b * v->freqt_buff[0] + a * (v->freqt_buff[1] = g[1]);
282 | for (j = 2; j <= m2; j++)
283 | g[j] = v->freqt_buff[j - 1] + a * ((v->freqt_buff[j] = g[j]) - g[j - 1]);
284 | }
285 |
286 | HTS_movem(g, c2, m2 + 1);
287 | }
288 |
289 | /* HTS_c2ir: The minimum phase impulse response is evaluated from the minimum phase cepstrum */
290 | static void HTS_c2ir(const double *c, const int nc, double *h, const int leng)
291 | {
292 | int n, k, upl;
293 | double d;
294 |
295 | h[0] = exp(c[0]);
296 | for (n = 1; n < leng; n++) {
297 | d = 0;
298 | upl = (n >= nc) ? nc - 1 : n;
299 | for (k = 1; k <= upl; k++)
300 | d += k * c[k] * h[n - k];
301 | h[n] = d / n;
302 | }
303 | }
304 |
305 | /* HTS_b2en: calculate frame energy */
306 | static double HTS_b2en(HTS_Vocoder * v, const double *b, const int m, const double a)
307 | {
308 | int i;
309 | double en = 0.0;
310 | double *cep;
311 | double *ir;
312 |
313 | if (v->spectrum2en_size < m) {
314 | if (v->spectrum2en_buff != NULL)
315 | HTS_free(v->spectrum2en_buff);
316 | v->spectrum2en_buff = (double *) HTS_calloc((m + 1) + 2 * IRLENG, sizeof(double));
317 | v->spectrum2en_size = m;
318 | }
319 | cep = v->spectrum2en_buff + m + 1;
320 | ir = cep + IRLENG;
321 |
322 | HTS_b2mc(b, v->spectrum2en_buff, m, a);
323 | HTS_freqt(v, v->spectrum2en_buff, m, cep, IRLENG - 1, -a);
324 | HTS_c2ir(cep, IRLENG, ir, IRLENG);
325 |
326 | for (i = 0; i < IRLENG; i++)
327 | en += ir[i] * ir[i];
328 |
329 | return (en);
330 | }
331 |
/* HTS_ignorm: inverse gain normalization */
/*   c1 : input, indices 0..m (c1[0] is the gain term)       */
/*   c2 : output, indices 0..m (may be the same array as c1) */
/*   g  : gamma; g == 0 selects the logarithmic form         */
static void HTS_ignorm(double *c1, double *c2, int m, const double g)
{
   int i;
   double k;

   if (g == 0.0) {
      HTS_movem(&c1[1], &c2[1], m);
      c2[0] = log(c1[0]);
      return;
   }

   /* k is taken from c1[0] before c2[0] is written, so in-place use is safe */
   k = pow(c1[0], g);
   for (i = m; i >= 1; i--)
      c2[i] = k * c1[i];
   c2[0] = (k - 1.0) / g;
}
346 |
/* HTS_gnorm: gain normalization */
/*   c1 : input, indices 0..m (c1[0] is the gain term)       */
/*   c2 : output, indices 0..m (may be the same array as c1) */
/*   g  : gamma; g == 0 selects the exponential form         */
static void HTS_gnorm(double *c1, double *c2, int m, const double g)
{
   int i;
   double k;

   if (g == 0.0) {
      HTS_movem(&c1[1], &c2[1], m);
      c2[0] = exp(c1[0]);
      return;
   }

   /* k is taken from c1[0] before c2[0] is written, so in-place use is safe */
   k = 1.0 + g * c1[0];
   for (i = m; i >= 1; i--)
      c2[i] = c1[i] / k;
   c2[0] = pow(k, 1.0 / g);
}
361 |
362 | /* HTS_lsp2lpc: transform LSP to LPC */
363 | static void HTS_lsp2lpc(HTS_Vocoder * v, double *lsp, double *a, const int m)
364 | {
365 | int i, k, mh1, mh2, flag_odd;
366 | double xx, xf, xff;
367 | double *p, *q;
368 | double *a0, *a1, *a2, *b0, *b1, *b2;
369 |
370 | flag_odd = 0;
371 | if (m % 2 == 0)
372 | mh1 = mh2 = m / 2;
373 | else {
374 | mh1 = (m + 1) / 2;
375 | mh2 = (m - 1) / 2;
376 | flag_odd = 1;
377 | }
378 |
379 | if (m > v->lsp2lpc_size) {
380 | if (v->lsp2lpc_buff != NULL)
381 | HTS_free(v->lsp2lpc_buff);
382 | v->lsp2lpc_buff = (double *) HTS_calloc(5 * m + 6, sizeof(double));
383 | v->lsp2lpc_size = m;
384 | }
385 | p = v->lsp2lpc_buff + m;
386 | q = p + mh1;
387 | a0 = q + mh2;
388 | a1 = a0 + (mh1 + 1);
389 | a2 = a1 + (mh1 + 1);
390 | b0 = a2 + (mh1 + 1);
391 | b1 = b0 + (mh2 + 1);
392 | b2 = b1 + (mh2 + 1);
393 |
394 | HTS_movem(lsp, v->lsp2lpc_buff, m);
395 |
396 | for (i = 0; i < mh1 + 1; i++)
397 | a0[i] = 0.0;
398 | for (i = 0; i < mh1 + 1; i++)
399 | a1[i] = 0.0;
400 | for (i = 0; i < mh1 + 1; i++)
401 | a2[i] = 0.0;
402 | for (i = 0; i < mh2 + 1; i++)
403 | b0[i] = 0.0;
404 | for (i = 0; i < mh2 + 1; i++)
405 | b1[i] = 0.0;
406 | for (i = 0; i < mh2 + 1; i++)
407 | b2[i] = 0.0;
408 |
409 | /* lsp filter parameters */
410 | for (i = k = 0; i < mh1; i++, k += 2)
411 | p[i] = -2.0 * cos(v->lsp2lpc_buff[k]);
412 | for (i = k = 0; i < mh2; i++, k += 2)
413 | q[i] = -2.0 * cos(v->lsp2lpc_buff[k + 1]);
414 |
415 | /* impulse response of analysis filter */
416 | xx = 1.0;
417 | xf = xff = 0.0;
418 |
419 | for (k = 0; k <= m; k++) {
420 | if (flag_odd) {
421 | a0[0] = xx;
422 | b0[0] = xx - xff;
423 | xff = xf;
424 | xf = xx;
425 | } else {
426 | a0[0] = xx + xf;
427 | b0[0] = xx - xf;
428 | xf = xx;
429 | }
430 |
431 | for (i = 0; i < mh1; i++) {
432 | a0[i + 1] = a0[i] + p[i] * a1[i] + a2[i];
433 | a2[i] = a1[i];
434 | a1[i] = a0[i];
435 | }
436 |
437 | for (i = 0; i < mh2; i++) {
438 | b0[i + 1] = b0[i] + q[i] * b1[i] + b2[i];
439 | b2[i] = b1[i];
440 | b1[i] = b0[i];
441 | }
442 |
443 | if (k != 0)
444 | a[k - 1] = -0.5 * (a0[mh1] + b0[mh2]);
445 | xx = 0.0;
446 | }
447 |
448 | for (i = m - 1; i >= 0; i--)
449 | a[i + 1] = -a[i];
450 | a[0] = 1.0;
451 | }
452 |
453 | /* HTS_gc2gc: generalized cepstral transformation */
454 | static void HTS_gc2gc(HTS_Vocoder * v, double *c1, const int m1, const double g1, double *c2, const int m2, const double g2)
455 | {
456 | int i, min, k, mk;
457 | double ss1, ss2, cc;
458 |
459 | if (m1 > v->gc2gc_size) {
460 | if (v->gc2gc_buff != NULL)
461 | HTS_free(v->gc2gc_buff);
462 | v->gc2gc_buff = (double *) HTS_calloc(m1 + 1, sizeof(double));
463 | v->gc2gc_size = m1;
464 | }
465 |
466 | HTS_movem(c1, v->gc2gc_buff, m1 + 1);
467 |
468 | c2[0] = v->gc2gc_buff[0];
469 | for (i = 1; i <= m2; i++) {
470 | ss1 = ss2 = 0.0;
471 | min = m1 < i ? m1 : i - 1;
472 | for (k = 1; k <= min; k++) {
473 | mk = i - k;
474 | cc = v->gc2gc_buff[k] * c2[mk];
475 | ss2 += k * cc;
476 | ss1 += mk * cc;
477 | }
478 |
479 | if (i <= m1)
480 | c2[i] = v->gc2gc_buff[i] + (g2 * ss2 - g1 * ss1) / i;
481 | else
482 | c2[i] = (g2 * ss2 - g1 * ss1) / i;
483 | }
484 | }
485 |
486 | /* HTS_mgc2mgc: frequency and generalized cepstral transformation */
487 | static void HTS_mgc2mgc(HTS_Vocoder * v, double *c1, const int m1, const double a1, const double g1, double *c2, const int m2, const double a2, const double g2)
488 | {
489 | double a;
490 |
491 | if (a1 == a2) {
492 | HTS_gnorm(c1, c1, m1, g1);
493 | HTS_gc2gc(v, c1, m1, g1, c2, m2, g2);
494 | HTS_ignorm(c2, c2, m2, g2);
495 | } else {
496 | a = (a2 - a1) / (1 - a1 * a2);
497 | HTS_freqt(v, c1, m1, c2, m2, a);
498 | HTS_gnorm(c2, c2, m2, g1);
499 | HTS_gc2gc(v, c2, m2, g1, c2, m2, g2);
500 | HTS_ignorm(c2, c2, m2, g2);
501 | }
502 | }
503 |
504 | /* HTS_lsp2mgc: transform LSP to MGC */
505 | static void HTS_lsp2mgc(HTS_Vocoder * v, double *lsp, double *mgc, const int m, const double alpha)
506 | {
507 | int i;
508 | /* lsp2lpc */
509 | HTS_lsp2lpc(v, lsp + 1, mgc, m);
510 | if (v->use_log_gain)
511 | mgc[0] = exp(lsp[0]);
512 | else
513 | mgc[0] = lsp[0];
514 |
515 | /* mgc2mgc */
516 | if (NORMFLG1)
517 | HTS_ignorm(mgc, mgc, m, v->gamma);
518 | else if (MULGFLG1)
519 | mgc[0] = (1.0 - mgc[0]) * ((double) v->stage);
520 | if (MULGFLG1)
521 | for (i = m; i >= 1; i--)
522 | mgc[i] *= -((double) v->stage);
523 | HTS_mgc2mgc(v, mgc, m, alpha, v->gamma, mgc, m, alpha, v->gamma);
524 | if (NORMFLG2)
525 | HTS_gnorm(mgc, mgc, m, v->gamma);
526 | else if (MULGFLG2)
527 | mgc[0] = mgc[0] * v->gamma + 1.0;
528 | if (MULGFLG2)
529 | for (i = m; i >= 1; i--)
530 | mgc[i] *= v->gamma;
531 | }
532 |
/* HTS_mglsadff: sub functions for MGLSA filter */
/*   x : input sample                                           */
/*   b : coefficients; b[1..m] are read                         */
/*   a : all-pass constant                                      */
/*   d : delay line of m + 1 values, updated in place           */
/* Returns the input minus the feedback formed from d and b.    */
static double HTS_mglsadff(double x, const double *b, const int m, const double a, double *d)
{
   int i;
   double feedback;

   /* update the inner delay elements and accumulate the feedback sum */
   feedback = d[0] * b[1];
   for (i = 1; i < m; i++) {
      d[i] += a * (d[i + 1] - d[i - 1]);
      feedback += d[i] * b[i + 1];
   }
   x -= feedback;

   /* shift the delay line by one slot, then load the new head value */
   for (i = m; i >= 1; i--)
      d[i] = d[i - 1];
   d[0] = a * d[0] + (1 - a * a) * x;

   return x;
}
551 |
/* HTS_mglsadf: sub functions for MGLSA filter */
/*   n : number of cascaded HTS_mglsadff stages                      */
/*   d : n consecutive delay lines of m + 1 values each              */
/* The sample is passed through the stages in order, each stage      */
/* using its own slice of d.                                         */
static double HTS_mglsadf(double x, const double *b, const int m, const double a, const int n, double *d)
{
   int stage;
   const int stride = m + 1;

   for (stage = 0; stage < n; stage++)
      x = HTS_mglsadff(x, b, m, a, d + stage * stride);

   return x;
}
562 |
563 | /* THS_check_lsp_stability: check LSP stability */
564 | static void HTS_check_lsp_stability(double *lsp, size_t m)
565 | {
566 | size_t i, j;
567 | double tmp;
568 | double min = (CHECK_LSP_STABILITY_MIN * PI) / (m + 1);
569 | HTS_Boolean find;
570 |
571 | for (i = 0; i < CHECK_LSP_STABILITY_NUM; i++) {
572 | find = FALSE;
573 |
574 | for (j = 1; j < m; j++) {
575 | tmp = lsp[j + 1] - lsp[j];
576 | if (tmp < min) {
577 | lsp[j] -= 0.5 * (min - tmp);
578 | lsp[j + 1] += 0.5 * (min - tmp);
579 | find = TRUE;
580 | }
581 | }
582 |
583 | if (lsp[1] < min) {
584 | lsp[1] = min;
585 | find = TRUE;
586 | }
587 | if (lsp[m] > PI - min) {
588 | lsp[m] = PI - min;
589 | find = TRUE;
590 | }
591 |
592 | if (find == FALSE)
593 | break;
594 | }
595 | }
596 |
597 | /* HTS_lsp2en: calculate frame energy */
598 | static double HTS_lsp2en(HTS_Vocoder * v, double *lsp, size_t m, double alpha)
599 | {
600 | size_t i;
601 | double en = 0.0;
602 | double *buff;
603 |
604 | if (v->spectrum2en_size < m) {
605 | if (v->spectrum2en_buff != NULL)
606 | HTS_free(v->spectrum2en_buff);
607 | v->spectrum2en_buff = (double *) HTS_calloc(m + 1 + IRLENG, sizeof(double));
608 | v->spectrum2en_size = m;
609 | }
610 | buff = v->spectrum2en_buff + m + 1;
611 |
612 | /* lsp2lpc */
613 | HTS_lsp2lpc(v, lsp + 1, v->spectrum2en_buff, m);
614 | if (v->use_log_gain)
615 | v->spectrum2en_buff[0] = exp(lsp[0]);
616 | else
617 | v->spectrum2en_buff[0] = lsp[0];
618 |
619 | /* mgc2mgc */
620 | if (NORMFLG1)
621 | HTS_ignorm(v->spectrum2en_buff, v->spectrum2en_buff, m, v->gamma);
622 | else if (MULGFLG1)
623 | v->spectrum2en_buff[0] = (1.0 - v->spectrum2en_buff[0]) * ((double) v->stage);
624 | if (MULGFLG1)
625 | for (i = 1; i <= m; i++)
626 | v->spectrum2en_buff[i] *= -((double) v->stage);
627 | HTS_mgc2mgc(v, v->spectrum2en_buff, m, alpha, v->gamma, buff, IRLENG - 1, 0.0, 1);
628 |
629 | for (i = 0; i < IRLENG; i++)
630 | en += buff[i] * buff[i];
631 | return en;
632 | }
633 |
634 | /* HTS_white_noise: return white noise */
635 | static double HTS_white_noise(HTS_Vocoder * v)
636 | {
637 | if (v->gauss)
638 | return (double) HTS_nrandom(v);
639 | else
640 | return (double) HTS_mseq(v);
641 | }
642 |
643 | /* HTS_Vocoder_initialize_excitation: initialize excitation */
644 | static void HTS_Vocoder_initialize_excitation(HTS_Vocoder * v, double pitch, size_t nlpf)
645 | {
646 | size_t i;
647 |
648 | v->pitch_of_curr_point = pitch;
649 | v->pitch_counter = pitch;
650 | v->pitch_inc_per_point = 0.0;
651 | if (nlpf > 0) {
652 | v->excite_buff_size = nlpf;
653 | v->excite_ring_buff = (double *) HTS_calloc(v->excite_buff_size, sizeof(double));
654 | for (i = 0; i < v->excite_buff_size; i++)
655 | v->excite_ring_buff[i] = 0.0;
656 | v->excite_buff_index = 0;
657 | } else {
658 | v->excite_buff_size = 0;
659 | v->excite_ring_buff = NULL;
660 | v->excite_buff_index = 0;
661 | }
662 | }
663 |
664 | /* HTS_Vocoder_start_excitation: start excitation of each frame */
665 | static void HTS_Vocoder_start_excitation(HTS_Vocoder * v, double pitch)
666 | {
667 | if (v->pitch_of_curr_point != 0.0 && pitch != 0.0) {
668 | v->pitch_inc_per_point = (pitch - v->pitch_of_curr_point) / v->fprd;
669 | } else {
670 | v->pitch_inc_per_point = 0.0;
671 | v->pitch_of_curr_point = pitch;
672 | v->pitch_counter = pitch;
673 | }
674 | }
675 |
676 | /* HTS_Vocoder_excite_unvoiced_frame: ping noise to ring buffer */
677 | static void HTS_Vocoder_excite_unvoiced_frame(HTS_Vocoder * v, double noise)
678 | {
679 | size_t center = (v->excite_buff_size - 1) / 2;
680 | v->excite_ring_buff[(v->excite_buff_index + center) % v->excite_buff_size] += noise;
681 | }
682 |
683 | /* HTS_Vocoder_excite_vooiced_frame: ping noise and pulse to ring buffer */
684 | static void HTS_Vocoder_excite_voiced_frame(HTS_Vocoder * v, double noise, double pulse, const double *lpf)
685 | {
686 | size_t i;
687 | size_t center = (v->excite_buff_size - 1) / 2;
688 |
689 | if (noise != 0.0) {
690 | for (i = 0; i < v->excite_buff_size; i++) {
691 | if (i == center)
692 | v->excite_ring_buff[(v->excite_buff_index + i) % v->excite_buff_size] += noise * (1.0 - lpf[i]);
693 | else
694 | v->excite_ring_buff[(v->excite_buff_index + i) % v->excite_buff_size] += noise * (0.0 - lpf[i]);
695 | }
696 | }
697 | if (pulse != 0.0) {
698 | for (i = 0; i < v->excite_buff_size; i++)
699 | v->excite_ring_buff[(v->excite_buff_index + i) % v->excite_buff_size] += pulse * lpf[i];
700 | }
701 | }
702 |
703 | /* HTS_Vocoder_get_excitation: get excitation of each sample */
704 | static double HTS_Vocoder_get_excitation(HTS_Vocoder * v, const double *lpf)
705 | {
706 | double x;
707 | double noise, pulse = 0.0;
708 |
709 | if (v->excite_buff_size > 0) {
710 | noise = HTS_white_noise(v);
711 | pulse = 0.0;
712 | if (v->pitch_of_curr_point == 0.0) {
713 | HTS_Vocoder_excite_unvoiced_frame(v, noise);
714 | } else {
715 | v->pitch_counter += 1.0;
716 | if (v->pitch_counter >= v->pitch_of_curr_point) {
717 | pulse = sqrt(v->pitch_of_curr_point);
718 | v->pitch_counter -= v->pitch_of_curr_point;
719 | }
720 | HTS_Vocoder_excite_voiced_frame(v, noise, pulse, lpf);
721 | v->pitch_of_curr_point += v->pitch_inc_per_point;
722 | }
723 | x = v->excite_ring_buff[v->excite_buff_index];
724 | v->excite_ring_buff[v->excite_buff_index] = 0.0;
725 | v->excite_buff_index++;
726 | if (v->excite_buff_index >= v->excite_buff_size)
727 | v->excite_buff_index = 0;
728 | } else {
729 | if (v->pitch_of_curr_point == 0.0) {
730 | x = HTS_white_noise(v);
731 | } else {
732 | v->pitch_counter += 1.0;
733 | if (v->pitch_counter >= v->pitch_of_curr_point) {
734 | x = sqrt(v->pitch_of_curr_point);
735 | v->pitch_counter -= v->pitch_of_curr_point;
736 | } else {
737 | x = 0.0;
738 | }
739 | v->pitch_of_curr_point += v->pitch_inc_per_point;
740 | }
741 | }
742 |
743 | return x;
744 | }
745 |
746 | /* HTS_Vocoder_end_excitation: end excitation of each frame */
747 | static void HTS_Vocoder_end_excitation(HTS_Vocoder * v, double pitch)
748 | {
749 | v->pitch_of_curr_point = pitch;
750 | }
751 |
752 | /* HTS_Vocoder_postfilter_mcp: postfilter for MCP */
753 | static void HTS_Vocoder_postfilter_mcp(HTS_Vocoder * v, double *mcp, const int m, double alpha, double beta)
754 | {
755 | double e1, e2;
756 | int k;
757 |
758 | if (beta > 0.0 && m > 1) {
759 | if (v->postfilter_size < m) {
760 | if (v->postfilter_buff != NULL)
761 | HTS_free(v->postfilter_buff);
762 | v->postfilter_buff = (double *) HTS_calloc(m + 1, sizeof(double));
763 | v->postfilter_size = m;
764 | }
765 | HTS_mc2b(mcp, v->postfilter_buff, m, alpha);
766 | e1 = HTS_b2en(v, v->postfilter_buff, m, alpha);
767 |
768 | v->postfilter_buff[1] -= beta * alpha * v->postfilter_buff[2];
769 | for (k = 2; k <= m; k++)
770 | v->postfilter_buff[k] *= (1.0 + beta);
771 |
772 | e2 = HTS_b2en(v, v->postfilter_buff, m, alpha);
773 | v->postfilter_buff[0] += log(e1 / e2) / 2;
774 | HTS_b2mc(v->postfilter_buff, mcp, m, alpha);
775 | }
776 | }
777 |
778 | /* HTS_Vocoder_postfilter_lsp: postfilter for LSP */
779 | static void HTS_Vocoder_postfilter_lsp(HTS_Vocoder * v, double *lsp, size_t m, double alpha, double beta)
780 | {
781 | double e1, e2;
782 | size_t i;
783 | double d1, d2;
784 |
785 | if (beta > 0.0 && m > 1) {
786 | if (v->postfilter_size < m) {
787 | if (v->postfilter_buff != NULL)
788 | HTS_free(v->postfilter_buff);
789 | v->postfilter_buff = (double *) HTS_calloc(m + 1, sizeof(double));
790 | v->postfilter_size = m;
791 | }
792 |
793 | e1 = HTS_lsp2en(v, lsp, m, alpha);
794 |
795 | /* postfiltering */
796 | for (i = 0; i <= m; i++) {
797 | if (i > 1 && i < m) {
798 | d1 = beta * (lsp[i + 1] - lsp[i]);
799 | d2 = beta * (lsp[i] - lsp[i - 1]);
800 | v->postfilter_buff[i] = lsp[i - 1] + d2 + (d2 * d2 * ((lsp[i + 1] - lsp[i - 1]) - (d1 + d2))) / ((d2 * d2) + (d1 * d1));
801 | } else {
802 | v->postfilter_buff[i] = lsp[i];
803 | }
804 | }
805 | HTS_movem(v->postfilter_buff, lsp, m + 1);
806 |
807 | e2 = HTS_lsp2en(v, lsp, m, alpha);
808 |
809 | if (e1 != e2) {
810 | if (v->use_log_gain)
811 | lsp[0] += 0.5 * log(e1 / e2);
812 | else
813 | lsp[0] *= sqrt(e1 / e2);
814 | }
815 | }
816 | }
817 |
818 | /* HTS_Vocoder_initialize: initialize vocoder */
819 | void HTS_Vocoder_initialize(HTS_Vocoder * v, size_t m, size_t stage, HTS_Boolean use_log_gain, size_t rate, size_t fperiod)
820 | {
821 | /* set parameter */
822 | v->is_first = TRUE;
823 | v->stage = stage;
824 | if (stage != 0)
825 | v->gamma = -1.0 / v->stage;
826 | else
827 | v->gamma = 0.0;
828 | v->use_log_gain = use_log_gain;
829 | v->fprd = fperiod;
830 | v->next = SEED;
831 | v->gauss = GAUSS;
832 | v->rate = rate;
833 | v->pitch_of_curr_point = 0.0;
834 | v->pitch_counter = 0.0;
835 | v->pitch_inc_per_point = 0.0;
836 | v->excite_ring_buff = NULL;
837 | v->excite_buff_size = 0;
838 | v->excite_buff_index = 0;
839 | v->sw = 0;
840 | v->x = 0x55555555;
841 | /* init buffer */
842 | v->freqt_buff = NULL;
843 | v->freqt_size = 0;
844 | v->gc2gc_buff = NULL;
845 | v->gc2gc_size = 0;
846 | v->lsp2lpc_buff = NULL;
847 | v->lsp2lpc_size = 0;
848 | v->postfilter_buff = NULL;
849 | v->postfilter_size = 0;
850 | v->spectrum2en_buff = NULL;
851 | v->spectrum2en_size = 0;
852 | if (v->stage == 0) { /* for MCP */
853 | v->c = (double *) HTS_calloc(m * (3 + PADEORDER) + 5 * PADEORDER + 6, sizeof(double));
854 | v->cc = v->c + m + 1;
855 | v->cinc = v->cc + m + 1;
856 | v->d1 = v->cinc + m + 1;
857 | } else { /* for LSP */
858 | v->c = (double *) HTS_calloc((m + 1) * (v->stage + 3), sizeof(double));
859 | v->cc = v->c + m + 1;
860 | v->cinc = v->cc + m + 1;
861 | v->d1 = v->cinc + m + 1;
862 | }
863 | }
864 |
865 | /* HTS_Vocoder_synthesize: pulse/noise excitation and MLSA/MGLSA filster based waveform synthesis */
866 | void HTS_Vocoder_synthesize(HTS_Vocoder * v, size_t m, double lf0, double *spectrum, size_t nlpf, double *lpf, double alpha, double beta, double volume, double *rawdata, HTS_Audio * audio)
867 | {
868 | double x;
869 | int i, j;
870 | short xs;
871 | int rawidx = 0;
872 | double p;
873 |
874 | /* lf0 -> pitch */
875 | if (lf0 == LZERO)
876 | p = 0.0;
877 | else if (lf0 <= MIN_LF0)
878 | p = v->rate / MIN_F0;
879 | else if (lf0 >= MAX_LF0)
880 | p = v->rate / MAX_F0;
881 | else
882 | p = v->rate / exp(lf0);
883 |
884 | /* first time */
885 | if (v->is_first == TRUE) {
886 | HTS_Vocoder_initialize_excitation(v, p, nlpf);
887 | if (v->stage == 0) { /* for MCP */
888 | HTS_mc2b(spectrum, v->c, m, alpha);
889 | } else { /* for LSP */
890 | HTS_movem(spectrum, v->c, m + 1);
891 | HTS_lsp2mgc(v, v->c, v->c, m, alpha);
892 | HTS_mc2b(v->c, v->c, m, alpha);
893 | HTS_gnorm(v->c, v->c, m, v->gamma);
894 | for (i = 1; i <= m; i++)
895 | v->c[i] *= v->gamma;
896 | }
897 | v->is_first = FALSE;
898 | }
899 |
900 | HTS_Vocoder_start_excitation(v, p);
901 | if (v->stage == 0) { /* for MCP */
902 | HTS_Vocoder_postfilter_mcp(v, spectrum, m, alpha, beta);
903 | HTS_mc2b(spectrum, v->cc, m, alpha);
904 | for (i = 0; i <= m; i++)
905 | v->cinc[i] = (v->cc[i] - v->c[i]) / v->fprd;
906 | } else { /* for LSP */
907 | HTS_Vocoder_postfilter_lsp(v, spectrum, m, alpha, beta);
908 | HTS_check_lsp_stability(spectrum, m);
909 | HTS_lsp2mgc(v, spectrum, v->cc, m, alpha);
910 | HTS_mc2b(v->cc, v->cc, m, alpha);
911 | HTS_gnorm(v->cc, v->cc, m, v->gamma);
912 | for (i = 1; i <= m; i++)
913 | v->cc[i] *= v->gamma;
914 | for (i = 0; i <= m; i++)
915 | v->cinc[i] = (v->cc[i] - v->c[i]) / v->fprd;
916 | }
917 |
918 | for (j = 0; j < v->fprd; j++) {
919 | x = HTS_Vocoder_get_excitation(v, lpf);
920 | if (v->stage == 0) { /* for MCP */
921 | if (x != 0.0)
922 | x *= exp(v->c[0]);
923 | x = HTS_mlsadf(x, v->c, m, alpha, PADEORDER, v->d1);
924 | } else { /* for LSP */
925 | if (!NGAIN)
926 | x *= v->c[0];
927 | x = HTS_mglsadf(x, v->c, m, alpha, v->stage, v->d1);
928 | }
929 | x *= volume;
930 |
931 | /* output */
932 | if (rawdata)
933 | rawdata[rawidx++] = x;
934 | if (audio) {
935 | if (x > 32767.0)
936 | xs = 32767;
937 | else if (x < -32768.0)
938 | xs = -32768;
939 | else
940 | xs = (short) x;
941 | HTS_Audio_write(audio, xs);
942 | }
943 |
944 | for (i = 0; i <= m; i++)
945 | v->c[i] += v->cinc[i];
946 | }
947 |
948 | HTS_Vocoder_end_excitation(v, p);
949 | HTS_movem(v->cc, v->c, m + 1);
950 | }
951 |
952 | /* HTS_Vocoder_clear: clear vocoder */
953 | void HTS_Vocoder_clear(HTS_Vocoder * v)
954 | {
955 | if (v != NULL) {
956 | /* free buffer */
957 | if (v->freqt_buff != NULL) {
958 | HTS_free(v->freqt_buff);
959 | v->freqt_buff = NULL;
960 | }
961 | v->freqt_size = 0;
962 | if (v->gc2gc_buff != NULL) {
963 | HTS_free(v->gc2gc_buff);
964 | v->gc2gc_buff = NULL;
965 | }
966 | v->gc2gc_size = 0;
967 | if (v->lsp2lpc_buff != NULL) {
968 | HTS_free(v->lsp2lpc_buff);
969 | v->lsp2lpc_buff = NULL;
970 | }
971 | v->lsp2lpc_size = 0;
972 | if (v->postfilter_buff != NULL) {
973 | HTS_free(v->postfilter_buff);
974 | v->postfilter_buff = NULL;
975 | }
976 | v->postfilter_size = 0;
977 | if (v->spectrum2en_buff != NULL) {
978 | HTS_free(v->spectrum2en_buff);
979 | v->spectrum2en_buff = NULL;
980 | }
981 | v->spectrum2en_size = 0;
982 | if (v->c != NULL) {
983 | HTS_free(v->c);
984 | v->c = NULL;
985 | }
986 | v->excite_buff_size = 0;
987 | v->excite_buff_index = 0;
988 | if (v->excite_ring_buff != NULL) {
989 | HTS_free(v->excite_ring_buff);
990 | v->excite_ring_buff = NULL;
991 | }
992 | }
993 | }
994 |
995 | HTS_VOCODER_C_END;
996 |
997 | #endif /* !HTS_VOCODER_C */
998 |
--------------------------------------------------------------------------------
/src/lib/Makefile.am:
--------------------------------------------------------------------------------
1 |
2 | EXTRA_DIST = Makefile.mak
3 |
4 | AM_CPPFLAGS = -I @top_srcdir@/include
5 |
6 | lib_LIBRARIES = libHTSEngine.a
7 |
8 | libHTSEngine_a_SOURCES = HTS_audio.c HTS_engine.c HTS_hidden.h HTS_misc.c \
9 | HTS_pstream.c HTS_sstream.c HTS_model.c HTS_vocoder.c \
10 | HTS_gstream.c HTS_label.c
11 |
12 | DISTCLEANFILES = *.log *.out *~
13 |
14 | MAINTAINERCLEANFILES = Makefile.in
15 |
--------------------------------------------------------------------------------
/src/lib/Makefile.mak:
--------------------------------------------------------------------------------
1 |
2 | CC = cl
3 |
4 | CFLAGS = /O2 /Ob2 /Oi /Ot /Oy /GT /GL /TC /I ..\include
5 | LFLAGS = /LTCG
6 |
7 | CORES = HTS_audio.obj HTS_engine.obj HTS_gstream.obj HTS_label.obj HTS_misc.obj HTS_model.obj HTS_pstream.obj HTS_sstream.obj HTS_vocoder.obj
8 |
9 | all: hts_engine_API.lib
10 |
11 | hts_engine_API.lib: $(CORES)
12 | lib $(LFLAGS) /OUT:$@ $(CORES)
13 |
14 | .c.obj:
15 | $(CC) $(CFLAGS) /c $<
16 |
17 | clean:
18 | del *.lib
19 | del *.obj
20 |
--------------------------------------------------------------------------------