├── .cvsignore
├── .github
    └── workflows
    │   └── ccpp.yaml
├── .gitignore
├── .project
├── .travis.yml
├── README.md
├── appveyor.yml
├── msys_build.sh
└── src
    ├── .cvsignore
    ├── AUTHORS
    ├── CMakeLists.txt
    ├── COPYING
    ├── INSTALL
    ├── Makefile.am
    ├── Makefile.mak
    ├── NEWS
    ├── README
    ├── bin
        ├── .cvsignore
        ├── Makefile.am
        ├── Makefile.mak
        └── hts_engine.c
    ├── config
        └── .cvsignore
    ├── configure.ac
    ├── hts_engine_API.pc.in
    ├── include
        └── HTS_engine.h
    └── lib
        ├── .cvsignore
        ├── HTS_audio.c
        ├── HTS_engine.c
        ├── HTS_gstream.c
        ├── HTS_hidden.h
        ├── HTS_label.c
        ├── HTS_misc.c
        ├── HTS_model.c
        ├── HTS_pstream.c
        ├── HTS_sstream.c
        ├── HTS_vocoder.c
        ├── Makefile.am
        └── Makefile.mak


/.cvsignore:
--------------------------------------------------------------------------------
1 | memo.txt
2 | 


--------------------------------------------------------------------------------
/.github/workflows/ccpp.yaml:
--------------------------------------------------------------------------------
 1 | name: C/C++ CI
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [ master ]
 6 |   pull_request:
 7 |     branches: [ master ]
 8 | 
 9 | jobs:
10 |   build:
11 |     strategy:
12 |       matrix:
13 |         build_type: [ Release ]
14 |         config:
15 |           - os: ubuntu-latest
16 |             cc: gcc
17 |             cxx: g++
18 |           - os: ubuntu-latest
19 |             cc: clang
20 |             cxx: clang++
21 |           - os: macos-latest
22 |             cc: gcc
23 |             cxx: g++
24 |           - os: macos-latest
25 |             cc: clang
26 |             cxx: clang++
27 |           - os: windows-latest
28 |             cc: cl
29 |             cxx: cl
30 | 
31 |     env:
32 |       CC: ${{ matrix.config.cc }}
33 |       CXX: ${{ matrix.config.cxx }}
34 | 
35 |     runs-on: ${{ matrix.config.os }}
36 | 
37 |     steps:
38 |       - uses: actions/checkout@v2
39 |       - name: Build
40 |         working-directory: src
41 |         run: |
42 |           cmake -D CMAKE_BUILD_TYPE=${{ matrix.build_type }} -D BUILD_SHARED_LIBS=ON -S . -B build
43 |           cmake --build build --config ${{ matrix.build_type }}
44 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Created by https://www.gitignore.io
  2 | 
  3 | ### Emacs ###
  4 | # -*- mode: gitignore; -*-
  5 | *~
  6 | \#*\#
  7 | /.emacs.desktop
  8 | /.emacs.desktop.lock
  9 | *.elc
 10 | auto-save-list
 11 | tramp
 12 | .\#*
 13 | 
 14 | # Org-mode
 15 | .org-id-locations
 16 | *_archive
 17 | 
 18 | # flymake-mode
 19 | *_flymake.*
 20 | 
 21 | # eshell files
 22 | /eshell/history
 23 | /eshell/lastdir
 24 | 
 25 | # elpa packages
 26 | /elpa/
 27 | 
 28 | # reftex files
 29 | *.rel
 30 | 
 31 | # AUCTeX auto folder
 32 | /auto/
 33 | 
 34 | # cask packages
 35 | .cask/
 36 | 
 37 | 
 38 | ### vim ###
 39 | [._]*.s[a-w][a-z]
 40 | [._]s[a-w][a-z]
 41 | *.un~
 42 | Session.vim
 43 | .netrwhist
 44 | *~
 45 | 
 46 | 
 47 | ### C++ ###
 48 | # Compiled Object files
 49 | *.slo
 50 | *.lo
 51 | *.o
 52 | *.obj
 53 | 
 54 | # Precompiled Headers
 55 | *.gch
 56 | *.pch
 57 | 
 58 | # Compiled Dynamic libraries
 59 | *.so
 60 | *.dylib
 61 | *.dll
 62 | 
 63 | # Fortran module files
 64 | *.mod
 65 | 
 66 | # Compiled Static libraries
 67 | *.lai
 68 | *.la
 69 | *.a
 70 | *.lib
 71 | 
 72 | # Executables
 73 | *.exe
 74 | *.out
 75 | *.app
 76 | 
 77 | 
 78 | ### Autotools ###
 79 | # http://www.gnu.org/software/automake
 80 | 
 81 | Makefile.in
 82 | 
 83 | # http://www.gnu.org/software/autoconf
 84 | 
 85 | /autom4te.cache
 86 | /aclocal.m4
 87 | /compile
 88 | /configure
 89 | /depcomp
 90 | /install-sh
 91 | /missing
 92 | /stamp-h1
 93 | 
 94 | 
 95 | ### CMake ###
 96 | CMakeCache.txt
 97 | CMakeFiles
 98 | Makefile
 99 | cmake_install.cmake
100 | install_manifest.txt
101 | 
102 | 
103 | ### Linux ###
104 | *~
105 | 
106 | # KDE directory preferences
107 | .directory
108 | 
109 | 
110 | ### OSX ###
111 | .DS_Store
112 | .AppleDouble
113 | .LSOverride
114 | 
115 | # Icon must end with two \r
116 | Icon
117 | 
118 | 
119 | # Thumbnails
120 | ._*
121 | 
122 | # Files that might appear on external disk
123 | .Spotlight-V100
124 | .Trashes
125 | 
126 | # Directories potentially created on remote AFP share
127 | .AppleDB
128 | .AppleDesktop
129 | Network Trash Folder
130 | Temporary Items
131 | .apdisk
132 | 
133 | 
134 | ### Windows ###
135 | # Windows image file caches
136 | Thumbs.db
137 | ehthumbs.db
138 | 
139 | # Folder config file
140 | Desktop.ini
141 | 
142 | # Recycle Bin used on file shares
143 | $RECYCLE.BIN/
144 | 
145 | # Windows Installer files
146 | *.cab
147 | *.msi
148 | *.msm
149 | *.msp
150 | 
151 | # Windows shortcuts
152 | *.lnk
153 | 
154 | # manually added
155 | .waf*
156 | .dropbox
157 | .lock-*
158 | *.lib
159 | *.log
160 | build
161 | Makefile
162 | .deps/
163 | config.status


--------------------------------------------------------------------------------
/.project:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <projectDescription>
 3 | 	<name>sourceforge_hts_engine_API</name>
 4 | 	<comment></comment>
 5 | 	<projects>
 6 | 	</projects>
 7 | 	<buildSpec>
 8 | 	</buildSpec>
 9 | 	<natures>
10 | 	</natures>
11 | </projectDescription>
12 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: cpp
 2 | 
 3 | # Ubuntu 18.04 https://docs.travis-ci.com/user/reference/bionic
 4 | # NOTE: this project requires cmake >= 3.11
 5 | dist: bionic
 6 | 
 7 | compiler:
 8 |   - gcc
 9 |   - clang
10 | 
11 | # Just check whether the build succeeds for now
12 | script:
13 |   - cd src
14 |   - mkdir -p build && cd build
15 |   - cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON ..
16 |   - make -j
17 | 
18 | notifications:
19 |   email: false
20 | 
21 | os:
22 |   - linux
23 |   - osx
24 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # hts_engine_API
 2 | 
 3 | 
 4 | ![C/C++ CI](https://github.com/r9y9/hts_engine_API/workflows/C/C++%20CI/badge.svg)
 5 | [![Build Status](https://travis-ci.org/r9y9/hts_engine_API.svg?branch=master)](https://travis-ci.org/r9y9/hts_engine_API)
 6 | [![Build status](https://ci.appveyor.com/api/projects/status/7tm96g50a9i43mhl/branch/master?svg=true)](https://ci.appveyor.com/project/r9y9/hts-engine-api/branch/master)
 7 | 
 8 | A fork of hts_engine_API
 9 | 
10 | ## Why
11 | 
12 | Wanted to fork it with *git*.
13 | 
14 | **NOTE**: To preserve the history of the CVS version of hts_engine_API, this fork was originally created by:
15 | 
16 | ```
17 | git cvsimport -v \
18 |   -d :pserver:anonymous@hts-engine.cvs.sourceforge.net:/cvsroot/hts-engine \
19 |   -C hts_engine_API hts_engine_API
20 | ```
21 | 
22 | ## Supported platforms
23 | 
24 | - Linux
25 | - Mac OS X
26 | - Windows (gcc/msvc)
27 | 
28 | ## Changes
29 | 
30 | The important changes from the original hts_engine_API are summarized below:
31 | 
32 | - CMake support
33 | - Add pkg-config support
34 | - Continuous integration support
35 | - Keep semantic versioning (http://semver.org/)
36 | 


--------------------------------------------------------------------------------
/appveyor.yml:
--------------------------------------------------------------------------------
 1 | environment:
 2 |   PYTHON: "C:/Python36"
 3 |   matrix:
 4 | # NOTE: we may want to revert this back if needed
 5 | #    - COMPILER: gcc
 6 | #      ARCH: "i686"
 7 | #
 8 | #    - COMPILER: gcc
 9 | #      ARCH: "x86_64"
10 | 
11 |     - COMPILER: msvc
12 |       ARCH: "i686"
13 | 
14 |     - COMPILER: msvc
15 |       ARCH: "x86_64"
16 | 
17 | init:
18 |   - "ECHO %PYTHON%"
19 |   - ps: "ls C:/Python*"
20 |   - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%"
21 | 
22 | cache:
23 |   - mingw-w32-bin-i686-20200211.7z
24 |   - mingw-w64-bin-x86_64-20200211.7z
25 | 
26 | skip_commits:
27 | # Add [av skip] to commit messages for docfixes, etc to reduce load on queue
28 |   message: /\[av skip\]/
29 | 
30 | notifications:
31 |   - provider: Email
32 |     on_build_success: false
33 |     on_build_failure: false
34 |     on_build_status_changed: false
35 | 
36 | build_script:
37 |   - C:\MinGW\msys\1.0\bin\sh --login /c/projects/hts-engine-api/msys_build.sh
38 | 
39 | artifacts:
40 |   - path: '**\*.dll'
41 |     name: hts_engine_API
42 | 


--------------------------------------------------------------------------------
/msys_build.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | #
 3 | # AppVeyor build driver, started from appveyor.yml through the MSYS shell.
 4 | #
 5 | # Environment (set by the build matrix in appveyor.yml):
 6 | #   ARCH      "x86_64" selects the 64-bit MinGW package, anything else 32-bit
 7 | #   COMPILER  "gcc" downloads MinGW-w64 and points CMake at gcc/g++;
 8 | #             any other value lets CMake pick its default toolchain
 9 | 
10 | # Stop at the first failing command.
11 | set -e
12 | 
13 | if [ "$ARCH" = x86_64 ]; then
14 |   bits=64
15 | else
16 |   bits=32
17 | fi
18 | 
19 | # Use this mingw instead of the pre-installed mingw on Appveyor
20 | if [ "$COMPILER" = gcc ]; then
21 |     f="mingw-w$bits-bin-$ARCH-20200211.7z"
22 |     if ! [ -e "$f" ]; then
23 |         echo "Downloading $f"
24 |         # -f: fail on HTTP errors instead of saving the error page as the archive
25 |         curl -fLsSO "https://sourceforge.net/projects/mingw-w64-dgn/files/mingw-w64/$f"
26 |     fi
27 |     7z x "$f" > /dev/null
28 |     # Quoted so that a working directory containing spaces survives intact.
29 |     export PATH="$PWD/mingw$bits/bin:$PATH"
30 |     export CC="$PWD/mingw$bits/bin/gcc"
31 | fi
32 | 
33 | # Build
34 | cd /c/projects/hts-engine-api/src
35 | mkdir -p build && cd build
36 | # NOTE: it seems this does not work as expected...
37 | if [ "$COMPILER" = gcc ]; then
38 |   cmake -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ ..
39 | else
40 |   cmake ..
41 | fi
42 | cmake --build . --config Release


--------------------------------------------------------------------------------
/src/.cvsignore:
--------------------------------------------------------------------------------
 1 | autom4te.cache
 2 | aclocal.m4
 3 | config.status
 4 | cvs2cl.pl
 5 | Makefile
 6 | Makefile.in
 7 | configure
 8 | format.sh
 9 | makeclean.sh
10 | test_mcp.sh
11 | test_mcp
12 | conf.sh
13 | *test*
14 | config.log
15 | hts_engine_API-0.99.tar.*
16 | *.bz2
17 | 


--------------------------------------------------------------------------------
/src/AUTHORS:
--------------------------------------------------------------------------------
 1 | The hts_engine API is software to synthesize speech waveform from HMMs trained
 2 | by the HMM-based speech synthesis system (HTS). This software is released
 3 | under the Modified BSD license. See the COPYING file in the same directory as
 4 | this file for the license.
 5 | 
 6 | The hts_engine_API has been developed by several members of HTS working group
 7 | and some graduate students in Nagoya Institute of Technology:
 8 | 
 9 |  Keiichi Tokuda      http://www.sp.nitech.ac.jp/~tokuda/
10 |  (Produce and Design)
11 |  Keiichiro Oura      http://www.sp.nitech.ac.jp/~uratec/
12 |  (Design and Development, Main Maintainer)
13 |  Heiga Zen
14 |  Shinji Sako         http://www.mmsp.nitech.ac.jp/~sako/
15 | 


--------------------------------------------------------------------------------
/src/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Build script for the hts_engine_API library and the hts_engine executable.
 2 | #
 3 | # CMake 3.12 is the oldest release that provides both
 4 | # file(GLOB_RECURSE ... CONFIGURE_DEPENDS) and add_compile_definitions(),
 5 | # which are used below.
 6 | cmake_minimum_required(VERSION 3.12)
 7 | 
 8 | # The name "HTSEngine" is used in the original makefile.
 9 | # For historical reasons "hts_engine_API" is used here for now;
10 | # we may want to switch to "HTSEngine" in the future.
11 | # NOTE: we should use semantic versioning. 1.09 -> 1.0.9!
12 | project(hts_engine_API)
13 | 
14 | # PROJECT_VER is used as the library VERSION and PROJECT_APIVER as its
15 | # SOVERSION further down.
16 | set(PROJECT_VER_MAJOR 1)
17 | set(PROJECT_VER_MINOR 0)
18 | set(PROJECT_VER_PATCH 9)
19 | set(PROJECT_VER "${PROJECT_VER_MAJOR}.${PROJECT_VER_MINOR}.${PROJECT_VER_PATCH}")
20 | set(PROJECT_APIVER "${PROJECT_VER_MAJOR}.${PROJECT_VER_MINOR}")
21 | 
22 | option(AUDIO_PLAY_NONE "Disable audio playing support" ON)
23 | 
24 | # NOTE: we can use portaudio or win32 if we really want.
25 | # Audio is disabled by default on the assumption that most people don't want
26 | # audio playing functionality.
27 | # The variable is tested by name (not as ${AUDIO_PLAY_NONE}) so that an empty
28 | # value is simply false instead of a malformed if() call, and the macro is
29 | # passed as a definition rather than as a raw "-D NAME" option string.
30 | if (AUDIO_PLAY_NONE)
31 |   add_compile_definitions(AUDIO_PLAY_NONE)
32 | endif ()
33 | 
34 | # set the default path for built executables to the "bin" directory
35 | set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/bin)
36 | 
37 | # set the default path for built libraries to the "lib" directory
38 | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
39 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
40 | 
41 | # Generate the pkg-config file; @ONLY limits substitution to @VAR@ references.
42 | configure_file(hts_engine_API.pc.in "${PROJECT_BINARY_DIR}/hts_engine_API.pc" @ONLY)
43 | 
44 | # Collect sources and headers; CONFIGURE_DEPENDS re-checks the globs at build time.
45 | file(GLOB_RECURSE SOURCE_FILES CONFIGURE_DEPENDS ${PROJECT_SOURCE_DIR}/lib/*.c)
46 | file(GLOB_RECURSE HEADER_FILES CONFIGURE_DEPENDS ${PROJECT_SOURCE_DIR}/lib/*.h ${PROJECT_SOURCE_DIR}/include/*.h)
47 | 
48 | include_directories(${PROJECT_SOURCE_DIR}/include)
49 | 
50 | # The hts_engine_API library (static or shared depending on BUILD_SHARED_LIBS)
51 | add_library(${PROJECT_NAME} ${SOURCE_FILES} ${HEADER_FILES})
52 | set_target_properties(${PROJECT_NAME} PROPERTIES
53 |   VERSION ${PROJECT_VER}
54 |   SOVERSION ${PROJECT_APIVER}
55 | )
56 | 
57 | # hts_engine binary
58 | add_executable(hts_engine bin/hts_engine.c)
59 | target_link_libraries(hts_engine hts_engine_API)
60 | 
61 | # Platform libraries: winmm on Windows, libm everywhere else.
62 | if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
63 |     target_link_libraries(${PROJECT_NAME} winmm)
64 |     target_link_libraries(hts_engine winmm)
65 | else()
66 |     target_link_libraries(hts_engine m)
67 | endif()
68 | 
69 | # Libraries go to lib, runtime artifacts (executables, DLLs) to bin.
70 | install(TARGETS ${PROJECT_NAME} hts_engine DESTINATION lib RUNTIME DESTINATION bin)
71 | install(FILES include/HTS_engine.h DESTINATION include)
72 | install(FILES "${PROJECT_BINARY_DIR}/hts_engine_API.pc" DESTINATION lib/pkgconfig/)


--------------------------------------------------------------------------------
/src/COPYING:
--------------------------------------------------------------------------------
 1 | /* ----------------------------------------------------------------- */
 2 | /*           The HMM-Based Speech Synthesis Engine "hts_engine API"  */
 3 | /*           developed by HTS Working Group                          */
 4 | /*           http://hts-engine.sourceforge.net/                      */
 5 | /* ----------------------------------------------------------------- */
 6 | /*                                                                   */
 7 | /*  Copyright (c) 2001-2014  Nagoya Institute of Technology          */
 8 | /*                           Department of Computer Science          */
 9 | /*                                                                   */
10 | /*                2001-2008  Tokyo Institute of Technology           */
11 | /*                           Interdisciplinary Graduate School of    */
12 | /*                           Science and Engineering                 */
13 | /*                                                                   */
14 | /* All rights reserved.                                              */
15 | /*                                                                   */
16 | /* Redistribution and use in source and binary forms, with or        */
17 | /* without modification, are permitted provided that the following   */
18 | /* conditions are met:                                               */
19 | /*                                                                   */
20 | /* - Redistributions of source code must retain the above copyright  */
21 | /*   notice, this list of conditions and the following disclaimer.   */
22 | /* - Redistributions in binary form must reproduce the above         */
23 | /*   copyright notice, this list of conditions and the following     */
24 | /*   disclaimer in the documentation and/or other materials provided */
25 | /*   with the distribution.                                          */
26 | /* - Neither the name of the HTS working group nor the names of its  */
27 | /*   contributors may be used to endorse or promote products derived */
28 | /*   from this software without specific prior written permission.   */
29 | /*                                                                   */
30 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND            */
31 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,       */
32 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF          */
33 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE          */
34 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
35 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,          */
36 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED   */
37 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,     */
38 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
39 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,   */
40 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY    */
41 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE           */
42 | /* POSSIBILITY OF SUCH DAMAGE.                                       */
43 | /* ----------------------------------------------------------------- */
44 | 


--------------------------------------------------------------------------------
/src/INSTALL:
--------------------------------------------------------------------------------
 1 | Installation Instructions
 2 | *************************
 3 | 
 4 | 1. Cd to ./src directory.
 5 | 
 6 |    % cd src
 7 | 
 8 | 2. Create "build" directory and cd to ./src/build.
 9 | 
10 |    % mkdir -p build && cd build
11 | 
12 | 3. Run "cmake" to create Makefile and "make" to compile.
13 | 
14 |    % cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON ..
15 |    % make -j
16 | 
17 | 4. Install library and binary.
18 | 
19 |    % make install


--------------------------------------------------------------------------------
/src/Makefile.am:
--------------------------------------------------------------------------------
 1 | # Extra files shipped in the distribution tarball.
 2 | EXTRA_DIST = AUTHORS COPYING ChangeLog INSTALL NEWS README Makefile.mak
 3 | # Subdirectories to recurse into, in this order.
 4 | SUBDIRS = lib bin
 5 | # Public header installed into $(includedir).
 6 | include_HEADERS = include/HTS_engine.h
 7 | # Files removed by "make distclean".
 8 | DISTCLEANFILES = *.log *.out *~ config/*~ include/*~
 9 | # Generated files removed by "make maintainer-clean".
10 | MAINTAINERCLEANFILES = aclocal.m4 configure Makefile.in config/compile \
11 |                        config/config.guess config/config.sub config/depcomp \
12 |                        config/install-sh config/missing
13 | 


--------------------------------------------------------------------------------
/src/Makefile.mak:
--------------------------------------------------------------------------------
 1 | # NMAKE makefile: builds, cleans and installs the lib and bin subdirectories.
 2 | #
 3 | # NOTE(review): the recipes rely on a `cd` carrying over to the following
 4 | # command lines (hence the matching `cd ..`) - confirm against the NMAKE docs.
 5 | 
 6 | # Installation root used by the "install" target.
 7 | INSTALLDIR = C:\hts_engine_API
 8 | 
 9 | # Build the library first, then the program that links against it.
10 | all:
11 | 	cd lib
12 | 	nmake /f Makefile.mak
13 | 	cd ..
14 | 	cd bin
15 | 	nmake /f Makefile.mak
16 | 	cd ..
17 | 
18 | # Remove build products in both subdirectories.
19 | clean:
20 | 	cd lib
21 | 	nmake /f Makefile.mak clean
22 | 	cd ..
23 | 	cd bin
24 | 	nmake /f Makefile.mak clean
25 | 	cd ..
26 | 
27 | # Copy libraries, executables and headers below INSTALLDIR.
28 | # Destinations are quoted like the mkdir lines so that an INSTALLDIR
29 | # containing spaces works for the copy commands as well.
30 | install::
31 | 	@if not exist "$(INSTALLDIR)\lib" mkdir "$(INSTALLDIR)\lib"
32 | 	cd lib
33 | 	copy *.lib "$(INSTALLDIR)\lib"
34 | 	cd ..
35 | 	@if not exist "$(INSTALLDIR)\bin" mkdir "$(INSTALLDIR)\bin"
36 | 	cd bin
37 | 	copy *.exe "$(INSTALLDIR)\bin"
38 | 	cd ..
39 | 	@if not exist "$(INSTALLDIR)\include" mkdir "$(INSTALLDIR)\include"
40 | 	cd include
41 | 	copy *.h "$(INSTALLDIR)\include"
42 | 	cd ..
43 | 


--------------------------------------------------------------------------------
/src/NEWS:
--------------------------------------------------------------------------------
 1 | Version 1.09:
 2 |  * add '-g' option to change volume.
 3 |  * add some functions to get fullcontext label format and version defined in HTS voice.
 4 |  * support 64-bit Windows audio.
 5 |  * bug fixes.
 6 | 
 7 | Version 1.08:
 8 |  * support 64-bit machine.
 9 |  * re-write excitation function.
10 |  * add some functions to get generated parameters and number of frames.
11 |  * add some functions for separating synthesis step.
12 |  * bug fixes.
13 | 
14 | Version 1.07:
15 |  * support new HTS voice format.
16 |  * add LSP postfilter.
17 |  * change volume unit to DB.
18 |  * add function to get generated speech.
19 |  * bug fixes.
20 | 
21 | Version 1.06:
22 |  * modify state duration calculation algorithm.
23 |  * change many functions from void to boolean.
24 |  * change source format.
25 |  * bug fixes.
26 | 
27 | Version 1.05:
28 |  * support PortAudio for audio output.
29 | 
30 | Version 1.04:
31 |  * support C++ compiler.
32 |  * add low-pass filter stream.
33 |  * change definition of GV weights.
34 |  * add stop switch.
35 |  * add volume controller.
36 |  * bug fixes.
37 | 
38 | Version 1.03:
39 |  * add '-z' option to control audio buffer size.
40 |  * change PDF file format.
41 |  * bug fixes.
42 | 
43 | Version 1.02:
44 |  * context-dependent GV without silent and pause phoneme.
45 |  * buffer size control of audio output for Windows.
46 |  * makefiles for nmake of VC.
47 |  * save detail information.
48 |  * support singing voice synthesis.
49 |  * bug fixes.
50 | 
51 | Version 1.01:
52 |  * bug fixes.
53 |  * transfer site to SourceForge.
54 | 
55 | Version 1.00:
56 |  * bug fixes and performance improvements.
57 |  * support linear gain in addition to log gain for LSP-type parameters.
58 |  * first stable release.
59 | 
60 | Version 0.99:
61 |  * bug fixes.
62 |  * switch license to the New and Simplified BSD license.
63 | 
64 | Version 0.96:
65 |  * bug fixes.
66 |  * support flexible model structure.
67 |  * support LSP-type parameters.
68 | 
69 | Version 0.95:
70 |  * support GV.
71 | 
72 | Version 0.9:
73 |  * implement API-style hts_engine.
74 |  * support speaker-interpolation.
75 | 


--------------------------------------------------------------------------------
/src/README:
--------------------------------------------------------------------------------
  1 | ===============================================================================
  2 |       The HMM-Based Speech Synthesis Engine "hts_engine API" version 1.09
  3 |                            release December 25, 2014
  4 | 
  5 | 
  6 | The hts_engine API is an API version of hts_engine which has been released
  7 | since HTS version 1.1. It has been being developed by the HTS working group
  8 | (see "Who we are" below) and some graduate students in Nagoya Institute of
  9 | Technology (see "AUTHORS" in the same directory).
 10 | 
 11 | *******************************************************************************
 12 |                                     Copying
 13 | *******************************************************************************
 14 | 
 15 | The hts_engine API is released under the Modified BSD license (see
 16 | http://www.opensource.org/). Using and distributing this software is free
 17 | (without restriction including without limitation the rights to use, copy,
 18 | modify, merge, publish, distribute, sublicense, and/or sell copies of this
 19 | work, and to permit persons to whom this work is furnished to do so) subject to
 20 | the conditions in the following license:
 21 | 
 22 | /* ----------------------------------------------------------------- */
 23 | /*           The HMM-Based Speech Synthesis Engine "hts_engine API"  */
 24 | /*           developed by HTS Working Group                          */
 25 | /*           http://hts-engine.sourceforge.net/                      */
 26 | /* ----------------------------------------------------------------- */
 27 | /*                                                                   */
 28 | /*  Copyright (c) 2001-2014  Nagoya Institute of Technology          */
 29 | /*                           Department of Computer Science          */
 30 | /*                                                                   */
 31 | /*                2001-2008  Tokyo Institute of Technology           */
 32 | /*                           Interdisciplinary Graduate School of    */
 33 | /*                           Science and Engineering                 */
 34 | /*                                                                   */
 35 | /* All rights reserved.                                              */
 36 | /*                                                                   */
 37 | /* Redistribution and use in source and binary forms, with or        */
 38 | /* without modification, are permitted provided that the following   */
 39 | /* conditions are met:                                               */
 40 | /*                                                                   */
 41 | /* - Redistributions of source code must retain the above copyright  */
 42 | /*   notice, this list of conditions and the following disclaimer.   */
 43 | /* - Redistributions in binary form must reproduce the above         */
 44 | /*   copyright notice, this list of conditions and the following     */
 45 | /*   disclaimer in the documentation and/or other materials provided */
 46 | /*   with the distribution.                                          */
 47 | /* - Neither the name of the HTS working group nor the names of its  */
 48 | /*   contributors may be used to endorse or promote products derived */
 49 | /*   from this software without specific prior written permission.   */
 50 | /*                                                                   */
 51 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND            */
 52 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,       */
 53 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF          */
 54 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE          */
 55 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
 56 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,          */
 57 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED   */
 58 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,     */
 59 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
 60 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,   */
 61 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY    */
 62 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE           */
 63 | /* POSSIBILITY OF SUCH DAMAGE.                                       */
 64 | /* ----------------------------------------------------------------- */
 65 | 
 66 | Although this software is free, we still offer no warranties and no
 67 | maintenance. We will continue to endeavor to fix bugs and answer queries when
 68 | we can, but are not in a position to guarantee it. We will consider consultancy
 69 | if desired; please contact us for details.
 70 | 
 71 | If you are using the hts_engine API in commercial environments, even though no
 72 | license is required, we would be grateful if you let us know as it helps
 73 | justify ourselves to our various sponsors. We also strongly encourage you to
 74 | 
 75 |  * refer to the use of hts_engine API in any publications that use this
 76 |    software
 77 |  * report bugs, where possible with bug fixes, that are found
 78 | 
 79 | See also "COPYING" file in the current directory for details.
 80 | 
 81 | *******************************************************************************
 82 |                                  Installation
 83 | *******************************************************************************
 84 | 
 85 | See "INSTALL" in the same directory for details.
 86 | 
 87 | *******************************************************************************
 88 |                                  Documentation
 89 | *******************************************************************************
 90 | 
 91 | Reference manual of hts_engine API is available at
 92 | 
 93 | http://hts-engine.sourceforge.net/
 94 | 
 95 | *******************************************************************************
 96 |                                Acknowledgements
 97 | *******************************************************************************
 98 | 
 99 | Keiichi Tokuda
100 | Shinji Sako
101 | Heiga Zen
102 | Keiichiro Oura
103 | Kazuhiro Nakamura
104 | Keijiro Saino
105 | 
106 | *******************************************************************************
107 |                                   Who we are
108 | *******************************************************************************
109 | 
110 | The HTS working group is a voluntary group for developing the HMM-Based Speech
111 | Synthesis System. Current members are
112 | 
113 |  Keiichi Tokuda      http://www.sp.nitech.ac.jp/~tokuda/
114 |  (Produce and Design)
115 |  Keiichiro Oura      http://www.sp.nitech.ac.jp/~uratec/
116 |  (Design and Development, Main Maintainer)
117 |  Kei Hashimoto       http://www.sp.nitech.ac.jp/~bonanza/
118 |  Sayaka Shiota       http://www.sp.nitech.ac.jp/~sayaka/
119 |  Shinji Takaki       http://www.sp.nitech.ac.jp/~k-prr44/
120 |  Heiga Zen
121 |  Junichi Yamagishi   http://homepages.inf.ed.ac.uk/jyamagis/
122 |  Tomoki Toda         http://spalab.naist.jp/~tomoki/index_e.html
123 |  Takashi Nose
124 |  Shinji Sako         http://www.mmsp.nitech.ac.jp/~sako/
125 |  Alan W. Black       http://www.cs.cmu.edu/~awb/
126 | 
127 | and the members are dynamically changing. The current formal contact address of
128 | HTS working group and a mailing list for HTS users can be found at
129 | http://hts.sp.nitech.ac.jp/
130 | ===============================================================================
131 | 


--------------------------------------------------------------------------------
/src/bin/.cvsignore:
--------------------------------------------------------------------------------
1 | Makefile.in
2 | hts_engine
3 | Makefile
4 | .deps
5 | 


--------------------------------------------------------------------------------
/src/bin/Makefile.am:
--------------------------------------------------------------------------------
 1 | # Ship the NMAKE makefile in the distribution tarball as well.
 2 | EXTRA_DIST = Makefile.mak
 3 | # Add the top-level include directory to the preprocessor search path.
 4 | AM_CPPFLAGS = -I @top_srcdir@/include
 5 | # Program built here and installed into $(bindir).
 6 | bin_PROGRAMS = hts_engine
 7 | # Sources of the hts_engine program.
 8 | hts_engine_SOURCES = hts_engine.c 
 9 | # Static library to link; presumably built by ../lib/Makefile.am - verify.
10 | hts_engine_LDADD = ../lib/libHTSEngine.a
11 | # Files removed by "make distclean".
12 | DISTCLEANFILES = *.log *.out *~
13 | # Generated file removed by "make maintainer-clean".
14 | MAINTAINERCLEANFILES = Makefile.in
15 | 


--------------------------------------------------------------------------------
/src/bin/Makefile.mak:
--------------------------------------------------------------------------------
 1 | # NMAKE makefile for the hts_engine command-line program (cl / link).
 2 | CC = cl
 3 | CL = link
 4 | # Compiler and linker flags; headers are taken from ..\include.
 5 | CFLAGS = /O2 /Ob2 /Oi /Ot /Oy /GT /GL /TC /I ..\include
 6 | LFLAGS = /LTCG
 7 | # Libraries passed to the linker.
 8 | LIBS = ..\lib\hts_engine_API.lib winmm.lib
 9 | # Default target.
10 | all: hts_engine.exe
11 | # Compile the .c file named after the target, then link it with LIBS.
12 | hts_engine.exe : hts_engine.obj
13 | 	$(CC) $(CFLAGS) /c $(@B).c
14 | 	$(CL) $(LFLAGS) /OUT:$@ $(LIBS) $(@B).obj
15 | # Delete the executables and object files in this directory.
16 | clean:	
17 | 	del *.exe
18 | 	del *.obj
19 | 


--------------------------------------------------------------------------------
/src/bin/hts_engine.c:
--------------------------------------------------------------------------------
  1 | /* ----------------------------------------------------------------- */
  2 | /*           The HMM-Based Speech Synthesis Engine "hts_engine API"  */
  3 | /*           developed by HTS Working Group                          */
  4 | /*           http://hts-engine.sourceforge.net/                      */
  5 | /* ----------------------------------------------------------------- */
  6 | /*                                                                   */
  7 | /*  Copyright (c) 2001-2014  Nagoya Institute of Technology          */
  8 | /*                           Department of Computer Science          */
  9 | /*                                                                   */
 10 | /*                2001-2008  Tokyo Institute of Technology           */
 11 | /*                           Interdisciplinary Graduate School of    */
 12 | /*                           Science and Engineering                 */
 13 | /*                                                                   */
 14 | /* All rights reserved.                                              */
 15 | /*                                                                   */
 16 | /* Redistribution and use in source and binary forms, with or        */
 17 | /* without modification, are permitted provided that the following   */
 18 | /* conditions are met:                                               */
 19 | /*                                                                   */
 20 | /* - Redistributions of source code must retain the above copyright  */
 21 | /*   notice, this list of conditions and the following disclaimer.   */
 22 | /* - Redistributions in binary form must reproduce the above         */
 23 | /*   copyright notice, this list of conditions and the following     */
 24 | /*   disclaimer in the documentation and/or other materials provided */
 25 | /*   with the distribution.                                          */
 26 | /* - Neither the name of the HTS working group nor the names of its  */
 27 | /*   contributors may be used to endorse or promote products derived */
 28 | /*   from this software without specific prior written permission.   */
 29 | /*                                                                   */
 30 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND            */
 31 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,       */
 32 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF          */
 33 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE          */
 34 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
 35 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,          */
 36 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED   */
 37 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,     */
 38 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
 39 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,   */
 40 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY    */
 41 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE           */
 42 | /* POSSIBILITY OF SUCH DAMAGE.                                       */
 43 | /* ----------------------------------------------------------------- */
 44 | 
 45 | #ifndef HTS_ENGINE_C
 46 | #define HTS_ENGINE_C
 47 | 
 48 | #ifdef __cplusplus
 49 | #define HTS_ENGINE_C_START extern "C" {
 50 | #define HTS_ENGINE_C_END   }
 51 | #else
 52 | #define HTS_ENGINE_C_START
 53 | #define HTS_ENGINE_C_END
 54 | #endif                          /* __cplusplus */
 55 | 
 56 | HTS_ENGINE_C_START;
 57 | 
 58 | #include <stdlib.h>
 59 | 
 60 | #include "HTS_engine.h"
 61 | 
 62 | /* usage: write the copyright banner and the command-line help to stderr, then exit with status 0 */
 63 | void usage(void)
 64 | {
 65 |    fputs(HTS_COPYRIGHT "\n", stderr);
 66 |    fputs("hts_engine - The HMM-based speech synthesis engine \"hts_engine API\"\n", stderr);
 67 |    fputs("\n", stderr);
 68 |    fputs("  usage:\n", stderr);
 69 |    fputs("    hts_engine [ options ] [ infile ]\n", stderr);
 70 |    fputs("  options:                                                                   [  def][ min-- max]\n", stderr);
 71 |    fputs("    -m  htsvoice   : HTS voice files                                         [  N/A]\n", stderr);
 72 |    fputs("    -od s          : filename of output label with duration                  [  N/A]\n", stderr);
 73 |    fputs("    -om s          : filename of output spectrum                             [  N/A]\n", stderr);
 74 |    fputs("    -of s          : filename of output log F0                               [  N/A]\n", stderr);
 75 |    fputs("    -ol s          : filename of output low-pass filter                      [  N/A]\n", stderr);
 76 |    fputs("    -or s          : filename of output raw audio (generated speech)         [  N/A]\n", stderr);
 77 |    fputs("    -ow s          : filename of output wav audio (generated speech)         [  N/A]\n", stderr);
 78 |    fputs("    -ot s          : filename of output trace information                    [  N/A]\n", stderr);
 79 |    fputs("    -vp            : use phoneme alignment for duration                      [  N/A]\n", stderr);
 80 |    fputs("    -i  i f1 .. fi : enable interpolation & specify number(i),coefficient(f) [  N/A]\n", stderr);
 81 |    fputs("    -s  i          : sampling frequency                                      [ auto][   1--    ]\n", stderr);
 82 |    fputs("    -p  i          : frame period (point)                                    [ auto][   1--    ]\n", stderr);
 83 |    fputs("    -a  f          : all-pass constant                                       [ auto][ 0.0-- 1.0]\n", stderr);
 84 |    fputs("    -b  f          : postfiltering coefficient                               [  0.0][ 0.0-- 1.0]\n", stderr);
 85 |    fputs("    -r  f          : speech speed rate                                       [  1.0][ 0.0--    ]\n", stderr);
 86 |    fputs("    -fm f          : additional half-tone                                    [  0.0][    --    ]\n", stderr);
 87 |    fputs("    -u  f          : voiced/unvoiced threshold                               [  0.5][ 0.0-- 1.0]\n", stderr);
 88 |    fputs("    -jm f          : weight of GV for spectrum                               [  1.0][ 0.0--    ]\n", stderr);
 89 |    fputs("    -jf f          : weight of GV for log F0                                 [  1.0][ 0.0--    ]\n", stderr);
 90 |    fputs("    -g  f          : volume (dB)                                             [  0.0][    --    ]\n", stderr);
 91 |    fputs("    -z  i          : audio buffer size (if i==0, turn off)                   [    0][   0--    ]\n", stderr);
 92 |    fputs("  infile:\n", stderr);
 93 |    fputs("    label file\n", stderr);
 94 |    fputs("  note:\n", stderr);
 95 |    fputs("    generated spectrum, log F0, and low-pass filter coefficient\n", stderr);
 96 |    fputs("    sequences are saved in natural endian, binary (float) format.\n", stderr);
 97 |    fputs("\n", stderr);
 98 | 
 99 |    exit(EXIT_SUCCESS);
100 | }
101 | 
102 | /* require_value: advance to the argument that follows the current option and return it. */
103 | /*   engine - engine to clear before exiting when the argument is missing                */
104 | /*   argc   - arguments left, counting the current one (decremented on success)          */
105 | /*   argv   - cursor pointing at the current argument (advanced on success)              */
106 | /*   option - option text quoted in the error message                                    */
107 | /* Exits with status 1 instead of reading past the end of argv.                          */
108 | static char *require_value(HTS_Engine * engine, int *argc, char ***argv, const char *option)
109 | {
110 |    if (*argc <= 1) {
111 |       fprintf(stderr, "Error: Option '%s' requires an argument.\n", option);
112 |       HTS_Engine_clear(engine);
113 |       exit(1);
114 |    }
115 |    --(*argc);
116 |    return *++(*argv);
117 | }
118 | 
119 | /* open_output: open an output file; report the failure and exit with status 1 if it cannot be opened */
120 | static FILE *open_output(HTS_Engine * engine, const char *fn, const char *mode)
121 | {
122 |    FILE *fp = fopen(fn, mode);
123 | 
124 |    if (fp == NULL) {
125 |       fprintf(stderr, "Error: Cannot open output file '%s'.\n", fn);
126 |       HTS_Engine_clear(engine);
127 |       exit(1);
128 |    }
129 |    return fp;
130 | }
131 | 
132 | /* main: command-line front end of the hts_engine API.                                   */
133 | /*   Loads the HTS voices named by -m, applies the remaining options, synthesizes from   */
134 | /*   the label file and writes every output that was requested with an -o? option.       */
135 | /*   Returns 0 on success; exits with status 1 on a command-line, loading or synthesis   */
136 | /*   error (usage() itself exits with status 0).                                         */
137 | int main(int argc, char **argv)
138 | {
139 |    int i;
140 |    double f;
141 |    const char *opt;             /* option currently being parsed */
142 | 
143 |    /* hts_engine API */
144 |    HTS_Engine engine;
145 | 
146 |    /* HTS voices */
147 |    size_t num_voices;
148 |    char **fn_voices;
149 | 
150 |    /* input label file name */
151 |    char *labfn = NULL;
152 | 
153 |    /* output file pointers */
154 |    FILE *durfp = NULL, *mgcfp = NULL, *lf0fp = NULL, *lpffp = NULL, *wavfp = NULL, *rawfp = NULL, *tracefp = NULL;
155 | 
156 |    /* interpolation weights */
157 |    size_t num_interpolation_weights;
158 | 
159 |    /* output usage */
160 |    if (argc <= 1)
161 |       usage();
162 | 
163 |    /* initialize hts_engine API */
164 |    HTS_Engine_initialize(&engine);
165 | 
166 |    /* get HTS voice file names (argv[0] is the program name, so the scan starts at 1) */
167 |    num_voices = 0;
168 |    fn_voices = (char **) malloc(argc * sizeof(char *));
169 |    if (fn_voices == NULL) {
170 |       fprintf(stderr, "Error: Cannot allocate memory.\n");
171 |       exit(1);
172 |    }
173 |    for (i = 1; i < argc; i++) {
174 |       if (argv[i][0] != '-')
175 |          continue;
176 |       if (argv[i][1] == 'h')
177 |          usage();
178 |       if (argv[i][1] == 'm') {
179 |          /* a trailing -m has no file name: argv[argc] is NULL and must not be used */
180 |          if (i + 1 >= argc) {
181 |             fprintf(stderr, "Error: Option '%s' requires an argument.\n", argv[i]);
182 |             free(fn_voices);
183 |             exit(1);
184 |          }
185 |          fn_voices[num_voices++] = argv[++i];
186 |       }
187 |    }
188 |    if (num_voices == 0) {
189 |       fprintf(stderr, "Error: HTS voice must be specified.\n");
190 |       free(fn_voices);
191 |       exit(1);
192 |    }
193 | 
194 |    /* load HTS voices */
195 |    if (HTS_Engine_load(&engine, fn_voices, num_voices) != TRUE) {
196 |       fprintf(stderr, "Error: HTS voices cannot be loaded.\n");
197 |       free(fn_voices);
198 |       HTS_Engine_clear(&engine);
199 |       exit(1);
200 |    }
201 |    free(fn_voices);
202 | 
203 |    /* get options; inside the loop argc counts the arguments left, including the current one */
204 |    while (--argc) {
205 |       if (**++argv == '-') {
206 |          opt = *argv;
207 |          switch (opt[1]) {
208 |          case 'v':
209 |             switch (opt[2]) {
210 |             case 'p':
211 |                HTS_Engine_set_phoneme_alignment_flag(&engine, TRUE);
212 |                break;
213 |             default:
214 |                fprintf(stderr, "Error: Invalid option '-v%c'.\n", opt[2]);
215 |                HTS_Engine_clear(&engine);
216 |                exit(1);
217 |             }
218 |             break;
219 |          case 'o':
220 |             switch (opt[2]) {
221 |             case 'w':
222 |                wavfp = open_output(&engine, require_value(&engine, &argc, &argv, opt), "wb");
223 |                break;
224 |             case 'r':
225 |                rawfp = open_output(&engine, require_value(&engine, &argc, &argv, opt), "wb");
226 |                break;
227 |             case 'd':
228 |                durfp = open_output(&engine, require_value(&engine, &argc, &argv, opt), "wt");
229 |                break;
230 |             case 'm':
231 |                mgcfp = open_output(&engine, require_value(&engine, &argc, &argv, opt), "wb");
232 |                break;
233 |             case 'f':
234 |             case 'p':
235 |                lf0fp = open_output(&engine, require_value(&engine, &argc, &argv, opt), "wb");
236 |                break;
237 |             case 'l':
238 |                lpffp = open_output(&engine, require_value(&engine, &argc, &argv, opt), "wb");
239 |                break;
240 |             case 't':
241 |                tracefp = open_output(&engine, require_value(&engine, &argc, &argv, opt), "wt");
242 |                break;
243 |             default:
244 |                fprintf(stderr, "Error: Invalid option '-o%c'.\n", opt[2]);
245 |                HTS_Engine_clear(&engine);
246 |                exit(1);
247 |             }
248 |             break;
249 |          case 'h':
250 |             usage();
251 |             break;
252 |          case 'm':
253 |             require_value(&engine, &argc, &argv, opt);  /* skip the file name: HTS voices were already loaded */
254 |             break;
255 |          case 's':
256 |             HTS_Engine_set_sampling_frequency(&engine, (size_t) atoi(require_value(&engine, &argc, &argv, opt)));
257 |             break;
258 |          case 'p':
259 |             HTS_Engine_set_fperiod(&engine, (size_t) atoi(require_value(&engine, &argc, &argv, opt)));
260 |             break;
261 |          case 'a':
262 |             HTS_Engine_set_alpha(&engine, atof(require_value(&engine, &argc, &argv, opt)));
263 |             break;
264 |          case 'b':
265 |             HTS_Engine_set_beta(&engine, atof(require_value(&engine, &argc, &argv, opt)));
266 |             break;
267 |          case 'r':
268 |             HTS_Engine_set_speed(&engine, atof(require_value(&engine, &argc, &argv, opt)));
269 |             break;
270 |          case 'f':
271 |             switch (opt[2]) {
272 |             case 'm':
273 |                HTS_Engine_add_half_tone(&engine, atof(require_value(&engine, &argc, &argv, opt)));
274 |                break;
275 |             default:
276 |                fprintf(stderr, "Error: Invalid option '-f%c'.\n", opt[2]);
277 |                HTS_Engine_clear(&engine);
278 |                exit(1);
279 |             }
280 |             break;
281 |          case 'u':
282 |             HTS_Engine_set_msd_threshold(&engine, 1, atof(require_value(&engine, &argc, &argv, opt)));
283 |             break;
284 |          case 'i':
285 |             num_interpolation_weights = atoi(require_value(&engine, &argc, &argv, opt));
286 |             if (num_interpolation_weights != num_voices) {
287 |                fprintf(stderr, "Error: Number of interpolation weights must be equal to the number of HTS voices.\n");
288 |                HTS_Engine_clear(&engine);
289 |                exit(1);
290 |             }
291 |             for (i = 0; (size_t) i < num_interpolation_weights; i++) {
292 |                f = atof(require_value(&engine, &argc, &argv, opt));
293 |                HTS_Engine_set_duration_interpolation_weight(&engine, i, f);
294 |                HTS_Engine_set_parameter_interpolation_weight(&engine, i, 0, f);
295 |                HTS_Engine_set_parameter_interpolation_weight(&engine, i, 1, f);
296 |                HTS_Engine_set_gv_interpolation_weight(&engine, i, 0, f);
297 |                HTS_Engine_set_gv_interpolation_weight(&engine, i, 1, f);
298 |             }
299 |             break;
300 |          case 'j':
301 |             switch (opt[2]) {
302 |             case 'm':
303 |                HTS_Engine_set_gv_weight(&engine, 0, atof(require_value(&engine, &argc, &argv, opt)));
304 |                break;
305 |             case 'f':
306 |             case 'p':
307 |                HTS_Engine_set_gv_weight(&engine, 1, atof(require_value(&engine, &argc, &argv, opt)));
308 |                break;
309 |             default:
310 |                fprintf(stderr, "Error: Invalid option '-j%c'.\n", opt[2]);
311 |                HTS_Engine_clear(&engine);
312 |                exit(1);
313 |             }
314 |             break;
315 |          case 'g':
316 |             HTS_Engine_set_volume(&engine, atof(require_value(&engine, &argc, &argv, opt)));
317 |             break;
318 |          case 'z':
319 |             HTS_Engine_set_audio_buff_size(&engine, (size_t) atoi(require_value(&engine, &argc, &argv, opt)));
320 |             break;
321 |          default:
322 |             fprintf(stderr, "Error: Invalid option '-%c'.\n", opt[1]);
323 |             HTS_Engine_clear(&engine);
324 |             exit(1);
325 |          }
326 |       } else {
327 |          labfn = *argv;
328 |       }
329 |    }
330 | 
331 |    /* synthesize */
332 |    /* NOTE(review): labfn is still NULL when no label file was given - confirm that HTS_Engine_synthesize_from_fn() copes with that */
333 |    if (HTS_Engine_synthesize_from_fn(&engine, labfn) != TRUE) {
334 |       fprintf(stderr, "Error: waveform cannot be synthesized.\n");
335 |       HTS_Engine_clear(&engine);
336 |       exit(1);
337 |    }
338 | 
339 |    /* output */
340 |    if (tracefp != NULL)
341 |       HTS_Engine_save_information(&engine, tracefp);
342 |    if (durfp != NULL)
343 |       HTS_Engine_save_label(&engine, durfp);
344 |    if (rawfp)
345 |       HTS_Engine_save_generated_speech(&engine, rawfp);
346 |    if (wavfp)
347 |       HTS_Engine_save_riff(&engine, wavfp);
348 |    if (mgcfp)
349 |       HTS_Engine_save_generated_parameter(&engine, 0, mgcfp);
350 |    if (lf0fp)
351 |       HTS_Engine_save_generated_parameter(&engine, 1, lf0fp);
352 |    if (lpffp)
353 |       HTS_Engine_save_generated_parameter(&engine, 2, lpffp);
354 | 
355 |    /* reset */
356 |    HTS_Engine_refresh(&engine);
357 | 
358 |    /* free memory */
359 |    HTS_Engine_clear(&engine);
360 | 
361 |    /* close files */
362 |    if (durfp != NULL)
363 |       fclose(durfp);
364 |    if (mgcfp != NULL)
365 |       fclose(mgcfp);
366 |    if (lf0fp != NULL)
367 |       fclose(lf0fp);
368 |    if (lpffp != NULL)
369 |       fclose(lpffp);
370 |    if (wavfp != NULL)
371 |       fclose(wavfp);
372 |    if (rawfp != NULL)
373 |       fclose(rawfp);
374 |    if (tracefp != NULL)
375 |       fclose(tracefp);
376 | 
377 |    return 0;
378 | }
379 | 
380 | HTS_ENGINE_C_END;
381 | 
382 | #endif                          /* !HTS_ENGINE_C */
383 | 


--------------------------------------------------------------------------------
/src/config/.cvsignore:
--------------------------------------------------------------------------------
1 | depcomp
2 | install-sh
3 | missing
4 | config.guess
5 | config.sub
6 | 


--------------------------------------------------------------------------------
/src/configure.ac:
--------------------------------------------------------------------------------
 1 | #                                               -*- Autoconf -*-
 2 | # Process this file with autoconf to produce a configure script.
 3 | 
 4 | AC_PREREQ(2.59)
 5 | AC_INIT(hts_engine_API, 1.09, hts-engine-users@lists.sourceforge.net, hts_engine_API)
 6 | AC_CONFIG_AUX_DIR([config])
 7 | AC_COPYRIGHT(Copyright 2001-2014 Nagoya Institute of Technology)
 8 | AC_COPYRIGHT(Copyright 2001-2008 Tokyo Institute of Technology)
 9 | AM_INIT_AUTOMAKE
10 | 
11 | # Checks for the C compiler and the install, ranlib and ar tools (AC_PROG_AR is defined locally below)
12 | AC_PROG_CC
13 | AM_PROG_CC_C_O
14 | AC_PROG_INSTALL
15 | AC_PROG_RANLIB
16 | AN_MAKEVAR([AR], [AC_PROG_AR])
17 | AN_PROGRAM([ar], [AC_PROG_AR])
18 | AC_DEFUN([AC_PROG_AR], [AC_CHECK_TOOL(AR, ar, :)])
19 | AC_PROG_AR
20 | 
21 | 
22 | # Checks for libraries: the math library, probed through the log function.
23 | AC_CHECK_LIB([m], [log])
24 | 
25 | 
26 | # Checks for header files.
27 | AC_HEADER_STDC
28 | AC_CHECK_HEADERS([stdlib.h string.h])
29 | 
30 | 
31 | # Checks for typedefs, structures, and compiler characteristics.
32 | AC_C_CONST
33 | AC_TYPE_SIZE_T
34 | 
35 | # Checks for library functions.
36 | AC_FUNC_VPRINTF
37 | AC_CHECK_FUNCS([sqrt strchr strrchr strstr])
38 | 
39 | 
40 | # Option --enable-embedded: define HTS_EMBEDDED when compiling for embedded devices (default is no)
41 | AC_ARG_ENABLE(embedded, [  --enable-embedded   turn on compiling for embedded devices (default=no)],,enable_embedded=no)
42 | AC_MSG_CHECKING(whether to enable compiling for embedded devices)
43 | if test x$enable_embedded = xyes; then
44 | 	AC_MSG_RESULT(yes)
45 | 	AC_DEFINE(HTS_EMBEDDED)
46 | else
47 | 	AC_MSG_RESULT(no)
48 | fi
49 | 
50 | 
51 | # Option --enable-festival: define FESTIVAL to use the memory allocation/free functions of speech tools (default is no)
52 | AC_ARG_ENABLE(festival, [  --enable-festival   use memory allocation/free functions of speech tools (default=no)],,enable_festival=no)
53 | AC_MSG_CHECKING(whether to use memory allocation/free functions of speech tools)
54 | if test x$enable_festival = xyes; then
55 | 	AC_MSG_RESULT(yes)
56 | 	AC_DEFINE(FESTIVAL)
57 | else
58 | 	AC_MSG_RESULT(no)
59 | fi
60 | 
61 | # Determine the canonical host type (host_os is tested below) and the byte order
62 | AC_CANONICAL_HOST
63 | AC_C_BIGENDIAN
64 | 
65 | 
66 | # Checks for the winmm library (Windows audio devices) on win32, wince, cygwin and mingw hosts; configure fails without it
67 | case "$host_os" in
68 |  *win32* | *wince* | *cygwin* | *mingw* )
69 | 	AC_HAVE_LIBRARY([winmm],,AC_MSG_ERROR(No winmm))
70 | 	;;
71 |  *)
72 | 	;;
73 | esac
74 | 
75 | # Makefiles to generate
76 | AC_CONFIG_FILES([Makefile bin/Makefile lib/Makefile])
77 | 
78 | AC_OUTPUT
79 | 
80 | 


--------------------------------------------------------------------------------
/src/hts_engine_API.pc.in:
--------------------------------------------------------------------------------
 1 | prefix=@CMAKE_INSTALL_PREFIX@
 2 | exec_prefix=@CMAKE_INSTALL_PREFIX@
 3 | libdir=${prefix}/lib
 4 | includedir=${prefix}/include
 5 | # Package metadata and flags. NOTE(review): the CMAKE_* placeholders are presumably substituted by the CMake build - confirm.
 6 | Name: @CMAKE_PROJECT_NAME@
 7 | Description: hts_engine API - a library to synthesize speech waveform from HMMs trained by the HMM-based speech synthesis system (HTS).
 8 | Version: @CMAKE_PROJECT_VERSION@
 9 | Cflags: -I${includedir}
10 | Libs: -L${libdir} -l@CMAKE_PROJECT_NAME@
11 | 


--------------------------------------------------------------------------------
/src/include/HTS_engine.h:
--------------------------------------------------------------------------------
  1 | /* ----------------------------------------------------------------- */
  2 | /*           The HMM-Based Speech Synthesis Engine "hts_engine API"  */
  3 | /*           developed by HTS Working Group                          */
  4 | /*           http://hts-engine.sourceforge.net/                      */
  5 | /* ----------------------------------------------------------------- */
  6 | /*                                                                   */
  7 | /*  Copyright (c) 2001-2014  Nagoya Institute of Technology          */
  8 | /*                           Department of Computer Science          */
  9 | /*                                                                   */
 10 | /*                2001-2008  Tokyo Institute of Technology           */
 11 | /*                           Interdisciplinary Graduate School of    */
 12 | /*                           Science and Engineering                 */
 13 | /*                                                                   */
 14 | /* All rights reserved.                                              */
 15 | /*                                                                   */
 16 | /* Redistribution and use in source and binary forms, with or        */
 17 | /* without modification, are permitted provided that the following   */
 18 | /* conditions are met:                                               */
 19 | /*                                                                   */
 20 | /* - Redistributions of source code must retain the above copyright  */
 21 | /*   notice, this list of conditions and the following disclaimer.   */
 22 | /* - Redistributions in binary form must reproduce the above         */
 23 | /*   copyright notice, this list of conditions and the following     */
 24 | /*   disclaimer in the documentation and/or other materials provided */
 25 | /*   with the distribution.                                          */
 26 | /* - Neither the name of the HTS working group nor the names of its  */
 27 | /*   contributors may be used to endorse or promote products derived */
 28 | /*   from this software without specific prior written permission.   */
 29 | /*                                                                   */
 30 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND            */
 31 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,       */
 32 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF          */
 33 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE          */
 34 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
 35 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,          */
 36 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED   */
 37 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,     */
 38 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
 39 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,   */
 40 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY    */
 41 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE           */
 42 | /* POSSIBILITY OF SUCH DAMAGE.                                       */
 43 | /* ----------------------------------------------------------------- */
 44 | 
 45 | #ifndef HTS_ENGINE_H
 46 | #define HTS_ENGINE_H
 47 | 
 48 | #ifdef __cplusplus
 49 | #define HTS_ENGINE_H_START extern "C" {
 50 | #define HTS_ENGINE_H_END   }
 51 | #else
 52 | #define HTS_ENGINE_H_START
 53 | #define HTS_ENGINE_H_END
 54 | #endif                          /* __cplusplus */
 55 | 
 56 | HTS_ENGINE_H_START;
 57 | 
 58 | #include <stdio.h>
 59 | 
 60 | #ifndef HTS_ENGINE_DLLEXPORT_H_
 61 | #define HTS_ENGINE_DLLEXPORT_H_
 62 | /* DLLEXPORT: __declspec(dllexport) when _WIN32 is defined, empty otherwise; a definition made beforehand is kept */
 63 | #ifndef DLLEXPORT
 64 | #  ifdef _WIN32
 65 | #      define DLLEXPORT __declspec(dllexport)
 66 | #    else
 67 | #      define DLLEXPORT
 68 | #  endif                        /* _WIN32 */
 69 | #endif                          /* !DLLEXPORT */
 70 | #endif                          /* !HTS_ENGINE_DLLEXPORT_H_ */
 71 | 
 72 | /* common ---------------------------------------------------------- */
 73 | 
 74 | typedef char HTS_Boolean;       /* boolean type stored in a char; TRUE and FALSE below are defined only if not defined already */
 75 | 
 76 | #ifndef TRUE
 77 | #define TRUE  1
 78 | #endif                          /* !TRUE */
 79 | 
 80 | #ifndef FALSE
 81 | #define FALSE 0
 82 | #endif                          /* !FALSE */
 83 | 
 84 | #ifndef HTS_NODATA
 85 | #define HTS_NODATA (-1.0e+10)
 86 | #endif                          /* !HTS_NODATA */
 87 | 
 88 | /* copyright ------------------------------------------------------- */
 89 | 
 90 | #define HTS_COPYRIGHT "The HMM-Based Speech Synthesis Engine \"hts_engine API\"\nVersion 1.09 (http://hts-engine.sourceforge.net/)\nCopyright (C) 2001-2014 Nagoya Institute of Technology\n              2001-2008 Tokyo Institute of Technology\nAll rights reserved.\n"
 91 | 
 92 | /* audio ----------------------------------------------------------- */
 93 | 
 94 | /* HTS_Audio: audio output wrapper (output settings, sample buffer and an untyped handle to the audio interface) */
 95 | typedef struct _HTS_Audio {
 96 |    size_t sampling_frequency;   /* sampling frequency */
 97 |    size_t max_buff_size;        /* buffer size for audio output interface */
 98 |    short *buff;                 /* current buffer (short samples) */
 99 |    size_t buff_size;            /* current buffer size */
100 |    void *audio_interface;       /* audio interface specified in compile step (kept as void pointer) */
101 | } HTS_Audio;
102 | 
103 | /* model ----------------------------------------------------------- */
104 | 
105 | /* HTS_Window: set of windows (static + deltas) whose coefficients are used to calculate dynamic features. */
106 | typedef struct _HTS_Window {
107 |    size_t size;                 /* # of windows (static + deltas) */
108 |    int *l_width;                /* left widths of the windows */
109 |    int *r_width;                /* right widths of the windows */
110 |    double **coefficient;        /* window coefficients */
111 |    size_t max_width;            /* maximum width of windows */
112 | } HTS_Window;
113 | 
114 | /* HTS_Pattern: singly linked list node holding one pattern string; pattern lists are owned by questions and trees. */
115 | typedef struct _HTS_Pattern {
116 |    char *string;                /* pattern string */
117 |    struct _HTS_Pattern *next;   /* pointer to the next pattern (list link) */
118 | } HTS_Pattern;
119 | 
120 | /* HTS_Question: singly linked list node for one question: its name and its list of patterns. */
121 | typedef struct _HTS_Question {
122 |    char *string;                /* name of this question */
123 |    HTS_Pattern *head;           /* pointer to the head of pattern list */
124 |    struct _HTS_Question *next;  /* pointer to the next question (list link) */
125 | } HTS_Question;
126 | 
127 | /* HTS_Node: decision tree node; linked to its yes/no children and chained to the next node of the tree's node list. */
128 | typedef struct _HTS_Node {
129 |    int index;                   /* index of this node */
130 |    size_t pdf;                  /* index of PDF for this node (leaf node only) */
131 |    struct _HTS_Node *yes;       /* pointer to its child node (yes) */
132 |    struct _HTS_Node *no;        /* pointer to its child node (no) */
133 |    struct _HTS_Node *next;      /* pointer to the next node (list link) */
134 |    HTS_Question *quest;         /* question applied at this node */
135 | } HTS_Node;
136 | 
137 | /* HTS_Tree: one decision tree (pattern list, root node, state index); the trees of a model are chained through next. */
138 | typedef struct _HTS_Tree {
139 |    HTS_Pattern *head;           /* pointer to the head of pattern list for this tree */
140 |    struct _HTS_Tree *next;      /* pointer to next tree (list link) */
141 |    HTS_Node *root;              /* root node of this tree */
142 |    size_t state;                /* state index of this tree */
143 | } HTS_Tree;
144 | 
145 | /* HTS_Model: set of PDFs together with the decision trees and questions that belong to them. */
146 | typedef struct _HTS_Model {
147 |    size_t vector_length;        /* vector length (static features only) */
148 |    size_t num_windows;          /* # of windows for delta */
149 |    HTS_Boolean is_msd;          /* flag for MSD */
150 |    size_t ntree;                /* # of trees */
151 |    size_t *npdf;                /* # of PDFs at each tree */
152 |    float ***pdf;                /* PDFs */
153 |    HTS_Tree *tree;              /* pointer to the list of trees (head of the HTS_Tree chain) */
154 |    HTS_Question *question;      /* pointer to the list of questions (head of the HTS_Question chain) */
155 | } HTS_Model;
156 | 
157 | /* HTS_ModelSet: set of duration models, HMMs (stream models) and GV models, together with voice format information. */
158 | typedef struct _HTS_ModelSet {
159 |    char *hts_voice_version;     /* version of HTS voice format */
160 |    size_t sampling_frequency;   /* sampling frequency */
161 |    size_t frame_period;         /* frame period */
162 |    size_t num_voices;           /* # of HTS voices */
163 |    size_t num_states;           /* # of HMM states */
164 |    size_t num_streams;          /* # of streams */
165 |    char *stream_type;           /* stream type */
166 |    char *fullcontext_format;    /* fullcontext label format */
167 |    char *fullcontext_version;   /* version of fullcontext label */
168 |    HTS_Question *gv_off_context;        /* GV switch */
169 |    char **option;               /* options for each stream */
170 |    HTS_Model *duration;         /* duration PDFs and trees */
171 |    HTS_Window *window;          /* window coefficients for delta */
172 |    HTS_Model **stream;          /* parameter PDFs and trees */
173 |    HTS_Model **gv;              /* GV PDFs and trees */
174 | } HTS_ModelSet;
175 | 
176 | /* label ----------------------------------------------------------- */
177 | 
178 | /* HTS_LabelString: singly linked list node holding one label string and its time information */
179 | typedef struct _HTS_LabelString {
180 |    struct _HTS_LabelString *next;       /* pointer to next label string (list link) */
181 |    char *name;                  /* label string */
182 |    double start;                /* start frame specified in the given label */
183 |    double end;                  /* end frame specified in the given label */
184 | } HTS_LabelString;
185 | 
186 | /* HTS_Label: list of label strings (head of the HTS_LabelString chain and the number of entries) */
187 | typedef struct _HTS_Label {
188 |    HTS_LabelString *head;       /* pointer to the head of label string */
189 |    size_t size;                 /* # of label strings */
190 | } HTS_Label;
191 | 
192 | /* sstream --------------------------------------------------------- */
193 | 
194 | /* HTS_SStream: individual state stream (mean/variance/MSD sequences, window data and GV parameters) */
195 | typedef struct _HTS_SStream {
196 |    size_t vector_length;        /* vector length (static features only) */
197 |    double **mean;               /* mean vector sequence */
198 |    double **vari;               /* variance vector sequence */
199 |    double *msd;                 /* MSD parameter sequence */
200 |    size_t win_size;             /* # of windows (static + deltas) */
201 |    int *win_l_width;            /* left width of windows */
202 |    int *win_r_width;            /* right width of windows */
203 |    double **win_coefficient;    /* window coefficients */
204 |    size_t win_max_width;        /* maximum width of windows */
205 |    double *gv_mean;             /* mean vector of GV */
206 |    double *gv_vari;             /* variance vector of GV */
207 |    HTS_Boolean *gv_switch;      /* GV flag sequence */
208 | } HTS_SStream;
209 | 
210 | /* HTS_SStreamSet: set of state streams together with the duration sequence and the state/frame totals */
211 | typedef struct _HTS_SStreamSet {
212 |    HTS_SStream *sstream;        /* state streams */
213 |    size_t nstream;              /* # of streams */
214 |    size_t nstate;               /* # of states */
215 |    size_t *duration;            /* duration sequence */
216 |    size_t total_state;          /* total state */
217 |    size_t total_frame;          /* total frame */
218 | } HTS_SStreamSet;
219 | 
220 | /* pstream --------------------------------------------------------- */
221 | 
222 | /* HTS_SMatrices: working matrices and vectors of the speech parameter generation algorithm. */
223 | typedef struct _HTS_SMatrices {
224 |    double **mean;               /* mean vector sequence */
225 |    double **ivar;               /* inverse diag variance sequence */
226 |    double *g;                   /* vector used in the forward substitution */
227 |    double **wuw;                /* W' U^-1 W  */
228 |    double *wum;                 /* W' U^-1 mu */
229 | } HTS_SMatrices;
230 | 
231 | /* HTS_PStream: individual PDF stream (output parameters, generation matrices, window data, MSD flags and GV parameters). */
232 | typedef struct _HTS_PStream {
233 |    size_t vector_length;        /* vector length (static features only) */
234 |    size_t length;               /* stream length */
235 |    size_t width;                /* width of dynamic window */
236 |    double **par;                /* output parameter vector */
237 |    HTS_SMatrices sm;            /* matrices for parameter generation */
238 |    size_t win_size;             /* # of windows (static + deltas) */
239 |    int *win_l_width;            /* left width of windows */
240 |    int *win_r_width;            /* right width of windows */
241 |    double **win_coefficient;    /* window coefficients */
242 |    HTS_Boolean *msd_flag;       /* Boolean sequence for MSD */
243 |    double *gv_mean;             /* mean vector of GV */
244 |    double *gv_vari;             /* variance vector of GV */
245 |    HTS_Boolean *gv_switch;      /* GV flag sequence */
246 |    size_t gv_length;            /* frame length for GV calculation */
247 | } HTS_PStream;
248 | 
249 | /* HTS_PStreamSet: set of PDF streams together with their count and the total number of frames. */
250 | typedef struct _HTS_PStreamSet {
251 |    HTS_PStream *pstream;        /* PDF streams */
252 |    size_t nstream;              /* # of PDF streams */
253 |    size_t total_frame;          /* total frame */
254 | } HTS_PStreamSet;
255 | 
256 | /* gstream --------------------------------------------------------- */
257 | 
258 | /* HTS_GStream: a single generated parameter stream. */
259 | typedef struct _HTS_GStream {
260 |    size_t vector_length;        /* vector length (static features only) */
261 |    double **par;                /* generated parameters */
262 | } HTS_GStream;
263 | 
264 | /* HTS_GStreamSet: set of generated parameter streams plus the generated speech. */
265 | typedef struct _HTS_GStreamSet {
266 |    size_t total_nsample;        /* total number of samples */
267 |    size_t total_frame;          /* total number of frames */
268 |    size_t nstream;              /* number of streams */
269 |    HTS_GStream *gstream;        /* generated parameter streams */
270 |    double *gspeech;             /* generated speech */
271 | } HTS_GStreamSet;
272 | 
273 | /* engine ---------------------------------------------------------- */
274 | 
275 | /* HTS_Condition: synthesis conditions (the settings read and written by the HTS_Engine_set_ and HTS_Engine_get_ functions) */
276 | typedef struct _HTS_Condition {
277 |    /* global */
278 |    size_t sampling_frequency;   /* sampling frequency */
279 |    size_t fperiod;              /* frame period */
280 |    size_t audio_buff_size;      /* audio buffer size (for audio device) */
281 |    HTS_Boolean stop;            /* stop flag */
282 |    double volume;               /* volume */
283 |    double *msd_threshold;       /* MSD thresholds (the accessors take a stream index) */
284 |    double *gv_weight;           /* GV weights (the accessors take a stream index) */
285 | 
286 |    /* duration */
287 |    HTS_Boolean phoneme_alignment_flag;  /* flag for using phoneme alignment in label */
288 |    double speed;                /* speech speed */
289 | 
290 |    /* spectrum */
291 |    size_t stage;                /* if stage=0 then gamma=0 else gamma=-1/stage */
292 |    HTS_Boolean use_log_gain;    /* log gain flag (for LSP) */
293 |    double alpha;                /* all-pass constant */
294 |    double beta;                 /* postfiltering coefficient */
295 | 
296 |    /* log F0 */
297 |    double additional_half_tone; /* additional half tone */
298 | 
299 |    /* interpolation weights */
300 |    double *duration_iw;         /* weights for duration interpolation (the accessors take a voice index) */
301 |    double **parameter_iw;       /* weights for parameter interpolation (the accessors take a voice and a stream index) */
302 |    double **gv_iw;              /* weights for GV interpolation (the accessors take a voice and a stream index) */
303 | } HTS_Condition;
304 | 
305 | /* HTS_Engine: the engine itself; bundles the synthesis condition, audio output, models, label and all intermediate streams. */
306 | typedef struct _HTS_Engine {
307 |    HTS_Condition condition;     /* synthesis condition */
308 |    HTS_Audio audio;             /* audio output */
309 |    HTS_ModelSet ms;             /* set of duration models, HMMs and GV models */
310 |    HTS_Label label;             /* label */
311 |    HTS_SStreamSet sss;          /* set of state streams */
312 |    HTS_PStreamSet pss;          /* set of PDF streams */
313 |    HTS_GStreamSet gss;          /* set of generated parameter streams */
314 | } HTS_Engine;
315 | 
316 | /* engine method --------------------------------------------------- */
317 | 
318 | /* HTS_Engine_initialize: initialize engine */
319 | DLLEXPORT void HTS_Engine_initialize(HTS_Engine * engine);
320 | 
321 | /* HTS_Engine_load: load HTS voices */
322 | DLLEXPORT HTS_Boolean HTS_Engine_load(HTS_Engine * engine, char **voices, size_t num_voices);
323 | 
324 | /* HTS_Engine_set_sampling_frequency: set sampling frequency */
325 | DLLEXPORT void HTS_Engine_set_sampling_frequency(HTS_Engine * engine, size_t i);
326 | 
327 | /* HTS_Engine_get_sampling_frequency: get sampling frequency */
328 | DLLEXPORT size_t HTS_Engine_get_sampling_frequency(HTS_Engine * engine);
329 | 
330 | /* HTS_Engine_set_fperiod: set frame period */
331 | DLLEXPORT void HTS_Engine_set_fperiod(HTS_Engine * engine, size_t i);
332 | 
333 | /* HTS_Engine_get_fperiod: get frame period */
334 | DLLEXPORT size_t HTS_Engine_get_fperiod(HTS_Engine * engine);
335 | 
336 | /* HTS_Engine_set_audio_buff_size: set audio buffer size */
337 | DLLEXPORT void HTS_Engine_set_audio_buff_size(HTS_Engine * engine, size_t i);
338 | 
339 | /* HTS_Engine_get_audio_buff_size: get audio buffer size */
340 | DLLEXPORT size_t HTS_Engine_get_audio_buff_size(HTS_Engine * engine);
341 | 
342 | /* HTS_Engine_set_stop_flag: set stop flag */
343 | DLLEXPORT void HTS_Engine_set_stop_flag(HTS_Engine * engine, HTS_Boolean b);
344 | 
345 | /* HTS_Engine_get_stop_flag: get stop flag */
346 | DLLEXPORT HTS_Boolean HTS_Engine_get_stop_flag(HTS_Engine * engine);
347 | 
348 | /* HTS_Engine_set_volume: set volume in db */
349 | DLLEXPORT void HTS_Engine_set_volume(HTS_Engine * engine, double f);
350 | 
351 | /* HTS_Engine_get_volume: get volume in db */
352 | DLLEXPORT double HTS_Engine_get_volume(HTS_Engine * engine);
353 | 
354 | /* HTS_Engine_set_msd_threshold: set MSD threshold */
355 | DLLEXPORT void HTS_Engine_set_msd_threshold(HTS_Engine * engine, size_t stream_index, double f);
356 | 
357 | /* HTS_Engine_get_msd_threshold: get MSD threshold */
358 | DLLEXPORT double HTS_Engine_get_msd_threshold(HTS_Engine * engine, size_t stream_index);
359 | 
360 | /* HTS_Engine_set_gv_weight: set GV weight */
361 | DLLEXPORT void HTS_Engine_set_gv_weight(HTS_Engine * engine, size_t stream_index, double f);
362 | 
363 | /* HTS_Engine_get_gv_weight: get GV weight */
364 | DLLEXPORT double HTS_Engine_get_gv_weight(HTS_Engine * engine, size_t stream_index);
365 | 
366 | /* HTS_Engine_set_speed: set speech speed */
367 | DLLEXPORT void HTS_Engine_set_speed(HTS_Engine * engine, double f);
368 | 
369 | /* HTS_Engine_set_phoneme_alignment_flag: set flag for using phoneme alignment in label */
370 | DLLEXPORT void HTS_Engine_set_phoneme_alignment_flag(HTS_Engine * engine, HTS_Boolean b);
371 | 
372 | /* HTS_Engine_set_alpha: set alpha */
373 | DLLEXPORT void HTS_Engine_set_alpha(HTS_Engine * engine, double f);
374 | 
375 | /* HTS_Engine_get_alpha: get alpha */
376 | DLLEXPORT double HTS_Engine_get_alpha(HTS_Engine * engine);
377 | 
378 | /* HTS_Engine_set_beta: set beta */
379 | DLLEXPORT void HTS_Engine_set_beta(HTS_Engine * engine, double f);
380 | 
381 | /* HTS_Engine_get_beta: get beta */
382 | DLLEXPORT double HTS_Engine_get_beta(HTS_Engine * engine);
383 | 
384 | /* HTS_Engine_add_half_tone: add half tone */
385 | DLLEXPORT void HTS_Engine_add_half_tone(HTS_Engine * engine, double f);
386 | 
387 | /* HTS_Engine_set_duration_interpolation_weight: set interpolation weight for duration */
388 | DLLEXPORT void HTS_Engine_set_duration_interpolation_weight(HTS_Engine * engine, size_t voice_index, double f);
389 | 
390 | /* HTS_Engine_get_duration_interpolation_weight: get interpolation weight for duration */
391 | DLLEXPORT double HTS_Engine_get_duration_interpolation_weight(HTS_Engine * engine, size_t voice_index);
392 | 
393 | /* HTS_Engine_set_parameter_interpolation_weight: set interpolation weight for parameter */
394 | DLLEXPORT void HTS_Engine_set_parameter_interpolation_weight(HTS_Engine * engine, size_t voice_index, size_t stream_index, double f);
395 | 
396 | /* HTS_Engine_get_parameter_interpolation_weight: get interpolation weight for parameter */
397 | DLLEXPORT double HTS_Engine_get_parameter_interpolation_weight(HTS_Engine * engine, size_t voice_index, size_t stream_index);
398 | 
399 | /* HTS_Engine_set_gv_interpolation_weight: set interpolation weight for GV */
400 | DLLEXPORT void HTS_Engine_set_gv_interpolation_weight(HTS_Engine * engine, size_t voice_index, size_t stream_index, double f);
401 | 
402 | /* HTS_Engine_get_gv_interpolation_weight: get interpolation weight for GV */
403 | DLLEXPORT double HTS_Engine_get_gv_interpolation_weight(HTS_Engine * engine, size_t voice_index, size_t stream_index);
404 | 
405 | /* HTS_Engine_get_total_state: get total number of state */
406 | DLLEXPORT size_t HTS_Engine_get_total_state(HTS_Engine * engine);
407 | 
408 | /* HTS_Engine_set_state_mean: set mean value of state */
409 | DLLEXPORT void HTS_Engine_set_state_mean(HTS_Engine * engine, size_t stream_index, size_t state_index, size_t vector_index, double f);
410 | 
411 | /* HTS_Engine_get_state_mean: get mean value of state */
412 | DLLEXPORT double HTS_Engine_get_state_mean(HTS_Engine * engine, size_t stream_index, size_t state_index, size_t vector_index);
413 | 
414 | /* HTS_Engine_get_state_duration: get state duration */
415 | DLLEXPORT size_t HTS_Engine_get_state_duration(HTS_Engine * engine, size_t state_index);
416 | 
417 | /* HTS_Engine_get_nvoices: get number of voices */
418 | DLLEXPORT size_t HTS_Engine_get_nvoices(HTS_Engine * engine);
419 | 
420 | /* HTS_Engine_get_nstream: get number of stream */
421 | DLLEXPORT size_t HTS_Engine_get_nstream(HTS_Engine * engine);
422 | 
423 | /* HTS_Engine_get_nstate: get number of state */
424 | DLLEXPORT size_t HTS_Engine_get_nstate(HTS_Engine * engine);
425 | 
426 | /* HTS_Engine_get_fullcontext_label_format: get full context label format */
427 | DLLEXPORT const char *HTS_Engine_get_fullcontext_label_format(HTS_Engine * engine);
428 | 
429 | /* HTS_Engine_get_fullcontext_label_version: get full context label version */
430 | DLLEXPORT const char *HTS_Engine_get_fullcontext_label_version(HTS_Engine * engine);
431 | 
432 | /* HTS_Engine_get_total_frame: get total number of frame */
433 | DLLEXPORT size_t HTS_Engine_get_total_frame(HTS_Engine * engine);
434 | 
435 | /* HTS_Engine_get_nsamples: get number of samples */
436 | DLLEXPORT size_t HTS_Engine_get_nsamples(HTS_Engine * engine);
437 | 
438 | /* HTS_Engine_get_generated_parameter: output generated parameter */
439 | DLLEXPORT double HTS_Engine_get_generated_parameter(HTS_Engine * engine, size_t stream_index, size_t frame_index, size_t vector_index);
440 | 
441 | /* HTS_Engine_get_generated_speech: output generated speech */
442 | DLLEXPORT double HTS_Engine_get_generated_speech(HTS_Engine * engine, size_t index);
443 | 
444 | /* HTS_Engine_synthesize_from_fn: synthesize speech from file name */
445 | DLLEXPORT HTS_Boolean HTS_Engine_synthesize_from_fn(HTS_Engine * engine, const char *fn);
446 | 
447 | /* HTS_Engine_synthesize_from_strings: synthesize speech from string list */
448 | DLLEXPORT HTS_Boolean HTS_Engine_synthesize_from_strings(HTS_Engine * engine, char **lines, size_t num_lines);
449 | 
450 | /* HTS_Engine_generate_state_sequence_from_fn: generate state sequence from file name (1st synthesis step) */
451 | DLLEXPORT HTS_Boolean HTS_Engine_generate_state_sequence_from_fn(HTS_Engine * engine, const char *fn);
452 | 
453 | /* HTS_Engine_generate_state_sequence_from_strings: generate state sequence from string list (1st synthesis step) */
454 | DLLEXPORT HTS_Boolean HTS_Engine_generate_state_sequence_from_strings(HTS_Engine * engine, char **lines, size_t num_lines);
455 | 
456 | /* HTS_Engine_generate_parameter_sequence: generate parameter sequence (2nd synthesis step) */
457 | DLLEXPORT HTS_Boolean HTS_Engine_generate_parameter_sequence(HTS_Engine * engine);
458 | 
459 | /* HTS_Engine_generate_sample_sequence: generate sample sequence (3rd synthesis step) */
460 | DLLEXPORT HTS_Boolean HTS_Engine_generate_sample_sequence(HTS_Engine * engine);
461 | 
462 | /* HTS_Engine_save_information: save trace information */
463 | DLLEXPORT void HTS_Engine_save_information(HTS_Engine * engine, FILE * fp);
464 | 
465 | /* HTS_Engine_save_label: save label with time */
466 | DLLEXPORT void HTS_Engine_save_label(HTS_Engine * engine, FILE * fp);
467 | 
468 | /* HTS_Engine_save_generated_parameter: save generated parameter */
469 | DLLEXPORT void HTS_Engine_save_generated_parameter(HTS_Engine * engine, size_t stream_index, FILE * fp);
470 | 
471 | /* HTS_Engine_save_generated_speech: save generated speech */
472 | DLLEXPORT void HTS_Engine_save_generated_speech(HTS_Engine * engine, FILE * fp);
473 | 
474 | /* HTS_Engine_save_riff: save RIFF format file */
475 | DLLEXPORT void HTS_Engine_save_riff(HTS_Engine * engine, FILE * fp);
476 | 
477 | /* HTS_Engine_refresh: free memory per one time synthesis */
478 | DLLEXPORT void HTS_Engine_refresh(HTS_Engine * engine);
479 | 
480 | /* HTS_Engine_clear: free engine */
481 | DLLEXPORT void HTS_Engine_clear(HTS_Engine * engine);
482 | 
483 | HTS_ENGINE_H_END;
484 | 
485 | #endif                          /* !HTS_ENGINE_H */
486 | 


--------------------------------------------------------------------------------
/src/lib/.cvsignore:
--------------------------------------------------------------------------------
1 | Makefile
2 | Makefile.in
3 | libHTSEngine.a
4 | .deps
5 | 


--------------------------------------------------------------------------------
/src/lib/HTS_audio.c:
--------------------------------------------------------------------------------
  1 | /* ----------------------------------------------------------------- */
  2 | /*           The HMM-Based Speech Synthesis Engine "hts_engine API"  */
  3 | /*           developed by HTS Working Group                          */
  4 | /*           http://hts-engine.sourceforge.net/                      */
  5 | /* ----------------------------------------------------------------- */
  6 | /*                                                                   */
  7 | /*  Copyright (c) 2001-2014  Nagoya Institute of Technology          */
  8 | /*                           Department of Computer Science          */
  9 | /*                                                                   */
 10 | /*                2001-2008  Tokyo Institute of Technology           */
 11 | /*                           Interdisciplinary Graduate School of    */
 12 | /*                           Science and Engineering                 */
 13 | /*                                                                   */
 14 | /* All rights reserved.                                              */
 15 | /*                                                                   */
 16 | /* Redistribution and use in source and binary forms, with or        */
 17 | /* without modification, are permitted provided that the following   */
 18 | /* conditions are met:                                               */
 19 | /*                                                                   */
 20 | /* - Redistributions of source code must retain the above copyright  */
 21 | /*   notice, this list of conditions and the following disclaimer.   */
 22 | /* - Redistributions in binary form must reproduce the above         */
 23 | /*   copyright notice, this list of conditions and the following     */
 24 | /*   disclaimer in the documentation and/or other materials provided */
 25 | /*   with the distribution.                                          */
 26 | /* - Neither the name of the HTS working group nor the names of its  */
 27 | /*   contributors may be used to endorse or promote products derived */
 28 | /*   from this software without specific prior written permission.   */
 29 | /*                                                                   */
 30 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND            */
 31 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,       */
 32 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF          */
 33 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE          */
 34 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
 35 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,          */
 36 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED   */
 37 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,     */
 38 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
 39 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,   */
 40 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY    */
 41 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE           */
 42 | /* POSSIBILITY OF SUCH DAMAGE.                                       */
 43 | /* ----------------------------------------------------------------- */
 44 | 
 45 | #ifndef HTS_AUDIO_C
 46 | #define HTS_AUDIO_C
 47 | 
 48 | #ifdef __cplusplus
 49 | #define HTS_AUDIO_C_START extern "C" {
 50 | #define HTS_AUDIO_C_END   }
 51 | #else
 52 | #define HTS_AUDIO_C_START
 53 | #define HTS_AUDIO_C_END
 54 | #endif                          /* __CPLUSPLUS */
 55 | 
 56 | HTS_AUDIO_C_START;
 57 | 
 58 | #if !defined(AUDIO_PLAY_WIN32) && !defined(AUDIO_PLAY_PORTAUDIO) && !defined(AUDIO_PLAY_NONE)
 59 | #if defined(__WINCE__) || defined(_WINCE) || defined(_WINCE) || defined(__WINCE) || defined(__WIN32__) || defined(__WIN32) || defined(_WIN32) || defined(WIN32) || defined(__CYGWIN__) || defined(__MINGW32__)
 60 | #define AUDIO_PLAY_WIN32
 61 | #else
 62 | #define AUDIO_PLAY_NONE
 63 | #endif                          /* __WINCE__ || _WINCE || _WINCE || __WINCE || __WIN32__ || __WIN32 || _WIN32 || WIN32 || __CYGWIN__ || __MINGW32__ */
 64 | #endif                          /* !AUDIO_PLAY_WIN32 && !AUDIO_PLAY_PORTAUDIO && !AUDIO_PLAY_NONE */
 65 | 
 66 | /* hts_engine libraries */
 67 | #include "HTS_hidden.h"
 68 | 
 69 | #ifdef AUDIO_PLAY_WIN32
 70 | 
 71 | #include <windows.h>
 72 | #include <mmsystem.h>
 73 | #define AUDIO_WAIT_BUFF_MS 10   /* wait time (0.01 sec) */
 74 | #define AUDIO_CHANNEL      1    /* monaural */
 75 | /* The callback and instance arguments of waveOutOpen() and of the waveOutProc  */
 76 | /* callback are DWORD_PTR, which is pointer-sized on every Windows target.      */
 77 | /* Using DWORD here would truncate the instance pointer handed to the callback  */
 78 | /* on 64-bit targets that do not define _M_X64 (e.g. ARM64).                    */
 79 | #define AUDIO_POINTER_TYPE DWORD_PTR
 80 | 
 81 | /* HTS_AudioInterface: double-buffered waveOut playback state for Windows */
 82 | typedef struct _HTS_AudioInterface {
 83 |    HWAVEOUT hwaveout;           /* audio device handle */
 84 |    WAVEFORMATEX waveformatex;   /* PCM format passed to waveOutOpen */
 85 |    unsigned char which_buff;    /* buffer to fill on the next write (1 or 2) */
 86 |    HTS_Boolean now_buff_1;      /* TRUE from queuing buff_1 until the callback sees it done */
 87 |    HTS_Boolean now_buff_2;      /* TRUE from queuing buff_2 until the callback sees it done */
 88 |    WAVEHDR buff_1;              /* header of the first playback buffer */
 89 |    WAVEHDR buff_2;              /* header of the second playback buffer */
 90 | } HTS_AudioInterface;
 91 | 
 92 | /* HTS_AudioInterface_callback_function: waveOut callback; marks a buffer as free once its playback is done */
 93 | static void CALLBACK HTS_AudioInterface_callback_function(HWAVEOUT hwaveout, UINT msg, AUDIO_POINTER_TYPE user_data, AUDIO_POINTER_TYPE param1, AUDIO_POINTER_TYPE param2)
 94 | {
 95 |    HTS_AudioInterface *device = (HTS_AudioInterface *) user_data;
 96 |    WAVEHDR *finished = (WAVEHDR *) param1;
 97 | 
 98 |    /* react only to "playback done" messages that carry a completed header */
 99 |    if (msg != MM_WOM_DONE || finished == NULL || !(finished->dwFlags & WHDR_DONE))
100 |       return;
101 |    if (finished == &(device->buff_1) && device->now_buff_1 == TRUE)
102 |       device->now_buff_1 = FALSE;
103 |    else if (finished == &(device->buff_2) && device->now_buff_2 == TRUE)
104 |       device->now_buff_2 = FALSE;
105 | }
106 | 
107 | /* HTS_AudioInterface_write: copy samples into the next free buffer and queue it for playback */
108 | static HTS_Boolean HTS_AudioInterface_write(HTS_AudioInterface * audio_interface, const short *buff, size_t buff_size)
109 | {
110 |    WAVEHDR *header;
111 |    HTS_Boolean *in_use;
112 | 
113 |    /* select the buffer whose turn it is */
114 |    if (audio_interface->which_buff == 1) {
115 |       header = &(audio_interface->buff_1);
116 |       in_use = &(audio_interface->now_buff_1);
117 |    } else {
118 |       header = &(audio_interface->buff_2);
119 |       in_use = &(audio_interface->now_buff_2);
120 |    }
121 | 
122 |    /* wait until the callback has released this buffer, then claim it and pass the turn on */
123 |    while (*in_use == TRUE)
124 |       Sleep(AUDIO_WAIT_BUFF_MS);
125 |    *in_use = TRUE;
126 |    audio_interface->which_buff = (header == &(audio_interface->buff_1)) ? 2 : 1;
127 |    memcpy(header->lpData, buff, buff_size * sizeof(short));
128 |    header->dwBufferLength = (DWORD) buff_size *sizeof(short);
129 | 
130 |    if (waveOutWrite(audio_interface->hwaveout, header, sizeof(WAVEHDR)) == MMSYSERR_NOERROR)
131 |       return TRUE;
132 |    HTS_error(0, "hts_engine: Cannot send datablocks to your output audio device to play waveform.\n");
133 |    return FALSE;
134 | }
135 | 
136 | /* HTS_AudioInterface_close: stop playback, release both buffers, close the device and free the interface */
137 | static void HTS_AudioInterface_close(HTS_AudioInterface * audio_interface)
138 | {
139 |    WAVEHDR *headers[2];
140 |    size_t i;
141 | 
142 |    /* both buffers get the same treatment, first buff_1 and then buff_2 */
143 |    headers[0] = &(audio_interface->buff_1);
144 |    headers[1] = &(audio_interface->buff_2);
145 | 
146 |    /* stop audio */
147 |    if (waveOutReset(audio_interface->hwaveout) != MMSYSERR_NOERROR)
148 |       HTS_error(0, "hts_engine: Cannot stop and reset your output audio device.\n");
149 |    /* unprepare */
150 |    for (i = 0; i < 2; i++)
151 |       if (waveOutUnprepareHeader(audio_interface->hwaveout, headers[i], sizeof(WAVEHDR)) != MMSYSERR_NOERROR)
152 |          HTS_error(0, "hts_engine: Cannot cleanup the audio datablocks to play waveform.\n");
153 |    /* close */
154 |    if (waveOutClose(audio_interface->hwaveout) != MMSYSERR_NOERROR)
155 |       HTS_error(0, "hts_engine: Failed to close your output audio device.\n");
156 |    /* free the sample memory of each buffer */
157 |    for (i = 0; i < 2; i++)
158 |       if (headers[i]->lpData != NULL)
159 |          HTS_free(headers[i]->lpData);
160 | 
161 |    HTS_free(audio_interface);
162 | }
163 | 
164 | /* HTS_AudioInterface_prepare_buffer: allocate one sample buffer and prepare its header; frees the buffer and returns FALSE on failure */
165 | static HTS_Boolean HTS_AudioInterface_prepare_buffer(HWAVEOUT hwaveout, WAVEHDR * header, size_t max_buff_size)
166 | {
167 |    header->lpData = (LPSTR) HTS_calloc(max_buff_size, sizeof(short));
168 |    header->dwBufferLength = (DWORD) max_buff_size *sizeof(short);
169 |    header->dwFlags = WHDR_BEGINLOOP | WHDR_ENDLOOP;
170 |    header->dwLoops = 1;
171 |    header->lpNext = 0;
172 |    header->reserved = 0;
173 |    if (waveOutPrepareHeader(hwaveout, header, sizeof(WAVEHDR)) == MMSYSERR_NOERROR)
174 |       return TRUE;
175 |    HTS_error(0, "hts_engine: Cannot initialize audio_interface datablocks to play waveform.\n");
176 |    HTS_free(header->lpData);
177 |    return FALSE;
178 | }
179 | 
180 | /* HTS_AudioInterface_open: open the waveOut device for 16-bit monaural PCM and prepare both buffers; returns NULL on failure */
181 | static HTS_AudioInterface *HTS_AudioInterface_open(size_t sampling_frequency, size_t max_buff_size)
182 | {
183 |    HTS_AudioInterface *audio_interface;
184 | 
185 |    /* make audio interface */
186 |    audio_interface = (HTS_AudioInterface *) HTS_calloc(1, sizeof(HTS_AudioInterface));
187 | 
188 |    audio_interface->hwaveout = 0;
189 |    audio_interface->which_buff = 1;
190 |    audio_interface->now_buff_1 = FALSE;
191 |    audio_interface->now_buff_2 = FALSE;
192 | 
193 |    /* format */
194 |    audio_interface->waveformatex.wFormatTag = WAVE_FORMAT_PCM;
195 |    audio_interface->waveformatex.nChannels = AUDIO_CHANNEL;
196 |    audio_interface->waveformatex.nSamplesPerSec = (DWORD) sampling_frequency;
197 |    audio_interface->waveformatex.wBitsPerSample = sizeof(short) * 8;
198 |    audio_interface->waveformatex.nBlockAlign = AUDIO_CHANNEL * audio_interface->waveformatex.wBitsPerSample / 8;
199 |    audio_interface->waveformatex.nAvgBytesPerSec = (DWORD) sampling_frequency *audio_interface->waveformatex.nBlockAlign;
200 |    /* open */
201 |    if (waveOutOpen(&audio_interface->hwaveout, WAVE_MAPPER, &audio_interface->waveformatex, (AUDIO_POINTER_TYPE) HTS_AudioInterface_callback_function, (AUDIO_POINTER_TYPE) audio_interface, CALLBACK_FUNCTION) != MMSYSERR_NOERROR) {
202 |       HTS_error(0, "hts_engine: Failed to open your output audio_interface device to play waveform.\n");
203 |       HTS_free(audio_interface);
204 |       return NULL;
205 |    }
206 | 
207 |    /* prepare; on failure undo everything done so far, including the open device handle */
208 |    if (HTS_AudioInterface_prepare_buffer(audio_interface->hwaveout, &(audio_interface->buff_1), max_buff_size) != TRUE) {
209 |       waveOutClose(audio_interface->hwaveout);
210 |       HTS_free(audio_interface);
211 |       return NULL;
212 |    }
213 |    if (HTS_AudioInterface_prepare_buffer(audio_interface->hwaveout, &(audio_interface->buff_2), max_buff_size) != TRUE) {
214 |       waveOutUnprepareHeader(audio_interface->hwaveout, &(audio_interface->buff_1), sizeof(WAVEHDR));   /* must precede freeing its memory */
215 |       HTS_free(audio_interface->buff_1.lpData);
216 |       waveOutClose(audio_interface->hwaveout);
217 |       HTS_free(audio_interface);
218 |       return NULL;
219 |    }
220 | 
221 |    return audio_interface;
222 | }
223 | 
224 | /* HTS_Audio_initialize: reset every field of the audio state (does nothing for NULL) */
225 | void HTS_Audio_initialize(HTS_Audio * audio)
226 | {
227 |    if (audio != NULL) {
228 |       /* no device opened and no sample buffer allocated */
229 |       audio->audio_interface = NULL;
230 |       audio->buff = NULL;
231 |       audio->buff_size = 0;
232 |       audio->max_buff_size = 0;
233 |       audio->sampling_frequency = 0;
234 |    }
235 | }
236 | 
237 | /* HTS_Audio_set_parameter: (re)open the audio device for the given sampling frequency and buffer size */
238 | void HTS_Audio_set_parameter(HTS_Audio * audio, size_t sampling_frequency, size_t max_buff_size)
239 | {
240 |    if (audio == NULL)
241 |       return;
242 | 
243 |    /* nothing to do when the requested settings are already in effect */
244 |    if (audio->sampling_frequency == sampling_frequency && audio->max_buff_size == max_buff_size)
245 |       return;
246 | 
247 |    /* close the current device, if any; a zero frequency or size leaves audio switched off */
248 |    HTS_Audio_clear(audio);
249 | 
250 |    if (sampling_frequency != 0 && max_buff_size != 0) {
251 |       audio->audio_interface = HTS_AudioInterface_open(sampling_frequency, max_buff_size);
252 |       if (audio->audio_interface != NULL) {
253 |          audio->buff_size = 0;
254 |          audio->buff = (short *) HTS_calloc(max_buff_size, sizeof(short));
255 |          audio->max_buff_size = max_buff_size;
256 |          audio->sampling_frequency = sampling_frequency;
257 |       }
258 |    }
259 | }
260 | 
261 | /* HTS_Audio_write: buffer one sample and send the buffer to the device once it is full */
262 | void HTS_Audio_write(HTS_Audio * audio, short data)
263 | {
264 |    if (audio == NULL || audio->audio_interface == NULL)
265 |       return;
266 | 
267 |    audio->buff[audio->buff_size++] = data;
268 |    if (audio->buff_size < audio->max_buff_size)
269 |       return;                   /* still room left in the buffer */
270 | 
271 |    /* buffer full: play it, or shut audio down when the device rejects it */
272 |    if (HTS_AudioInterface_write((HTS_AudioInterface *) audio->audio_interface, audio->buff, audio->buff_size) == TRUE)
273 |       audio->buff_size = 0;
274 |    else
275 |       HTS_Audio_clear(audio);
276 | }
277 | 
278 | /* HTS_Audio_flush: play the samples still buffered and wait until the device has finished both buffers */
279 | void HTS_Audio_flush(HTS_Audio * audio)
280 | {
281 |    HTS_AudioInterface *device;
282 | 
283 |    if (audio == NULL || audio->audio_interface == NULL)
284 |       return;
285 |    device = (HTS_AudioInterface *) audio->audio_interface;
286 |    if (audio->buff_size > 0) {
287 |       if (HTS_AudioInterface_write(device, audio->buff, audio->buff_size) != TRUE) {
288 |          HTS_Audio_clear(audio);        /* device rejected the data: shut audio down */
289 |          return;
290 |       }
291 |       audio->buff_size = 0;
292 |    }
293 |    /* the device callback clears these flags when playback of a buffer completes */
294 |    while (device->now_buff_1 == TRUE || device->now_buff_2 == TRUE)
295 |       Sleep(AUDIO_WAIT_BUFF_MS);
296 | }
297 | 
298 | /* HTS_Audio_clear: close the audio device, free the sample buffer and reset the state (does nothing when no device is open) */
299 | void HTS_Audio_clear(HTS_Audio * audio)
300 | {
301 |    HTS_AudioInterface *audio_interface;
302 | 
303 |    if (audio == NULL || audio->audio_interface == NULL)
304 |       return;
305 | 
306 |    audio_interface = (HTS_AudioInterface *) audio->audio_interface;
307 |    HTS_AudioInterface_close(audio_interface);
308 |    if (audio->buff != NULL)
309 |       HTS_free(audio->buff);    /* allocated with HTS_calloc, so release it with HTS_free */
310 |    HTS_Audio_initialize(audio);
311 | }
312 | 
313 | #endif                          /* AUDIO_PLAY_WIN32 */
314 | 
315 | #ifdef AUDIO_PLAY_PORTAUDIO
316 | 
317 | #include "portaudio.h"
318 | 
319 | /* HTS_AudioInterface: audio output state for PortAudio */
320 | typedef struct _HTS_AudioInterface {
321 |    PaStreamParameters parameters;       /* output parameters passed to Pa_OpenStream */
322 |    PaStream *stream;            /* output stream (NULL until opened) */
323 | } HTS_AudioInterface;
324 | 
325 | /* HTS_AudioInterface_write: send data to audio device */
326 | static void HTS_AudioInterface_write(HTS_AudioInterface * audio_interface, const short *buff, size_t buff_size)
327 | {
328 |    PaError err;
329 | 
330 |    err = Pa_WriteStream(audio_interface->stream, buff, buff_size);
331 |    if (err != paNoError && err != paOutputUnderflowed)
332 |       HTS_error(0, "hts_engine: Cannot send datablocks to your output audio device to play waveform.\n");
333 | }
334 | 
335 | /* HTS_AudioInterface_close: stop and close the stream, terminate PortAudio and free the interface */
336 | static void HTS_AudioInterface_close(HTS_AudioInterface * audio_interface)
337 | {
338 |    /* each step is attempted in turn; a failing one is reported through HTS_error */
339 |    if (Pa_StopStream(audio_interface->stream) != paNoError)
340 |       HTS_error(0, "hts_engine: Cannot stop your output audio device.\n");
341 | 
342 |    if (Pa_CloseStream(audio_interface->stream) != paNoError)
343 |       HTS_error(0, "hts_engine: Failed to close your output audio device.\n");
344 | 
345 |    /* matches the Pa_Initialize() call made when the interface was opened */
346 |    Pa_Terminate();
347 | 
348 |    HTS_free(audio_interface);
349 | }
350 | 
351 | /* HTS_AudioInterface_open: open and start a 16-bit monaural stream on the default output device; returns NULL on failure */
352 | static HTS_AudioInterface *HTS_AudioInterface_open(size_t sampling_frequency, size_t max_buff_size)
353 | {
354 |    HTS_AudioInterface *audio_interface;
355 |    const PaDeviceInfo *device_info;
356 | 
357 |    audio_interface = (HTS_AudioInterface *) HTS_calloc(1, sizeof(HTS_AudioInterface));
358 |    audio_interface->stream = NULL;
359 | 
360 |    if (Pa_Initialize() != paNoError) {
361 |       HTS_error(0, "hts_engine: Failed to initialize your output audio device to play waveform.\n");
362 |       HTS_free(audio_interface);
363 |       return NULL;
364 |    }
365 | 
366 |    /* Pa_GetDeviceInfo() yields NULL when no default output device exists, so it must not be dereferenced blindly */
367 |    audio_interface->parameters.device = Pa_GetDefaultOutputDevice();
368 |    device_info = Pa_GetDeviceInfo(audio_interface->parameters.device);
369 |    audio_interface->parameters.channelCount = 1;
370 |    audio_interface->parameters.sampleFormat = paInt16;
371 |    audio_interface->parameters.suggestedLatency = (device_info != NULL) ? device_info->defaultLowOutputLatency : 0.0;
372 |    audio_interface->parameters.hostApiSpecificStreamInfo = NULL;
373 | 
374 |    if (device_info == NULL || Pa_OpenStream(&audio_interface->stream, NULL, &audio_interface->parameters, sampling_frequency, max_buff_size, paClipOff, NULL, NULL) != paNoError) {
375 |       HTS_error(0, "hts_engine: Failed to open your output audio device to play waveform.\n");
376 |       Pa_Terminate();
377 |       HTS_free(audio_interface);
378 |       return NULL;
379 |    }
380 | 
381 |    if (Pa_StartStream(audio_interface->stream) != paNoError) {
382 |       HTS_error(0, "hts_engine: Failed to start your output audio device to play waveform.\n");
383 |       Pa_CloseStream(audio_interface->stream);
384 |       Pa_Terminate();
385 |       HTS_free(audio_interface);
386 |       return NULL;
387 |    }
388 | 
389 |    return audio_interface;
390 | }
391 | 
392 | /* HTS_Audio_initialize: put the audio state into its "nothing opened" condition (does nothing for NULL) */
393 | void HTS_Audio_initialize(HTS_Audio * audio)
394 | {
395 |    if (audio != NULL) {
396 |       /* no stream opened, no samples buffered */
397 |       audio->audio_interface = NULL;
398 |       audio->buff_size = 0;
399 |       audio->buff = NULL;
400 |       audio->max_buff_size = 0;
401 |       audio->sampling_frequency = 0;
402 |    }
403 | }
404 | 
405 | /* HTS_Audio_set_parameter: (re)open the audio device for the given sampling frequency and buffer size */
406 | void HTS_Audio_set_parameter(HTS_Audio * audio, size_t sampling_frequency, size_t max_buff_size)
407 | {
408 |    /* nothing to do without an audio object or when the settings are unchanged */
409 |    if (audio == NULL || (audio->sampling_frequency == sampling_frequency && audio->max_buff_size == max_buff_size))
410 |       return;
411 | 
412 |    /* close the currently opened device, if any */
413 |    HTS_Audio_clear(audio);
414 | 
415 |    /* a zero frequency or buffer size leaves audio switched off */
416 |    if (sampling_frequency == 0 || max_buff_size == 0)
417 |       return;
418 | 
419 |    /* the remaining fields are filled in only when the device could be opened */
420 |    audio->audio_interface = HTS_AudioInterface_open(sampling_frequency, max_buff_size);
421 |    if (audio->audio_interface != NULL) {
422 |       audio->sampling_frequency = sampling_frequency;
423 |       audio->max_buff_size = max_buff_size;
424 |       audio->buff = (short *) HTS_calloc(max_buff_size, sizeof(short));
425 |       audio->buff_size = 0;
426 |    }
427 | }
428 | 
429 | /* HTS_Audio_write: send data to audio device */
430 | void HTS_Audio_write(HTS_Audio * audio, short data)
431 | {
432 |    if (audio == NULL)
433 |       return;
434 | 
435 |    audio->buff[audio->buff_size++] = data;
436 | 
437 |    if (audio->buff_size >= audio->max_buff_size) {
438 |       if (audio->audio_interface != NULL)
439 |          HTS_AudioInterface_write((HTS_AudioInterface *) audio->audio_interface, audio->buff, audio->max_buff_size);
440 |       audio->buff_size = 0;
441 |    }
442 | }
443 | 
444 | /* HTS_Audio_flush: flush remain data */
445 | void HTS_Audio_flush(HTS_Audio * audio)
446 | {
447 |    HTS_AudioInterface *audio_interface;
448 | 
449 |    if (audio == NULL || audio->audio_interface == NULL)
450 |       return;
451 | 
452 |    audio_interface = (HTS_AudioInterface *) audio->audio_interface;
453 |    if (audio->buff_size > 0) {
454 |       HTS_AudioInterface_write(audio_interface, audio->buff, audio->buff_size);
455 |       audio->buff_size = 0;
456 |    }
457 | }
458 | 
459 | /* HTS_Audio_clear: free audio */
460 | void HTS_Audio_clear(HTS_Audio * audio)
461 | {
462 |    HTS_AudioInterface *audio_interface;
463 | 
464 |    if (audio == NULL || audio->audio_interface == NULL)
465 |       return;
466 |    audio_interface = (HTS_AudioInterface *) audio->audio_interface;
467 | 
468 |    HTS_Audio_flush(audio);
469 |    HTS_AudioInterface_close(audio_interface);
470 |    if (audio->buff != NULL)
471 |       HTS_free(audio->buff);
472 |    HTS_Audio_initialize(audio);
473 | }
474 | 
475 | #endif                          /* AUDIO_PLAY_PORTAUDIO */
476 | 
477 | #ifdef AUDIO_PLAY_NONE
478 | 
479 | /* HTS_Audio_initialize: initialize audio */
480 | void HTS_Audio_initialize(HTS_Audio * audio)
481 | {
482 | }
483 | 
484 | /* HTS_Audio_set_parameter: set parameters for audio */
485 | void HTS_Audio_set_parameter(HTS_Audio * audio, size_t sampling_frequeny, size_t max_buff_size)
486 | {
487 | }
488 | 
489 | /* HTS_Audio_write: send data to audio */
490 | void HTS_Audio_write(HTS_Audio * audio, short data)
491 | {
492 | }
493 | 
494 | /* HTS_Audio_flush: flush remain data */
495 | void HTS_Audio_flush(HTS_Audio * audio)
496 | {
497 | }
498 | 
499 | /* HTS_Audio_clear: free audio */
500 | void HTS_Audio_clear(HTS_Audio * audio)
501 | {
502 | }
503 | 
504 | #endif                          /* AUDIO_PLAY_NONE */
505 | 
506 | HTS_AUDIO_C_END;
507 | 
508 | #endif                          /* !HTS_AUDIO_C */
509 | 


--------------------------------------------------------------------------------
/src/lib/HTS_gstream.c:
--------------------------------------------------------------------------------
  1 | /* ----------------------------------------------------------------- */
  2 | /*           The HMM-Based Speech Synthesis Engine "hts_engine API"  */
  3 | /*           developed by HTS Working Group                          */
  4 | /*           http://hts-engine.sourceforge.net/                      */
  5 | /* ----------------------------------------------------------------- */
  6 | /*                                                                   */
  7 | /*  Copyright (c) 2001-2014  Nagoya Institute of Technology          */
  8 | /*                           Department of Computer Science          */
  9 | /*                                                                   */
 10 | /*                2001-2008  Tokyo Institute of Technology           */
 11 | /*                           Interdisciplinary Graduate School of    */
 12 | /*                           Science and Engineering                 */
 13 | /*                                                                   */
 14 | /* All rights reserved.                                              */
 15 | /*                                                                   */
 16 | /* Redistribution and use in source and binary forms, with or        */
 17 | /* without modification, are permitted provided that the following   */
 18 | /* conditions are met:                                               */
 19 | /*                                                                   */
 20 | /* - Redistributions of source code must retain the above copyright  */
 21 | /*   notice, this list of conditions and the following disclaimer.   */
 22 | /* - Redistributions in binary form must reproduce the above         */
 23 | /*   copyright notice, this list of conditions and the following     */
 24 | /*   disclaimer in the documentation and/or other materials provided */
 25 | /*   with the distribution.                                          */
 26 | /* - Neither the name of the HTS working group nor the names of its  */
 27 | /*   contributors may be used to endorse or promote products derived */
 28 | /*   from this software without specific prior written permission.   */
 29 | /*                                                                   */
 30 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND            */
 31 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,       */
 32 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF          */
 33 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE          */
 34 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
 35 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,          */
 36 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED   */
 37 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,     */
 38 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
 39 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,   */
 40 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY    */
 41 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE           */
 42 | /* POSSIBILITY OF SUCH DAMAGE.                                       */
 43 | /* ----------------------------------------------------------------- */
 44 | 
 45 | #ifndef HTS_GSTREAM_C
 46 | #define HTS_GSTREAM_C
 47 | 
 48 | #ifdef __cplusplus
 49 | #define HTS_GSTREAM_C_START extern "C" {
 50 | #define HTS_GSTREAM_C_END   }
 51 | #else
 52 | #define HTS_GSTREAM_C_START
 53 | #define HTS_GSTREAM_C_END
 54 | #endif                          /* __cplusplus */
 55 | 
 56 | HTS_GSTREAM_C_START;
 57 | 
 58 | /* hts_engine libraries */
 59 | #include "HTS_hidden.h"
 60 | 
 61 | /* HTS_GStreamSet_initialize: initialize generated parameter stream set */
 62 | void HTS_GStreamSet_initialize(HTS_GStreamSet * gss)
 63 | {
 64 |    gss->nstream = 0;
 65 |    gss->total_frame = 0;
 66 |    gss->total_nsample = 0;
 67 |    gss->gstream = NULL;
 68 |    gss->gspeech = NULL;
 69 | }
 70 | 
 71 | /* HTS_GStreamSet_create: generate speech */
 72 | HTS_Boolean HTS_GStreamSet_create(HTS_GStreamSet * gss, HTS_PStreamSet * pss, size_t stage, HTS_Boolean use_log_gain, size_t sampling_rate, size_t fperiod, double alpha, double beta, HTS_Boolean * stop, double volume, HTS_Audio * audio)
 73 | {
 74 |    size_t i, j, k;
 75 |    size_t msd_frame;
 76 |    HTS_Vocoder v;
 77 |    size_t nlpf = 0;
 78 |    double *lpf = NULL;
 79 | 
 80 |    /* check */
 81 |    if (gss->gstream || gss->gspeech) {
 82 |       HTS_error(1, "HTS_GStreamSet_create: HTS_GStreamSet is not initialized.\n");
 83 |       return FALSE;
 84 |    }
 85 | 
 86 |    /* initialize */
 87 |    gss->nstream = HTS_PStreamSet_get_nstream(pss);
 88 |    gss->total_frame = HTS_PStreamSet_get_total_frame(pss);
 89 |    gss->total_nsample = fperiod * gss->total_frame;
 90 |    gss->gstream = (HTS_GStream *) HTS_calloc(gss->nstream, sizeof(HTS_GStream));
 91 |    for (i = 0; i < gss->nstream; i++) {
 92 |       gss->gstream[i].vector_length = HTS_PStreamSet_get_vector_length(pss, i);
 93 |       gss->gstream[i].par = (double **) HTS_calloc(gss->total_frame, sizeof(double *));
 94 |       for (j = 0; j < gss->total_frame; j++)
 95 |          gss->gstream[i].par[j] = (double *) HTS_calloc(gss->gstream[i].vector_length, sizeof(double));
 96 |    }
 97 |    gss->gspeech = (double *) HTS_calloc(gss->total_nsample, sizeof(double));
 98 | 
 99 |    /* copy generated parameter */
100 |    for (i = 0; i < gss->nstream; i++) {
101 |       if (HTS_PStreamSet_is_msd(pss, i)) {      /* for MSD */
102 |          for (j = 0, msd_frame = 0; j < gss->total_frame; j++)
103 |             if (HTS_PStreamSet_get_msd_flag(pss, i, j)) {
104 |                for (k = 0; k < gss->gstream[i].vector_length; k++)
105 |                   gss->gstream[i].par[j][k] = HTS_PStreamSet_get_parameter(pss, i, msd_frame, k);
106 |                msd_frame++;
107 |             } else
108 |                for (k = 0; k < gss->gstream[i].vector_length; k++)
109 |                   gss->gstream[i].par[j][k] = HTS_NODATA;
110 |       } else {                  /* for non MSD */
111 |          for (j = 0; j < gss->total_frame; j++)
112 |             for (k = 0; k < gss->gstream[i].vector_length; k++)
113 |                gss->gstream[i].par[j][k] = HTS_PStreamSet_get_parameter(pss, i, j, k);
114 |       }
115 |    }
116 | 
117 |    /* check */
118 |    if (gss->nstream != 2 && gss->nstream != 3) {
119 |       HTS_error(1, "HTS_GStreamSet_create: The number of streams should be 2 or 3.\n");
120 |       HTS_GStreamSet_clear(gss);
121 |       return FALSE;
122 |    }
123 |    if (HTS_PStreamSet_get_vector_length(pss, 1) != 1) {
124 |       HTS_error(1, "HTS_GStreamSet_create: The size of lf0 static vector should be 1.\n");
125 |       HTS_GStreamSet_clear(gss);
126 |       return FALSE;
127 |    }
128 |    if (gss->nstream >= 3 && gss->gstream[2].vector_length % 2 == 0) {
129 |       HTS_error(1, "HTS_GStreamSet_create: The number of low-pass filter coefficient should be odd numbers.");
130 |       HTS_GStreamSet_clear(gss);
131 |       return FALSE;
132 |    }
133 | 
134 |    /* synthesize speech waveform */
135 |    HTS_Vocoder_initialize(&v, gss->gstream[0].vector_length - 1, stage, use_log_gain, sampling_rate, fperiod);
136 |    if (gss->nstream >= 3)
137 |       nlpf = gss->gstream[2].vector_length;
138 |    for (i = 0; i < gss->total_frame && (*stop) == FALSE; i++) {
139 |       j = i * fperiod;
140 |       if (gss->nstream >= 3)
141 |          lpf = &gss->gstream[2].par[i][0];
142 |       HTS_Vocoder_synthesize(&v, gss->gstream[0].vector_length - 1, gss->gstream[1].par[i][0], &gss->gstream[0].par[i][0], nlpf, lpf, alpha, beta, volume, &gss->gspeech[j], audio);
143 |    }
144 |    HTS_Vocoder_clear(&v);
145 |    if (audio)
146 |       HTS_Audio_flush(audio);
147 | 
148 |    return TRUE;
149 | }
150 | 
151 | /* HTS_GStreamSet_get_total_nsamples: get total number of sample */
152 | size_t HTS_GStreamSet_get_total_nsamples(HTS_GStreamSet * gss)
153 | {
154 |    return gss->total_nsample;
155 | }
156 | 
157 | /* HTS_GStreamSet_get_total_frame: get total number of frame */
158 | size_t HTS_GStreamSet_get_total_frame(HTS_GStreamSet * gss)
159 | {
160 |    return gss->total_frame;
161 | }
162 | 
163 | /* HTS_GStreamSet_get_vector_length: get features length */
164 | size_t HTS_GStreamSet_get_vector_length(HTS_GStreamSet * gss, size_t stream_index)
165 | {
166 |    return gss->gstream[stream_index].vector_length;
167 | }
168 | 
169 | /* HTS_GStreamSet_get_speech: get synthesized speech parameter */
170 | double HTS_GStreamSet_get_speech(HTS_GStreamSet * gss, size_t sample_index)
171 | {
172 |    return gss->gspeech[sample_index];
173 | }
174 | 
175 | /* HTS_GStreamSet_get_parameter: get generated parameter */
176 | double HTS_GStreamSet_get_parameter(HTS_GStreamSet * gss, size_t stream_index, size_t frame_index, size_t vector_index)
177 | {
178 |    return gss->gstream[stream_index].par[frame_index][vector_index];
179 | }
180 | 
181 | /* HTS_GStreamSet_clear: free generated parameter stream set */
182 | void HTS_GStreamSet_clear(HTS_GStreamSet * gss)
183 | {
184 |    size_t i, j;
185 | 
186 |    if (gss->gstream) {
187 |       for (i = 0; i < gss->nstream; i++) {
188 |          if (gss->gstream[i].par != NULL) {
189 |             for (j = 0; j < gss->total_frame; j++)
190 |                HTS_free(gss->gstream[i].par[j]);
191 |             HTS_free(gss->gstream[i].par);
192 |          }
193 |       }
194 |       HTS_free(gss->gstream);
195 |    }
196 |    if (gss->gspeech)
197 |       HTS_free(gss->gspeech);
198 |    HTS_GStreamSet_initialize(gss);
199 | }
200 | 
201 | HTS_GSTREAM_C_END;
202 | 
203 | #endif                          /* !HTS_GSTREAM_C */
204 | 


--------------------------------------------------------------------------------
/src/lib/HTS_hidden.h:
--------------------------------------------------------------------------------
  1 | /* ----------------------------------------------------------------- */
  2 | /*           The HMM-Based Speech Synthesis Engine "hts_engine API"  */
  3 | /*           developed by HTS Working Group                          */
  4 | /*           http://hts-engine.sourceforge.net/                      */
  5 | /* ----------------------------------------------------------------- */
  6 | /*                                                                   */
  7 | /*  Copyright (c) 2001-2014  Nagoya Institute of Technology          */
  8 | /*                           Department of Computer Science          */
  9 | /*                                                                   */
 10 | /*                2001-2008  Tokyo Institute of Technology           */
 11 | /*                           Interdisciplinary Graduate School of    */
 12 | /*                           Science and Engineering                 */
 13 | /*                                                                   */
 14 | /* All rights reserved.                                              */
 15 | /*                                                                   */
 16 | /* Redistribution and use in source and binary forms, with or        */
 17 | /* without modification, are permitted provided that the following   */
 18 | /* conditions are met:                                               */
 19 | /*                                                                   */
 20 | /* - Redistributions of source code must retain the above copyright  */
 21 | /*   notice, this list of conditions and the following disclaimer.   */
 22 | /* - Redistributions in binary form must reproduce the above         */
 23 | /*   copyright notice, this list of conditions and the following     */
 24 | /*   disclaimer in the documentation and/or other materials provided */
 25 | /*   with the distribution.                                          */
 26 | /* - Neither the name of the HTS working group nor the names of its  */
 27 | /*   contributors may be used to endorse or promote products derived */
 28 | /*   from this software without specific prior written permission.   */
 29 | /*                                                                   */
 30 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND            */
 31 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,       */
 32 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF          */
 33 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE          */
 34 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
 35 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,          */
 36 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED   */
 37 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,     */
 38 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
 39 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,   */
 40 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY    */
 41 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE           */
 42 | /* POSSIBILITY OF SUCH DAMAGE.                                       */
 43 | /* ----------------------------------------------------------------- */
 44 | 
 45 | #ifndef HTS_HIDDEN_H
 46 | #define HTS_HIDDEN_H
 47 | 
 48 | #ifdef __cplusplus
 49 | #define HTS_HIDDEN_H_START extern "C" {
 50 | #define HTS_HIDDEN_H_END   }
 51 | #else
 52 | #define HTS_HIDDEN_H_START
 53 | #define HTS_HIDDEN_H_END
 54 | #endif                          /* __cplusplus */
 55 | 
 56 | HTS_HIDDEN_H_START;
 57 | 
 58 | /* hts_engine libraries */
 59 | #include "HTS_engine.h"
 60 | 
 61 | /* common ---------------------------------------------------------- */
 62 | 
 63 | #define HTS_MAXBUFLEN 1024
 64 | 
 65 | #if !defined(WORDS_BIGENDIAN) && !defined(WORDS_LITTLEENDIAN)
 66 | #define WORDS_LITTLEENDIAN
 67 | #endif                          /* !WORDS_BIGENDIAN && !WORDS_LITTLEENDIAN */
 68 | #if defined(WORDS_BIGENDIAN) && defined(WORDS_LITTLEENDIAN)
 69 | #undef WORDS_BIGENDIAN
 70 | #endif                          /* WORDS_BIGENDIAN && WORDS_LITTLEENDIAN */
 71 | 
 72 | #define MAX_F0    20000.0
 73 | #define MIN_F0    20.0
 74 | #define MAX_LF0   9.9034875525361280454891979401956     /* log(20000.0) */
 75 | #define MIN_LF0   2.9957322735539909934352235761425     /* log(20.0) */
 76 | #define HALF_TONE 0.05776226504666210911810267678818    /* log(2.0) / 12.0 */
 77 | #define DB        0.11512925464970228420089957273422    /* log(10.0) / 20.0 */
 78 | 
 79 | /* misc ------------------------------------------------------------ */
 80 | 
/* HTS_File: handle passed to the HTS_f* wrapper functions declared below */
typedef struct _HTS_File {
   unsigned char type;          /* NOTE(review): presumably selects what kind of object `pointer` refers to (file vs. in-memory data) -- confirm in HTS_misc.c */
   void *pointer;               /* NOTE(review): presumably the underlying stream or data object -- confirm in HTS_misc.c */
} HTS_File;
 85 | 
 86 | /* HTS_fopen_from_fn: wrapper for fopen */
 87 | HTS_File *HTS_fopen_from_fn(const char *name, const char *opt);
 88 | 
 89 | /* HTS_fopen_from_fp: wrapper for fopen */
 90 | HTS_File *HTS_fopen_from_fp(HTS_File * fp, size_t size);
 91 | 
 92 | /* HTS_fopen_from_data: wrapper for fopen */
 93 | HTS_File *HTS_fopen_from_data(void *data, size_t size);
 94 | 
 95 | /* HTS_fclose: wrapper for fclose */
 96 | void HTS_fclose(HTS_File * fp);
 97 | 
 98 | /* HTS_fgetc: wrapper for fgetc */
 99 | int HTS_fgetc(HTS_File * fp);
100 | 
101 | /* HTS_feof: wrapper for feof */
102 | int HTS_feof(HTS_File * fp);
103 | 
104 | /* HTS_fseek: wrapper for fseek */
105 | int HTS_fseek(HTS_File * fp, long offset, int origin);
106 | 
107 | /* HTS_ftell: wrapper for ftell */
108 | size_t HTS_ftell(HTS_File * fp);
109 | 
110 | /* HTS_fread_big_endian: fread with byteswap */
111 | size_t HTS_fread_big_endian(void *buf, size_t size, size_t n, HTS_File * fp);
112 | 
113 | /* HTS_fread_little_endian: fread with byteswap */
114 | size_t HTS_fread_little_endian(void *buf, size_t size, size_t n, HTS_File * fp);
115 | 
116 | /* HTS_fwrite_little_endian: fwrite with byteswap */
117 | size_t HTS_fwrite_little_endian(const void *buf, size_t size, size_t n, FILE * fp);
118 | 
119 | /* HTS_get_pattern_token: get pattern token (single/double quote can be used) */
120 | HTS_Boolean HTS_get_pattern_token(HTS_File * fp, char *buff);
121 | 
122 | /* HTS_get_token_from_fp: get token from file pointer (separators are space, tab, line break) */
123 | HTS_Boolean HTS_get_token_from_fp(HTS_File * fp, char *buff);
124 | 
125 | /* HTS_get_token_from_fp_with_separator: get token from file pointer with specified separator */
126 | HTS_Boolean HTS_get_token_from_fp_with_separator(HTS_File * fp, char *buff, char separator);
127 | 
128 | /* HTS_get_token_from_string: get token from string (separators are space, tab, line break) */
129 | HTS_Boolean HTS_get_token_from_string(const char *string, size_t * index, char *buff);
130 | 
131 | /* HTS_get_token_from_string_with_separator: get token from string with specified separator */
132 | HTS_Boolean HTS_get_token_from_string_with_separator(const char *str, size_t * index, char *buff, char separator);
133 | 
134 | /* HTS_calloc: wrapper for calloc */
135 | void *HTS_calloc(const size_t num, const size_t size);
136 | 
137 | /* HTS_strdup: wrapper for strdup */
138 | char *HTS_strdup(const char *string);
139 | 
140 | /* HTS_alloc_matrix: allocate double matrix */
141 | double **HTS_alloc_matrix(size_t x, size_t y);
142 | 
143 | /* HTS_free_matrix: free double matrix */
144 | void HTS_free_matrix(double **p, size_t x);
145 | 
146 | /* HTS_free: wrapper for free */
147 | void HTS_free(void *p);
148 | 
149 | /* HTS_error: output error message */
150 | void HTS_error(int error, const char *message, ...);
151 | 
152 | /* audio ----------------------------------------------------------- */
153 | 
154 | /* HTS_Audio_initialize: initialize audio */
155 | void HTS_Audio_initialize(HTS_Audio * audio);
156 | 
157 | /* HTS_Audio_set_parameter: set parameters for audio */
158 | void HTS_Audio_set_parameter(HTS_Audio * audio, size_t sampling_frequency, size_t max_buff_size);
159 | 
160 | /* HTS_Audio_write: send data to audio */
161 | void HTS_Audio_write(HTS_Audio * audio, short data);
162 | 
163 | /* HTS_Audio_flush: flush remaining data */
164 | void HTS_Audio_flush(HTS_Audio * audio);
165 | 
166 | /* HTS_Audio_clear: free audio */
167 | void HTS_Audio_clear(HTS_Audio * audio);
168 | 
169 | /* model ----------------------------------------------------------- */
170 | 
171 | /* HTS_ModelSet_initialize: initialize model set */
172 | void HTS_ModelSet_initialize(HTS_ModelSet * ms);
173 | 
174 | /* HTS_ModelSet_load: load HTS voices */
175 | HTS_Boolean HTS_ModelSet_load(HTS_ModelSet * ms, char **voices, size_t num_voices);
176 | 
177 | /* HTS_ModelSet_get_sampling_frequency: get sampling frequency of HTS voices */
178 | size_t HTS_ModelSet_get_sampling_frequency(HTS_ModelSet * ms);
179 | 
180 | /* HTS_ModelSet_get_fperiod: get frame period of HTS voices */
181 | size_t HTS_ModelSet_get_fperiod(HTS_ModelSet * ms);
182 | 
183 | /* HTS_ModelSet_get_option: get stream option */
184 | const char *HTS_ModelSet_get_option(HTS_ModelSet * ms, size_t stream_index);
185 | 
186 | /* HTS_ModelSet_get_gv_flag: get GV flag */
187 | HTS_Boolean HTS_ModelSet_get_gv_flag(HTS_ModelSet * ms, const char *string);
188 | 
189 | /* HTS_ModelSet_get_nstate: get number of state */
190 | size_t HTS_ModelSet_get_nstate(HTS_ModelSet * ms);
191 | 
192 | /* HTS_ModelSet_get_fullcontext_label_format: get full-context label format */
193 | const char *HTS_ModelSet_get_fullcontext_label_format(HTS_ModelSet * ms);
194 | 
195 | /* HTS_ModelSet_get_fullcontext_label_version: get full-context label version */
196 | const char *HTS_ModelSet_get_fullcontext_label_version(HTS_ModelSet * ms);
197 | 
198 | /* HTS_ModelSet_get_nstream: get number of stream */
199 | size_t HTS_ModelSet_get_nstream(HTS_ModelSet * ms);
200 | 
201 | /* HTS_ModelSet_get_nvoices: get number of HTS voices */
202 | size_t HTS_ModelSet_get_nvoices(HTS_ModelSet * ms);
203 | 
204 | /* HTS_ModelSet_get_vector_length: get vector length */
205 | size_t HTS_ModelSet_get_vector_length(HTS_ModelSet * ms, size_t stream_index);
206 | 
207 | /* HTS_ModelSet_is_msd: get MSD flag */
208 | HTS_Boolean HTS_ModelSet_is_msd(HTS_ModelSet * ms, size_t stream_index);
209 | 
210 | /* HTS_ModelSet_get_window_size: get dynamic window size */
211 | size_t HTS_ModelSet_get_window_size(HTS_ModelSet * ms, size_t stream_index);
212 | 
213 | /* HTS_ModelSet_get_window_left_width: get left width of dynamic window */
214 | int HTS_ModelSet_get_window_left_width(HTS_ModelSet * ms, size_t stream_index, size_t window_index);
215 | 
216 | /* HTS_ModelSet_get_window_right_width: get right width of dynamic window */
217 | int HTS_ModelSet_get_window_right_width(HTS_ModelSet * ms, size_t stream_index, size_t window_index);
218 | 
219 | /* HTS_ModelSet_get_window_coefficient: get coefficient of dynamic window */
220 | double HTS_ModelSet_get_window_coefficient(HTS_ModelSet * ms, size_t stream_index, size_t window_index, size_t coefficient_index);
221 | 
222 | /* HTS_ModelSet_get_window_max_width: get max width of dynamic window */
223 | size_t HTS_ModelSet_get_window_max_width(HTS_ModelSet * ms, size_t stream_index);
224 | 
225 | /* HTS_ModelSet_use_gv: get GV flag */
226 | HTS_Boolean HTS_ModelSet_use_gv(HTS_ModelSet * ms, size_t stream_index);
227 | 
228 | /* HTS_ModelSet_get_duration_index: get index of duration tree and PDF */
229 | void HTS_ModelSet_get_duration_index(HTS_ModelSet * ms, size_t voice_index, const char *string, size_t * tree_index, size_t * pdf_index);
230 | 
231 | /* HTS_ModelSet_get_duration: get duration using interpolation weight */
232 | void HTS_ModelSet_get_duration(HTS_ModelSet * ms, const char *string, const double *iw, double *mean, double *vari);
233 | 
234 | /* HTS_ModelSet_get_parameter_index: get index of parameter tree and PDF */
235 | void HTS_ModelSet_get_parameter_index(HTS_ModelSet * ms, size_t voice_index, size_t stream_index, size_t state_index, const char *string, size_t * tree_index, size_t * pdf_index);
236 | 
237 | /* HTS_ModelSet_get_parameter: get parameter using interpolation weight */
238 | void HTS_ModelSet_get_parameter(HTS_ModelSet * ms, size_t stream_index, size_t state_index, const char *string, const double *const *iw, double *mean, double *vari, double *msd);
239 | 
/* HTS_ModelSet_get_gv_index: get index of GV tree and PDF */
void HTS_ModelSet_get_gv_index(HTS_ModelSet * ms, size_t voice_index, size_t stream_index, const char *string, size_t * tree_index, size_t * pdf_index);
241 | 
242 | /* HTS_ModelSet_get_gv: get GV using interpolation weight */
243 | void HTS_ModelSet_get_gv(HTS_ModelSet * ms, size_t stream_index, const char *string, const double *const *iw, double *mean, double *vari);
244 | 
245 | /* HTS_ModelSet_clear: free model set */
246 | void HTS_ModelSet_clear(HTS_ModelSet * ms);
247 | 
248 | /* label ----------------------------------------------------------- */
249 | 
250 | /* HTS_Label_initialize: initialize label */
251 | void HTS_Label_initialize(HTS_Label * label);
252 | 
253 | /* HTS_Label_load_from_fn: load label from file name */
254 | void HTS_Label_load_from_fn(HTS_Label * label, size_t sampling_rate, size_t fperiod, const char *fn);
255 | 
256 | /* HTS_Label_load_from_strings: load label list from string list */
257 | void HTS_Label_load_from_strings(HTS_Label * label, size_t sampling_rate, size_t fperiod, char **lines, size_t num_lines);
258 | 
259 | /* HTS_Label_get_size: get number of label string */
260 | size_t HTS_Label_get_size(HTS_Label * label);
261 | 
262 | /* HTS_Label_get_string: get label string */
263 | const char *HTS_Label_get_string(HTS_Label * label, size_t index);
264 | 
265 | /* HTS_Label_get_start_frame: get start frame */
266 | double HTS_Label_get_start_frame(HTS_Label * label, size_t index);
267 | 
268 | /* HTS_Label_get_end_frame: get end frame */
269 | double HTS_Label_get_end_frame(HTS_Label * label, size_t index);
270 | 
271 | /* HTS_Label_clear: free label */
272 | void HTS_Label_clear(HTS_Label * label);
273 | 
274 | /* sstream --------------------------------------------------------- */
275 | 
276 | /* HTS_SStreamSet_initialize: initialize state stream set */
277 | void HTS_SStreamSet_initialize(HTS_SStreamSet * sss);
278 | 
279 | /* HTS_SStreamSet_create: parse label and determine state duration */
280 | HTS_Boolean HTS_SStreamSet_create(HTS_SStreamSet * sss, HTS_ModelSet * ms, HTS_Label * label, HTS_Boolean phoneme_alignment_flag, double speed, double *duration_iw, double **parameter_iw, double **gv_iw);
281 | 
282 | /* HTS_SStreamSet_get_nstream: get number of stream */
283 | size_t HTS_SStreamSet_get_nstream(HTS_SStreamSet * sss);
284 | 
285 | /* HTS_SStreamSet_get_vector_length: get vector length */
286 | size_t HTS_SStreamSet_get_vector_length(HTS_SStreamSet * sss, size_t stream_index);
287 | 
288 | /* HTS_SStreamSet_is_msd: get MSD flag */
289 | HTS_Boolean HTS_SStreamSet_is_msd(HTS_SStreamSet * sss, size_t stream_index);
290 | 
291 | /* HTS_SStreamSet_get_total_state: get total number of state */
292 | size_t HTS_SStreamSet_get_total_state(HTS_SStreamSet * sss);
293 | 
294 | /* HTS_SStreamSet_get_total_frame: get total number of frame */
295 | size_t HTS_SStreamSet_get_total_frame(HTS_SStreamSet * sss);
296 | 
297 | /* HTS_SStreamSet_get_msd: get msd parameter */
298 | double HTS_SStreamSet_get_msd(HTS_SStreamSet * sss, size_t stream_index, size_t state_index);
299 | 
300 | /* HTS_SStreamSet_get_window_size: get dynamic window size */
301 | size_t HTS_SStreamSet_get_window_size(HTS_SStreamSet * sss, size_t stream_index);
302 | 
303 | /* HTS_SStreamSet_get_window_left_width: get left width of dynamic window */
304 | int HTS_SStreamSet_get_window_left_width(HTS_SStreamSet * sss, size_t stream_index, size_t window_index);
305 | 
306 | /* HTS_SStreamSet_get_window_right_width: get right width of dynamic window */
307 | int HTS_SStreamSet_get_window_right_width(HTS_SStreamSet * sss, size_t stream_index, size_t window_index);
308 | 
309 | /* HTS_SStreamSet_get_window_coefficient: get coefficient of dynamic window */
310 | double HTS_SStreamSet_get_window_coefficient(HTS_SStreamSet * sss, size_t stream_index, size_t window_index, int coefficient_index);
311 | 
312 | /* HTS_SStreamSet_get_window_max_width: get max width of dynamic window */
313 | size_t HTS_SStreamSet_get_window_max_width(HTS_SStreamSet * sss, size_t stream_index);
314 | 
315 | /* HTS_SStreamSet_use_gv: get GV flag */
316 | HTS_Boolean HTS_SStreamSet_use_gv(HTS_SStreamSet * sss, size_t stream_index);
317 | 
318 | /* HTS_SStreamSet_get_duration: get state duration */
319 | size_t HTS_SStreamSet_get_duration(HTS_SStreamSet * sss, size_t state_index);
320 | 
321 | /* HTS_SStreamSet_get_mean: get mean parameter */
322 | double HTS_SStreamSet_get_mean(HTS_SStreamSet * sss, size_t stream_index, size_t state_index, size_t vector_index);
323 | 
324 | /* HTS_SStreamSet_set_mean: set mean parameter */
325 | void HTS_SStreamSet_set_mean(HTS_SStreamSet * sss, size_t stream_index, size_t state_index, size_t vector_index, double f);
326 | 
327 | /* HTS_SStreamSet_get_vari: get variance parameter */
328 | double HTS_SStreamSet_get_vari(HTS_SStreamSet * sss, size_t stream_index, size_t state_index, size_t vector_index);
329 | 
330 | /* HTS_SStreamSet_set_vari: set variance parameter */
331 | void HTS_SStreamSet_set_vari(HTS_SStreamSet * sss, size_t stream_index, size_t state_index, size_t vector_index, double f);
332 | 
333 | /* HTS_SStreamSet_get_gv_mean: get GV mean parameter */
334 | double HTS_SStreamSet_get_gv_mean(HTS_SStreamSet * sss, size_t stream_index, size_t vector_index);
335 | 
336 | /* HTS_SStreamSet_get_gv_vari: get GV variance parameter */
337 | double HTS_SStreamSet_get_gv_vari(HTS_SStreamSet * sss, size_t stream_index, size_t vector_index);
338 | 
339 | /* HTS_SStreamSet_set_gv_switch: set GV switch */
340 | void HTS_SStreamSet_set_gv_switch(HTS_SStreamSet * sss, size_t stream_index, size_t state_index, HTS_Boolean i);
341 | 
342 | /* HTS_SStreamSet_get_gv_switch: get GV switch */
343 | HTS_Boolean HTS_SStreamSet_get_gv_switch(HTS_SStreamSet * sss, size_t stream_index, size_t state_index);
344 | 
345 | /* HTS_SStreamSet_clear: free state stream set */
346 | void HTS_SStreamSet_clear(HTS_SStreamSet * sss);
347 | 
348 | /* pstream --------------------------------------------------------- */
349 | 
350 | /* check variance in finv() */
351 | #define INFTY   ((double) 1.0e+38)
352 | #define INFTY2  ((double) 1.0e+19)
353 | #define INVINF  ((double) 1.0e-38)
354 | #define INVINF2 ((double) 1.0e-19)
355 | 
356 | /* GV */
357 | #define STEPINIT 0.1
358 | #define STEPDEC  0.5
359 | #define STEPINC  1.2
360 | #define W1       1.0
361 | #define W2       1.0
362 | #define GV_MAX_ITERATION 5
363 | 
364 | /* HTS_PStreamSet_initialize: initialize parameter stream set */
365 | void HTS_PStreamSet_initialize(HTS_PStreamSet * pss);
366 | 
367 | /* HTS_PStreamSet_create: parameter generation using GV weight */
368 | HTS_Boolean HTS_PStreamSet_create(HTS_PStreamSet * pss, HTS_SStreamSet * sss, double *msd_threshold, double *gv_weight);
369 | 
370 | /* HTS_PStreamSet_get_nstream: get number of stream */
371 | size_t HTS_PStreamSet_get_nstream(HTS_PStreamSet * pss);
372 | 
373 | /* HTS_PStreamSet_get_vector_length: get features length */
374 | size_t HTS_PStreamSet_get_vector_length(HTS_PStreamSet * pss, size_t stream_index);
375 | 
376 | /* HTS_PStreamSet_get_total_frame: get total number of frame */
377 | size_t HTS_PStreamSet_get_total_frame(HTS_PStreamSet * pss);
378 | 
379 | /* HTS_PStreamSet_get_parameter: get parameter */
380 | double HTS_PStreamSet_get_parameter(HTS_PStreamSet * pss, size_t stream_index, size_t frame_index, size_t vector_index);
381 | 
382 | /* HTS_PStreamSet_get_parameter_vector: get parameter vector */
383 | double *HTS_PStreamSet_get_parameter_vector(HTS_PStreamSet * pss, size_t stream_index, size_t frame_index);
384 | 
385 | /* HTS_PStreamSet_get_msd_flag: get generated MSD flag per frame */
386 | HTS_Boolean HTS_PStreamSet_get_msd_flag(HTS_PStreamSet * pss, size_t stream_index, size_t frame_index);
387 | 
388 | /* HTS_PStreamSet_is_msd: get MSD flag */
389 | HTS_Boolean HTS_PStreamSet_is_msd(HTS_PStreamSet * pss, size_t stream_index);
390 | 
391 | /* HTS_PStreamSet_clear: free parameter stream set */
392 | void HTS_PStreamSet_clear(HTS_PStreamSet * pss);
393 | 
394 | /* gstream --------------------------------------------------------- */
395 | 
396 | /* HTS_GStreamSet_initialize: initialize generated parameter stream set */
397 | void HTS_GStreamSet_initialize(HTS_GStreamSet * gss);
398 | 
399 | /* HTS_GStreamSet_create: generate speech */
400 | HTS_Boolean HTS_GStreamSet_create(HTS_GStreamSet * gss, HTS_PStreamSet * pss, size_t stage, HTS_Boolean use_log_gain, size_t sampling_rate, size_t fperiod, double alpha, double beta, HTS_Boolean * stop, double volume, HTS_Audio * audio);
401 | 
402 | /* HTS_GStreamSet_get_total_nsamples: get total number of sample */
403 | size_t HTS_GStreamSet_get_total_nsamples(HTS_GStreamSet * gss);
404 | 
405 | /* HTS_GStreamSet_get_total_frame: get total number of frame */
406 | size_t HTS_GStreamSet_get_total_frame(HTS_GStreamSet * gss);
407 | 
408 | /* HTS_GStreamSet_get_vector_length: get features length */
409 | size_t HTS_GStreamSet_get_vector_length(HTS_GStreamSet * gss, size_t stream_index);
410 | 
411 | /* HTS_GStreamSet_get_speech: get synthesized speech parameter */
412 | double HTS_GStreamSet_get_speech(HTS_GStreamSet * gss, size_t sample_index);
413 | 
414 | /* HTS_GStreamSet_get_parameter: get generated parameter */
415 | double HTS_GStreamSet_get_parameter(HTS_GStreamSet * gss, size_t stream_index, size_t frame_index, size_t vector_index);
416 | 
417 | /* HTS_GStreamSet_clear: free generated parameter stream set */
418 | void HTS_GStreamSet_clear(HTS_GStreamSet * gss);
419 | 
420 | /* vocoder --------------------------------------------------------- */
421 | 
422 | #ifndef LZERO
423 | #define LZERO (-1.0e+10)        /* ~log(0) */
424 | #endif                          /* !LZERO */
425 | 
426 | #ifndef ZERO
427 | #define ZERO  (1.0e-10)         /* ~(0) */
428 | #endif                          /* !ZERO */
429 | 
430 | #ifndef PI
431 | #define PI  3.14159265358979323846
432 | #endif                          /* !PI */
433 | 
434 | #ifndef PI2
435 | #define PI2 6.28318530717958647692      /* 2 * PI */
436 | #endif                          /* !PI2 */
437 | 
438 | #define RANDMAX 32767           /* 2^15 - 1 */
439 | 
440 | #define SEED    1
441 | #define B0      0x00000001      /* bit 0 */
442 | #define B28     0x10000000      /* bit 28 */
443 | #define B31     0x80000000      /* bit 31 */
444 | #define B31_    0x7fffffff      /* every bit below bit 31 */
445 | #define Z       0x00000000      /* no bit set */
446 | 
447 | #ifdef HTS_EMBEDDED
448 | #define GAUSS     FALSE
449 | #define PADEORDER 4             /* pade order (for MLSA filter) */
450 | #define IRLENG    384           /* length of impulse response */
451 | #else
452 | #define GAUSS     TRUE
453 | #define PADEORDER 5             /* pade order (for MLSA filter) */
454 | #define IRLENG    576           /* length of impulse response */
455 | #endif                          /* HTS_EMBEDDED */
456 | 
457 | #define CHECK_LSP_STABILITY_MIN 0.25
458 | #define CHECK_LSP_STABILITY_NUM 4
459 | 
460 | /* for MGLSA filter */
461 | #define NORMFLG1 TRUE
462 | #define NORMFLG2 FALSE
463 | #define MULGFLG1 TRUE
464 | #define MULGFLG2 FALSE
465 | #define NGAIN    FALSE
466 | 
467 | /* HTS_Vocoder: structure for setting of vocoder */
468 | typedef struct _HTS_Vocoder {
469 |    HTS_Boolean is_first;        /* NOTE(review): presumably TRUE until the first frame is synthesized - confirm in HTS_vocoder.c */
470 |    size_t stage;                /* Gamma=-1/stage: if stage=0 then Gamma=0 */
471 |    double gamma;                /* Gamma (derived from stage, see above) */
472 |    HTS_Boolean use_log_gain;    /* log gain flag (for LSP) */
473 |    size_t fprd;                 /* frame shift */
474 |    unsigned long next;          /* temporary variable for random generator */
475 |    HTS_Boolean gauss;           /* flag to use Gaussian noise */
476 |    double rate;                 /* sampling rate */
477 |    double pitch_of_curr_point;  /* used in excitation generation */
478 |    double pitch_counter;        /* used in excitation generation */
479 |    double pitch_inc_per_point;  /* used in excitation generation */
480 |    double *excite_ring_buff;    /* used in excitation generation */
481 |    size_t excite_buff_size;     /* used in excitation generation */
482 |    size_t excite_buff_index;    /* used in excitation generation */
483 |    unsigned char sw;            /* switch used in random generator */
484 |    int x;                       /* excitation signal */
485 |    double *freqt_buff;          /* used in freqt */
486 |    size_t freqt_size;           /* buffer size for freqt */
487 |    double *spectrum2en_buff;    /* used in spectrum2en */
488 |    size_t spectrum2en_size;     /* buffer size for spectrum2en */
489 |    double r1, r2, s;            /* used in random generator */
490 |    double *postfilter_buff;     /* used in postfiltering */
491 |    size_t postfilter_size;      /* buffer size for postfiltering */
492 |    double *c, *cc, *cinc, *d1;  /* used in the MLSA/MGLSA filter */
493 |    double *lsp2lpc_buff;        /* used in lsp2lpc */
494 |    size_t lsp2lpc_size;         /* buffer size for lsp2lpc */
495 |    double *gc2gc_buff;          /* used in gc2gc */
496 |    size_t gc2gc_size;           /* buffer size for gc2gc */
497 | } HTS_Vocoder;
498 | 
499 | /* HTS_Vocoder_initialize: initialize vocoder */
500 | void HTS_Vocoder_initialize(HTS_Vocoder * v, size_t m, size_t stage, HTS_Boolean use_log_gain, size_t rate, size_t fperiod);
501 | 
502 | /* HTS_Vocoder_synthesize: pulse/noise excitation and MLSA/MGLSA filter based waveform synthesis */
503 | void HTS_Vocoder_synthesize(HTS_Vocoder * v, size_t m, double lf0, double *spectrum, size_t nlpf, double *lpf, double alpha, double beta, double volume, double *rawdata, HTS_Audio * audio);
504 | 
505 | /* HTS_Vocoder_clear: clear vocoder */
506 | void HTS_Vocoder_clear(HTS_Vocoder * v);
507 | 
508 | HTS_HIDDEN_H_END;
509 | 
510 | #endif                          /* !HTS_HIDDEN_H */
511 | 


--------------------------------------------------------------------------------
/src/lib/HTS_label.c:
--------------------------------------------------------------------------------
  1 | /* ----------------------------------------------------------------- */
  2 | /*           The HMM-Based Speech Synthesis Engine "hts_engine API"  */
  3 | /*           developed by HTS Working Group                          */
  4 | /*           http://hts-engine.sourceforge.net/                      */
  5 | /* ----------------------------------------------------------------- */
  6 | /*                                                                   */
  7 | /*  Copyright (c) 2001-2014  Nagoya Institute of Technology          */
  8 | /*                           Department of Computer Science          */
  9 | /*                                                                   */
 10 | /*                2001-2008  Tokyo Institute of Technology           */
 11 | /*                           Interdisciplinary Graduate School of    */
 12 | /*                           Science and Engineering                 */
 13 | /*                                                                   */
 14 | /* All rights reserved.                                              */
 15 | /*                                                                   */
 16 | /* Redistribution and use in source and binary forms, with or        */
 17 | /* without modification, are permitted provided that the following   */
 18 | /* conditions are met:                                               */
 19 | /*                                                                   */
 20 | /* - Redistributions of source code must retain the above copyright  */
 21 | /*   notice, this list of conditions and the following disclaimer.   */
 22 | /* - Redistributions in binary form must reproduce the above         */
 23 | /*   copyright notice, this list of conditions and the following     */
 24 | /*   disclaimer in the documentation and/or other materials provided */
 25 | /*   with the distribution.                                          */
 26 | /* - Neither the name of the HTS working group nor the names of its  */
 27 | /*   contributors may be used to endorse or promote products derived */
 28 | /*   from this software without specific prior written permission.   */
 29 | /*                                                                   */
 30 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND            */
 31 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,       */
 32 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF          */
 33 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE          */
 34 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
 35 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,          */
 36 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED   */
 37 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,     */
 38 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
 39 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,   */
 40 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY    */
 41 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE           */
 42 | /* POSSIBILITY OF SUCH DAMAGE.                                       */
 43 | /* ----------------------------------------------------------------- */
 44 | 
 45 | #ifndef HTS_LABEL_C
 46 | #define HTS_LABEL_C
 47 | 
 48 | #ifdef __cplusplus
 49 | #define HTS_LABEL_C_START extern "C" {
 50 | #define HTS_LABEL_C_END   }
 51 | #else
 52 | #define HTS_LABEL_C_START
 53 | #define HTS_LABEL_C_END
 54 | #endif                          /* __CPLUSPLUS */
 55 | 
 56 | HTS_LABEL_C_START;
 57 | 
 58 | #include <stdlib.h>             /* for atof() */
 59 | #include <ctype.h>              /* for isgraph(),isdigit() */
 60 | 
 61 | /* hts_engine libraries */
 62 | #include "HTS_hidden.h"
 63 | 
 64 | static HTS_Boolean isdigit_string(char *str)
 65 | {
 66 |    int i;
 67 | 
 68 |    if (sscanf(str, "%d", &i) == 1)
 69 |       return TRUE;
 70 |    else
 71 |       return FALSE;
 72 | }
 73 | 
 74 | /* HTS_Label_initialize: initialize label */
 75 | void HTS_Label_initialize(HTS_Label * label)
 76 | {
 77 |    label->head = NULL;
 78 |    label->size = 0;
 79 | }
 80 | 
 81 | /* HTS_Label_check_time: check label */
 82 | static void HTS_Label_check_time(HTS_Label * label)
 83 | {
 84 |    HTS_LabelString *lstring = label->head;
 85 |    HTS_LabelString *next = NULL;
 86 | 
 87 |    if (lstring)
 88 |       lstring->start = 0.0;
 89 |    while (lstring) {
 90 |       next = lstring->next;
 91 |       if (!next)
 92 |          break;
 93 |       if (lstring->end < 0.0 && next->start >= 0.0)
 94 |          lstring->end = next->start;
 95 |       else if (lstring->end >= 0.0 && next->start < 0.0)
 96 |          next->start = lstring->end;
 97 |       if (lstring->start < 0.0)
 98 |          lstring->start = -1.0;
 99 |       if (lstring->end < 0.0)
100 |          lstring->end = -1.0;
101 |       lstring = next;
102 |    }
103 | }
104 | 
105 | /* HTS_Label_load: load label */
106 | static void HTS_Label_load(HTS_Label * label, size_t sampling_rate, size_t fperiod, HTS_File * fp)
107 | {
108 |    char buff[HTS_MAXBUFLEN];
109 |    HTS_LabelString *lstring = NULL;
110 |    double start, end;
111 |    const double rate = (double) sampling_rate / ((double) fperiod * 1e+7);
112 | 
113 |    if (label->head || label->size != 0) {
114 |       HTS_error(1, "HTS_Label_load_from_fp: label is not initialized.\n");
115 |       return;
116 |    }
117 | 
118 |    /* parse label file */
119 |    while (HTS_get_token_from_fp(fp, buff)) {
120 |       if (!isgraph((int) buff[0]))
121 |          break;
122 |       label->size++;
123 | 
124 |       if (lstring) {
125 |          lstring->next = (HTS_LabelString *) HTS_calloc(1, sizeof(HTS_LabelString));
126 |          lstring = lstring->next;
127 |       } else {                  /* first time */
128 |          lstring = (HTS_LabelString *) HTS_calloc(1, sizeof(HTS_LabelString));
129 |          label->head = lstring;
130 |       }
131 |       if (isdigit_string(buff)) {       /* has frame infomation */
132 |          start = atof(buff);
133 |          HTS_get_token_from_fp(fp, buff);
134 |          end = atof(buff);
135 |          HTS_get_token_from_fp(fp, buff);
136 |          lstring->start = rate * start;
137 |          lstring->end = rate * end;
138 |       } else {
139 |          lstring->start = -1.0;
140 |          lstring->end = -1.0;
141 |       }
142 |       lstring->next = NULL;
143 |       lstring->name = HTS_strdup(buff);
144 |    }
145 |    HTS_Label_check_time(label);
146 | }
147 | 
148 | /* HTS_Label_load_from_fn: load label from file name */
149 | void HTS_Label_load_from_fn(HTS_Label * label, size_t sampling_rate, size_t fperiod, const char *fn)
150 | {
151 |    HTS_File *fp = HTS_fopen_from_fn(fn, "r");
152 |    HTS_Label_load(label, sampling_rate, fperiod, fp);
153 |    HTS_fclose(fp);
154 | }
155 | 
156 | /* HTS_Label_load_from_strings: load label from strings */
157 | void HTS_Label_load_from_strings(HTS_Label * label, size_t sampling_rate, size_t fperiod, char **lines, size_t num_lines)
158 | {
159 |    char buff[HTS_MAXBUFLEN];
160 |    HTS_LabelString *lstring = NULL;
161 |    size_t i;
162 |    size_t data_index;
163 |    double start, end;
164 |    const double rate = (double) sampling_rate / ((double) fperiod * 1e+7);
165 | 
166 |    if (label->head || label->size != 0) {
167 |       HTS_error(1, "HTS_Label_load_from_fp: label list is not initialized.\n");
168 |       return;
169 |    }
170 |    /* copy label */
171 |    for (i = 0; i < num_lines; i++) {
172 |       if (!isgraph((int) lines[i][0]))
173 |          break;
174 |       label->size++;
175 | 
176 |       if (lstring) {
177 |          lstring->next = (HTS_LabelString *) HTS_calloc(1, sizeof(HTS_LabelString));
178 |          lstring = lstring->next;
179 |       } else {                  /* first time */
180 |          lstring = (HTS_LabelString *) HTS_calloc(1, sizeof(HTS_LabelString));
181 |          label->head = lstring;
182 |       }
183 |       data_index = 0;
184 |       if (isdigit_string(lines[i])) {   /* has frame infomation */
185 |          HTS_get_token_from_string(lines[i], &data_index, buff);
186 |          start = atof(buff);
187 |          HTS_get_token_from_string(lines[i], &data_index, buff);
188 |          end = atof(buff);
189 |          HTS_get_token_from_string(lines[i], &data_index, buff);
190 |          lstring->name = HTS_strdup(buff);
191 |          lstring->start = rate * start;
192 |          lstring->end = rate * end;
193 |       } else {
194 |          lstring->start = -1.0;
195 |          lstring->end = -1.0;
196 |          lstring->name = HTS_strdup(lines[i]);
197 |       }
198 |       lstring->next = NULL;
199 |    }
200 |    HTS_Label_check_time(label);
201 | }
202 | 
203 | /* HTS_Label_get_size: get number of label string */
204 | size_t HTS_Label_get_size(HTS_Label * label)
205 | {
206 |    return label->size;
207 | }
208 | 
209 | /* HTS_Label_get_string: get label string */
210 | const char *HTS_Label_get_string(HTS_Label * label, size_t index)
211 | {
212 |    size_t i;
213 |    HTS_LabelString *lstring = label->head;
214 | 
215 |    for (i = 0; i < index && lstring; i++)
216 |       lstring = lstring->next;
217 |    if (!lstring)
218 |       return NULL;
219 |    return lstring->name;
220 | }
221 | 
222 | /* HTS_Label_get_start_frame: get start frame */
223 | double HTS_Label_get_start_frame(HTS_Label * label, size_t index)
224 | {
225 |    size_t i;
226 |    HTS_LabelString *lstring = label->head;
227 | 
228 |    for (i = 0; i < index && lstring; i++)
229 |       lstring = lstring->next;
230 |    if (!lstring)
231 |       return -1.0;
232 |    return lstring->start;
233 | }
234 | 
235 | /* HTS_Label_get_end_frame: get end frame */
236 | double HTS_Label_get_end_frame(HTS_Label * label, size_t index)
237 | {
238 |    size_t i;
239 |    HTS_LabelString *lstring = label->head;
240 | 
241 |    for (i = 0; i < index && lstring; i++)
242 |       lstring = lstring->next;
243 |    if (!lstring)
244 |       return -1.0;
245 |    return lstring->end;
246 | }
247 | 
248 | /* HTS_Label_clear: free label */
249 | void HTS_Label_clear(HTS_Label * label)
250 | {
251 |    HTS_LabelString *lstring, *next_lstring;
252 | 
253 |    for (lstring = label->head; lstring; lstring = next_lstring) {
254 |       next_lstring = lstring->next;
255 |       HTS_free(lstring->name);
256 |       HTS_free(lstring);
257 |    }
258 |    HTS_Label_initialize(label);
259 | }
260 | 
261 | HTS_LABEL_C_END;
262 | 
263 | #endif                          /* !HTS_LABEL_C */
264 | 


--------------------------------------------------------------------------------
/src/lib/HTS_misc.c:
--------------------------------------------------------------------------------
  1 | /* ----------------------------------------------------------------- */
  2 | /*           The HMM-Based Speech Synthesis Engine "hts_engine API"  */
  3 | /*           developed by HTS Working Group                          */
  4 | /*           http://hts-engine.sourceforge.net/                      */
  5 | /* ----------------------------------------------------------------- */
  6 | /*                                                                   */
  7 | /*  Copyright (c) 2001-2014  Nagoya Institute of Technology          */
  8 | /*                           Department of Computer Science          */
  9 | /*                                                                   */
 10 | /*                2001-2008  Tokyo Institute of Technology           */
 11 | /*                           Interdisciplinary Graduate School of    */
 12 | /*                           Science and Engineering                 */
 13 | /*                                                                   */
 14 | /* All rights reserved.                                              */
 15 | /*                                                                   */
 16 | /* Redistribution and use in source and binary forms, with or        */
 17 | /* without modification, are permitted provided that the following   */
 18 | /* conditions are met:                                               */
 19 | /*                                                                   */
 20 | /* - Redistributions of source code must retain the above copyright  */
 21 | /*   notice, this list of conditions and the following disclaimer.   */
 22 | /* - Redistributions in binary form must reproduce the above         */
 23 | /*   copyright notice, this list of conditions and the following     */
 24 | /*   disclaimer in the documentation and/or other materials provided */
 25 | /*   with the distribution.                                          */
 26 | /* - Neither the name of the HTS working group nor the names of its  */
 27 | /*   contributors may be used to endorse or promote products derived */
 28 | /*   from this software without specific prior written permission.   */
 29 | /*                                                                   */
 30 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND            */
 31 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,       */
 32 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF          */
 33 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE          */
 34 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
 35 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,          */
 36 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED   */
 37 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,     */
 38 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
 39 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,   */
 40 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY    */
 41 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE           */
 42 | /* POSSIBILITY OF SUCH DAMAGE.                                       */
 43 | /* ----------------------------------------------------------------- */
 44 | 
 45 | #ifndef HTS_MISC_C
 46 | #define HTS_MISC_C
 47 | 
 48 | #ifdef __cplusplus
 49 | #define HTS_MISC_C_START extern "C" {
 50 | #define HTS_MISC_C_END   }
 51 | #else
 52 | #define HTS_MISC_C_START
 53 | #define HTS_MISC_C_END
 54 | #endif                          /* __CPLUSPLUS */
 55 | 
 56 | HTS_MISC_C_START;
 57 | 
 58 | #include <stdlib.h>             /* for exit(),calloc(),free() */
 59 | #include <stdarg.h>             /* for va_list */
 60 | #include <string.h>             /* for strcpy(),strlen() */
 61 | 
 62 | /* hts_engine libraries */
 63 | #include "HTS_hidden.h"
 64 | 
 65 | #ifdef FESTIVAL
 66 | #include "EST_walloc.h"
 67 | #endif                          /* FESTIVAL */
 68 | 
 69 | #define HTS_FILE  0             /* HTS_File.type tag: pointer is a stdio FILE */
 70 | #define HTS_DATA  1             /* HTS_File.type tag: pointer is an HTS_Data memory buffer */
 71 | 
 72 | typedef struct _HTS_Data {
 73 |    unsigned char *data;         /* buffer holding the stream contents */
 74 |    size_t size;                 /* number of bytes in data */
 75 |    size_t index;                /* current read position within data */
 76 | } HTS_Data;
 77 | 
 78 | /* HTS_fopen_from_fn: wrapper for fopen */
 79 | HTS_File *HTS_fopen_from_fn(const char *name, const char *opt)
 80 | {
 81 |    HTS_File *fp = (HTS_File *) HTS_calloc(1, sizeof(HTS_File));
 82 | 
 83 |    fp->type = HTS_FILE;
 84 |    fp->pointer = (void *) fopen(name, opt);
 85 | 
 86 |    if (fp->pointer == NULL) {
 87 |       HTS_error(0, "HTS_fopen: Cannot open %s.\n", name);
 88 |       HTS_free(fp);
 89 |       return NULL;
 90 |    }
 91 | 
 92 |    return fp;
 93 | }
 94 | 
 95 | /* HTS_fopen_from_fp: wrapper for fopen */
 96 | HTS_File *HTS_fopen_from_fp(HTS_File * fp, size_t size)
 97 | {
 98 |    if (fp == NULL || size == 0)
 99 |       return NULL;
100 |    else if (fp->type == HTS_FILE) {
101 |       HTS_Data *d;
102 |       HTS_File *f;
103 |       d = (HTS_Data *) HTS_calloc(1, sizeof(HTS_Data));
104 |       d->data = (unsigned char *) HTS_calloc(size, sizeof(unsigned char));
105 |       d->size = size;
106 |       d->index = 0;
107 |       if (fread(d->data, sizeof(unsigned char), size, (FILE *) fp->pointer) != size) {
108 |          free(d->data);
109 |          free(d);
110 |          return NULL;
111 |       }
112 |       f = (HTS_File *) HTS_calloc(1, sizeof(HTS_File));
113 |       f->type = HTS_DATA;
114 |       f->pointer = (void *) d;
115 |       return f;
116 |    } else if (fp->type == HTS_DATA) {
117 |       HTS_File *f;
118 |       HTS_Data *tmp1, *tmp2;
119 |       tmp1 = (HTS_Data *) fp->pointer;
120 |       if (tmp1->index + size > tmp1->size)
121 |          return NULL;
122 |       tmp2 = (HTS_Data *) HTS_calloc(1, sizeof(HTS_Data));
123 |       tmp2->data = (unsigned char *) HTS_calloc(size, sizeof(unsigned char));
124 |       tmp2->size = size;
125 |       tmp2->index = 0;
126 |       memcpy(tmp2->data, &tmp1->data[tmp1->index], size);
127 |       tmp1->index += size;
128 |       f = (HTS_File *) HTS_calloc(1, sizeof(HTS_File));
129 |       f->type = HTS_DATA;
130 |       f->pointer = (void *) tmp2;
131 |       return f;
132 |    }
133 | 
134 |    HTS_error(0, "HTS_fopen_from_fp: Unknown file type.\n");
135 |    return NULL;
136 | }
137 | 
138 | /* HTS_fopen_from_data: wrapper for fopen */
139 | HTS_File *HTS_fopen_from_data(void *data, size_t size)
140 | {
141 |    HTS_Data *d;
142 |    HTS_File *f;
143 | 
144 |    if (data == NULL || size == 0)
145 |       return NULL;
146 | 
147 |    d = (HTS_Data *) HTS_calloc(1, sizeof(HTS_Data));
148 |    d->data = (unsigned char *) HTS_calloc(size, sizeof(unsigned char));
149 |    d->size = size;
150 |    d->index = 0;
151 | 
152 |    memcpy(d->data, data, size);
153 | 
154 |    f = (HTS_File *) HTS_calloc(1, sizeof(HTS_File));
155 |    f->type = HTS_DATA;
156 |    f->pointer = (void *) d;
157 | 
158 |    return f;
159 | }
160 | 
161 | /* HTS_fclose: wrapper for fclose */
162 | void HTS_fclose(HTS_File * fp)
163 | {
164 |    if (fp == NULL) {
165 |       return;
166 |    } else if (fp->type == HTS_FILE) {
167 |       if (fp->pointer != NULL)
168 |          fclose((FILE *) fp->pointer);
169 |       HTS_free(fp);
170 |       return;
171 |    } else if (fp->type == HTS_DATA) {
172 |       if (fp->pointer != NULL) {
173 |          HTS_Data *d = (HTS_Data *) fp->pointer;
174 |          if (d->data != NULL)
175 |             HTS_free(d->data);
176 |          HTS_free(d);
177 |       }
178 |       HTS_free(fp);
179 |       return;
180 |    }
181 |    HTS_error(0, "HTS_fclose: Unknown file type.\n");
182 | }
183 | 
184 | /* HTS_fgetc: wrapper for fgetc */
185 | int HTS_fgetc(HTS_File * fp)
186 | {
187 |    if (fp == NULL) {
188 |       return EOF;
189 |    } else if (fp->type == HTS_FILE) {
190 |       return fgetc((FILE *) fp->pointer);
191 |    } else if (fp->type == HTS_DATA) {
192 |       HTS_Data *d = (HTS_Data *) fp->pointer;
193 |       if (d->size <= d->index)
194 |          return EOF;
195 |       return (int) d->data[d->index++];
196 |    }
197 |    HTS_error(0, "HTS_fgetc: Unknown file type.\n");
198 |    return EOF;
199 | }
200 | 
201 | /* HTS_feof: wrapper for feof */
202 | int HTS_feof(HTS_File * fp)
203 | {
204 |    if (fp == NULL) {
205 |       return 1;
206 |    } else if (fp->type == HTS_FILE) {
207 |       return feof((FILE *) fp->pointer);
208 |    } else if (fp->type == HTS_DATA) {
209 |       HTS_Data *d = (HTS_Data *) fp->pointer;
210 |       return d->size <= d->index ? 1 : 0;
211 |    }
212 |    HTS_error(0, "HTS_feof: Unknown file type.\n");
213 |    return 1;
214 | }
215 | 
216 | /* HTS_fseek: wrapper for fseek */
217 | int HTS_fseek(HTS_File * fp, long offset, int origin)
218 | {
219 |    if (fp == NULL) {
220 |       return 1;
221 |    } else if (fp->type == HTS_FILE) {
222 |       return fseek((FILE *) fp->pointer, offset, origin);
223 |    } else if (fp->type == HTS_DATA) {
224 |       HTS_Data *d = (HTS_Data *) fp->pointer;
225 |       if (origin == SEEK_SET) {
226 |          d->index = (size_t) offset;
227 |       } else if (origin == SEEK_CUR) {
228 |          d->index += offset;
229 |       } else if (origin == SEEK_END) {
230 |          d->index = d->size + offset;
231 |       } else {
232 |          return 1;
233 |       }
234 |       return 0;
235 |    }
236 |    HTS_error(0, "HTS_fseek: Unknown file type.\n");
237 |    return 1;
238 | }
239 | 
240 | /* HTS_ftell: rapper for ftell */
241 | size_t HTS_ftell(HTS_File * fp)
242 | {
243 |    if (fp == NULL) {
244 |       return 0;
245 |    } else if (fp->type == HTS_FILE) {
246 |       fpos_t pos;
247 |       fgetpos((FILE *) fp->pointer, &pos);
248 | #if defined(_WIN32) || defined(__CYGWIN__) || defined(__APPLE__) || defined(__ANDROID__)
249 |       return (size_t) pos;
250 | #else
251 |       return (size_t) pos.__pos;
252 | #endif                          /* _WIN32 || __CYGWIN__ || __APPLE__ || __ANDROID__ */
253 |    } else if (fp->type == HTS_DATA) {
254 |       HTS_Data *d = (HTS_Data *) fp->pointer;
255 |       return d->index;
256 |    }
257 |    HTS_error(0, "HTS_ftell: Unknown file type.\n");
258 |    return 0;
259 | }
260 | 
261 | /* HTS_fread: wrapper for fread */
262 | static size_t HTS_fread(void *buf, size_t size, size_t n, HTS_File * fp)
263 | {
264 |    if (fp == NULL || size == 0 || n == 0) {
265 |       return 0;
266 |    }
267 |    if (fp->type == HTS_FILE) {
268 |       return fread(buf, size, n, (FILE *) fp->pointer);
269 |    } else if (fp->type == HTS_DATA) {
270 |       HTS_Data *d = (HTS_Data *) fp->pointer;
271 |       size_t i, length = size * n;
272 |       unsigned char *c = (unsigned char *) buf;
273 |       for (i = 0; i < length; i++) {
274 |          if (d->index < d->size)
275 |             c[i] = d->data[d->index++];
276 |          else
277 |             break;
278 |       }
279 |       if (i == 0)
280 |          return 0;
281 |       else
282 |          return i / size;
283 |    }
284 |    HTS_error(0, "HTS_fread: Unknown file type.\n");
285 |    return 0;
286 | }
287 | 
/* HTS_byte_swap: reverse the byte order of each of block consecutive elements of size bytes */
static void HTS_byte_swap(void *p, size_t size, size_t block)
{
   char *elem = (char *) p;
   size_t remaining;

   for (remaining = block; remaining > 0; remaining--, elem += size) {
      size_t lo = 0;
      size_t hi = size;

      /* swap the outermost pair and move inwards until the two ends meet */
      while (lo + 1 < hi) {
         char held;

         hi--;
         held = elem[lo];
         elem[lo] = elem[hi];
         elem[hi] = held;
         lo++;
      }
   }
}
305 | 
306 | /* HTS_fread_big_endian: fread with byteswap */
307 | size_t HTS_fread_big_endian(void *buf, size_t size, size_t n, HTS_File * fp)
308 | {
309 |    size_t block = HTS_fread(buf, size, n, fp);
310 | 
311 | #ifdef WORDS_LITTLEENDIAN
312 |    HTS_byte_swap(buf, size, block);
313 | #endif                          /* WORDS_LITTLEENDIAN */
314 | 
315 |    return block;
316 | }
317 | 
318 | /* HTS_fread_little_endian: fread with byteswap */
319 | size_t HTS_fread_little_endian(void *buf, size_t size, size_t n, HTS_File * fp)
320 | {
321 |    size_t block = HTS_fread(buf, size, n, fp);
322 | 
323 | #ifdef WORDS_BIGENDIAN
324 |    HTS_byte_swap(buf, size, block);
325 | #endif                          /* WORDS_BIGENDIAN */
326 | 
327 |    return block;
328 | }
329 | 
/* HTS_fwrite_little_endian: fwrite with byteswap */
/*   buf:  n elements of size bytes each, in host byte order; never modified */
/*   fp:   stdio stream to write to                                          */
/*   returns the number of complete elements written                         */
size_t HTS_fwrite_little_endian(const void *buf, size_t size, size_t n, FILE * fp)
{
#ifdef WORDS_BIGENDIAN
   /* Emit each element with its bytes reversed. The former code swapped the  */
   /* caller's const buffer in place and passed n * size as the element       */
   /* count, so it reversed memory far beyond the end of buf.                 */
   const unsigned char *src = (const unsigned char *) buf;
   size_t written, j;

   for (written = 0; written < n; written++) {
      for (j = size; j > 0; j--) {
         if (fputc(src[written * size + (j - 1)], fp) == EOF)
            return written;     /* count only fully written elements */
      }
   }
   return (size == 0) ? 0 : written;
#else
   return fwrite(buf, size, n, fp);
#endif                          /* WORDS_BIGENDIAN */
}
338 | 
339 | /* HTS_get_pattern_token: get pattern token (single/double quote can be used) */
340 | HTS_Boolean HTS_get_pattern_token(HTS_File * fp, char *buff)
341 | {
342 |    char c;
343 |    size_t i;
344 |    HTS_Boolean squote = FALSE, dquote = FALSE;
345 | 
346 |    if (fp == NULL || HTS_feof(fp))
347 |       return FALSE;
348 |    c = HTS_fgetc(fp);
349 | 
350 |    while (c == ' ' || c == '\n') {
351 |       if (HTS_feof(fp))
352 |          return FALSE;
353 |       c = HTS_fgetc(fp);
354 |    }
355 | 
356 |    if (c == '\'') {             /* single quote case */
357 |       if (HTS_feof(fp))
358 |          return FALSE;
359 |       c = HTS_fgetc(fp);
360 |       squote = TRUE;
361 |    }
362 | 
363 |    if (c == '\"') {             /*double quote case */
364 |       if (HTS_feof(fp))
365 |          return FALSE;
366 |       c = HTS_fgetc(fp);
367 |       dquote = TRUE;
368 |    }
369 | 
370 |    if (c == ',') {              /*special character ',' */
371 |       strcpy(buff, ",");
372 |       return TRUE;
373 |    }
374 | 
375 |    i = 0;
376 |    while (1) {
377 |       buff[i++] = c;
378 |       c = HTS_fgetc(fp);
379 |       if (squote && c == '\'')
380 |          break;
381 |       if (dquote && c == '\"')
382 |          break;
383 |       if (!squote && !dquote) {
384 |          if (c == ' ')
385 |             break;
386 |          if (c == '\n')
387 |             break;
388 |          if (HTS_feof(fp))
389 |             break;
390 |       }
391 |    }
392 | 
393 |    buff[i] = '\0';
394 |    return TRUE;
395 | }
396 | 
397 | /* HTS_get_token: get token from file pointer (separators are space, tab, and line break) */
398 | HTS_Boolean HTS_get_token_from_fp(HTS_File * fp, char *buff)
399 | {
400 |    char c;
401 |    size_t i;
402 | 
403 |    if (fp == NULL || HTS_feof(fp))
404 |       return FALSE;
405 |    c = HTS_fgetc(fp);
406 |    while (c == ' ' || c == '\n' || c == '\t') {
407 |       if (HTS_feof(fp))
408 |          return FALSE;
409 |       c = HTS_fgetc(fp);
410 |       if (c == EOF)
411 |          return FALSE;
412 |    }
413 | 
414 |    for (i = 0; c != ' ' && c != '\n' && c != '\t';) {
415 |       buff[i++] = c;
416 |       if (HTS_feof(fp))
417 |          break;
418 |       c = HTS_fgetc(fp);
419 |       if (c == EOF)
420 |          break;
421 |    }
422 | 
423 |    buff[i] = '\0';
424 |    return TRUE;
425 | }
426 | 
427 | /* HTS_get_token_with_separator: get token from file pointer with specified separator */
428 | HTS_Boolean HTS_get_token_from_fp_with_separator(HTS_File * fp, char *buff, char separator)
429 | {
430 |    char c;
431 |    size_t i;
432 | 
433 |    if (fp == NULL || HTS_feof(fp))
434 |       return FALSE;
435 |    c = HTS_fgetc(fp);
436 |    while (c == separator) {
437 |       if (HTS_feof(fp))
438 |          return FALSE;
439 |       c = HTS_fgetc(fp);
440 |       if (c == EOF)
441 |          return FALSE;
442 |    }
443 | 
444 |    for (i = 0; c != separator;) {
445 |       buff[i++] = c;
446 |       if (HTS_feof(fp))
447 |          break;
448 |       c = HTS_fgetc(fp);
449 |       if (c == EOF)
450 |          break;
451 |    }
452 | 
453 |    buff[i] = '\0';
454 |    return TRUE;
455 | }
456 | 
457 | /* HTS_get_token_from_string: get token from string (separators are space, tab, and line break) */
458 | HTS_Boolean HTS_get_token_from_string(const char *string, size_t * index, char *buff)
459 | {
460 |    char c;
461 |    size_t i;
462 | 
463 |    c = string[(*index)];
464 |    if (c == '\0')
465 |       return FALSE;
466 |    c = string[(*index)++];
467 |    if (c == '\0')
468 |       return FALSE;
469 |    while (c == ' ' || c == '\n' || c == '\t') {
470 |       if (c == '\0')
471 |          return FALSE;
472 |       c = string[(*index)++];
473 |    }
474 |    for (i = 0; c != ' ' && c != '\n' && c != '\t' && c != '\0'; i++) {
475 |       buff[i] = c;
476 |       c = string[(*index)++];
477 |    }
478 | 
479 |    buff[i] = '\0';
480 |    return TRUE;
481 | }
482 | 
483 | /* HTS_get_token_from_string_with_separator: get token from string with specified separator */
484 | HTS_Boolean HTS_get_token_from_string_with_separator(const char *str, size_t * index, char *buff, char separator)
485 | {
486 |    char c;
487 |    size_t len = 0;
488 | 
489 |    if (str == NULL)
490 |       return FALSE;
491 | 
492 |    c = str[(*index)];
493 |    if (c == '\0')
494 |       return FALSE;
495 |    while (c == separator) {
496 |       if (c == '\0')
497 |          return FALSE;
498 |       (*index)++;
499 |       c = str[(*index)];
500 |    }
501 |    while (c != separator && c != '\0') {
502 |       buff[len++] = c;
503 |       (*index)++;
504 |       c = str[(*index)];
505 |    }
506 |    if (c != '\0')
507 |       (*index)++;
508 | 
509 |    buff[len] = '\0';
510 | 
511 |    if (len > 0)
512 |       return TRUE;
513 |    else
514 |       return FALSE;
515 | }
516 | 
/* HTS_calloc: wrapper for calloc */
/*   Returns a zero-filled block of num * size bytes, or NULL when either     */
/*   argument is 0. If num * size overflows size_t or the allocation fails,   */
/*   the failure is reported through HTS_error(1, ...).                       */
void *HTS_calloc(const size_t num, const size_t size)
{
   size_t n;
   void *mem;

   if (num == 0 || size == 0)
      return NULL;

   /* refuse requests whose byte count does not fit into size_t */
   if (num > ((size_t) -1) / size) {
      HTS_error(1, "HTS_calloc: Cannot allocate memory.\n");
      return NULL;
   }
   n = num * size;

#ifdef FESTIVAL
   mem = (void *) safe_wcalloc(n);
#else
   mem = (void *) malloc(n);
#endif                          /* FESTIVAL */

   /* check the result before touching the memory */
   if (mem == NULL) {
      HTS_error(1, "HTS_calloc: Cannot allocate memory.\n");
      return NULL;
   }

   memset(mem, 0, n);

   return mem;
}
539 | 
/* HTS_free: wrapper for free */
/*   Releases ptr with wfree() when FESTIVAL is defined, with free() otherwise. */
void HTS_free(void *ptr)
{
#ifndef FESTIVAL
   free(ptr);
#else
   wfree(ptr);
#endif                          /* !FESTIVAL */
}
549 | 
/* HTS_strdup: wrapper for strdup */
/*   Returns a newly allocated copy of string (wstrdup() when FESTIVAL is     */
/*   defined, HTS_calloc() otherwise). A NULL input yields NULL instead of    */
/*   being passed on to strlen()/wstrdup().                                   */
char *HTS_strdup(const char *string)
{
#ifndef FESTIVAL
   size_t bytes;
   char *buff;
#endif                          /* !FESTIVAL */

   if (string == NULL)
      return NULL;

#ifdef FESTIVAL
   return (wstrdup(string));
#else
   bytes = strlen(string) + 1;  /* including the terminator */
   buff = (char *) HTS_calloc(bytes, sizeof(char));
   if (buff != NULL)
      memcpy(buff, string, bytes);
   return buff;
#endif                          /* FESTIVAL */
}
561 | 
/* HTS_alloc_matrix: allocate double matrix */
/*   Builds an array of x row pointers, each row obtained from HTS_calloc     */
/*   with y doubles. Returns NULL when x or y is 0.                           */
double **HTS_alloc_matrix(size_t x, size_t y)
{
   double **rows;
   size_t r = 0;

   if (x == 0 || y == 0)
      return NULL;

   rows = (double **) HTS_calloc(x, sizeof(double *));

   while (r < x) {
      rows[r] = (double *) HTS_calloc(y, sizeof(double));
      r++;
   }

   return rows;
}
577 | 
/* HTS_free_matrix: free double matrix */
/*   Releases the x rows of p and then the row-pointer array itself.          */
/*   A NULL matrix is ignored, so the NULL that HTS_alloc_matrix returns for  */
/*   an empty dimension can be passed back safely with any x.                 */
void HTS_free_matrix(double **p, size_t x)
{
   size_t i;

   if (p == NULL)
      return;

   for (i = 0; i < x; i++)
      HTS_free(p[i]);
   HTS_free(p);
}
587 | 
/* HTS_error: output error message */
/*   Flushes stdout and stderr, prints "Error: " (error > 0) or "Warning: "   */
/*   followed by the printf-style message to stderr, and terminates the       */
/*   process with exit(error) when error > 0.                                 */
void HTS_error(int error, const char *message, ...)
{
   va_list ap;
   const int fatal = (error > 0);

   fflush(stdout);
   fflush(stderr);

   fputs(fatal ? "\nError: " : "\nWarning: ", stderr);

   va_start(ap, message);
   vfprintf(stderr, message, ap);
   va_end(ap);

   fflush(stderr);

   if (fatal)
      exit(error);
}
610 | 
611 | HTS_MISC_C_END;
612 | 
613 | #endif                          /* !HTS_MISC_C */
614 | 


--------------------------------------------------------------------------------
/src/lib/HTS_pstream.c:
--------------------------------------------------------------------------------
  1 | /* ----------------------------------------------------------------- */
  2 | /*           The HMM-Based Speech Synthesis Engine "hts_engine API"  */
  3 | /*           developed by HTS Working Group                          */
  4 | /*           http://hts-engine.sourceforge.net/                      */
  5 | /* ----------------------------------------------------------------- */
  6 | /*                                                                   */
  7 | /*  Copyright (c) 2001-2014  Nagoya Institute of Technology          */
  8 | /*                           Department of Computer Science          */
  9 | /*                                                                   */
 10 | /*                2001-2008  Tokyo Institute of Technology           */
 11 | /*                           Interdisciplinary Graduate School of    */
 12 | /*                           Science and Engineering                 */
 13 | /*                                                                   */
 14 | /* All rights reserved.                                              */
 15 | /*                                                                   */
 16 | /* Redistribution and use in source and binary forms, with or        */
 17 | /* without modification, are permitted provided that the following   */
 18 | /* conditions are met:                                               */
 19 | /*                                                                   */
 20 | /* - Redistributions of source code must retain the above copyright  */
 21 | /*   notice, this list of conditions and the following disclaimer.   */
 22 | /* - Redistributions in binary form must reproduce the above         */
 23 | /*   copyright notice, this list of conditions and the following     */
 24 | /*   disclaimer in the documentation and/or other materials provided */
 25 | /*   with the distribution.                                          */
 26 | /* - Neither the name of the HTS working group nor the names of its  */
 27 | /*   contributors may be used to endorse or promote products derived */
 28 | /*   from this software without specific prior written permission.   */
 29 | /*                                                                   */
 30 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND            */
 31 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,       */
 32 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF          */
 33 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE          */
 34 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
 35 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,          */
 36 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED   */
 37 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,     */
 38 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
 39 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,   */
 40 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY    */
 41 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE           */
 42 | /* POSSIBILITY OF SUCH DAMAGE.                                       */
 43 | /* ----------------------------------------------------------------- */
 44 | 
 45 | #ifndef HTS_PSTREAM_C
 46 | #define HTS_PSTREAM_C
 47 | 
 48 | #ifdef __cplusplus
 49 | #define HTS_PSTREAM_C_START extern "C" {
 50 | #define HTS_PSTREAM_C_END   }
 51 | #else
 52 | #define HTS_PSTREAM_C_START
 53 | #define HTS_PSTREAM_C_END
 54 | #endif                          /* __CPLUSPLUS */
 55 | 
 56 | HTS_PSTREAM_C_START;
 57 | 
 58 | #include <math.h>               /* for sqrt() */
 59 | 
 60 | /* hts_engine libraries */
 61 | #include "HTS_hidden.h"
 62 | 
 63 | /* HTS_finv: calculate 1.0/variance function */
 64 | static double HTS_finv(const double x)
 65 | {
 66 |    if (x >= INFTY2)
 67 |       return 0.0;
 68 |    if (x <= -INFTY2)
 69 |       return 0.0;
 70 |    if (x <= INVINF2 && x >= 0)
 71 |       return INFTY;
 72 |    if (x >= -INVINF2 && x < 0)
 73 |       return -INFTY;
 74 | 
 75 |    return (1.0 / x);
 76 | }
 77 | 
 78 | /* HTS_PStream_calc_wuw_and_wum: calcurate W'U^{-1}W and W'U^{-1}M */
 79 | static void HTS_PStream_calc_wuw_and_wum(HTS_PStream * pst, size_t m)
 80 | {
 81 |    size_t t, i, j;
 82 |    int shift;
 83 |    double wu;
 84 | 
 85 |    for (t = 0; t < pst->length; t++) {
 86 |       /* initialize */
 87 |       pst->sm.wum[t] = 0.0;
 88 |       for (i = 0; i < pst->width; i++)
 89 |          pst->sm.wuw[t][i] = 0.0;
 90 | 
 91 |       /* calc WUW & WUM */
 92 |       for (i = 0; i < pst->win_size; i++)
 93 |          for (shift = pst->win_l_width[i]; shift <= pst->win_r_width[i]; shift++)
 94 |             if (((int) t + shift >= 0) && ((int) t + shift < pst->length) && (pst->win_coefficient[i][-shift] != 0.0)) {
 95 |                wu = pst->win_coefficient[i][-shift] * pst->sm.ivar[t + shift][i * pst->vector_length + m];
 96 |                pst->sm.wum[t] += wu * pst->sm.mean[t + shift][i * pst->vector_length + m];
 97 |                for (j = 0; (j < pst->width) && (t + j < pst->length); j++)
 98 |                   if (((int) j <= pst->win_r_width[i] + shift) && (pst->win_coefficient[i][j - shift] != 0.0))
 99 |                      pst->sm.wuw[t][j] += wu * pst->win_coefficient[i][j - shift];
100 |             }
101 |    }
102 | }
103 | 
104 | 
105 | /* HTS_PStream_ldl_factorization: Factorize W'*U^{-1}*W to L*D*L' (L: lower triangular, D: diagonal) */
106 | static void HTS_PStream_ldl_factorization(HTS_PStream * pst)
107 | {
108 |    size_t t, i, j;
109 | 
110 |    for (t = 0; t < pst->length; t++) {
111 |       for (i = 1; (i < pst->width) && (t >= i); i++)
112 |          pst->sm.wuw[t][0] -= pst->sm.wuw[t - i][i] * pst->sm.wuw[t - i][i] * pst->sm.wuw[t - i][0];
113 | 
114 |       for (i = 1; i < pst->width; i++) {
115 |          for (j = 1; (i + j < pst->width) && (t >= j); j++)
116 |             pst->sm.wuw[t][i] -= pst->sm.wuw[t - j][j] * pst->sm.wuw[t - j][i + j] * pst->sm.wuw[t - j][0];
117 |          pst->sm.wuw[t][i] /= pst->sm.wuw[t][0];
118 |       }
119 |    }
120 | }
121 | 
122 | /* HTS_PStream_forward_substitution: forward subtitution for mlpg */
123 | static void HTS_PStream_forward_substitution(HTS_PStream * pst)
124 | {
125 |    size_t t, i;
126 | 
127 |    for (t = 0; t < pst->length; t++) {
128 |       pst->sm.g[t] = pst->sm.wum[t];
129 |       for (i = 1; (i < pst->width) && (t >= i); i++)
130 |          pst->sm.g[t] -= pst->sm.wuw[t - i][i] * pst->sm.g[t - i];
131 |    }
132 | }
133 | 
134 | /* HTS_PStream_backward_substitution: backward subtitution for mlpg */
135 | static void HTS_PStream_backward_substitution(HTS_PStream * pst, size_t m)
136 | {
137 |    size_t rev, t, i;
138 | 
139 |    for (rev = 0; rev < pst->length; rev++) {
140 |       t = pst->length - 1 - rev;
141 |       pst->par[t][m] = pst->sm.g[t] / pst->sm.wuw[t][0];
142 |       for (i = 1; (i < pst->width) && (t + i < pst->length); i++)
143 |          pst->par[t][m] -= pst->sm.wuw[t][i] * pst->par[t + i][m];
144 |    }
145 | }
146 | 
147 | /* HTS_PStream_calc_gv: subfunction for mlpg using GV */
148 | static void HTS_PStream_calc_gv(HTS_PStream * pst, size_t m, double *mean, double *vari)
149 | {
150 |    size_t t;
151 | 
152 |    *mean = 0.0;
153 |    for (t = 0; t < pst->length; t++)
154 |       if (pst->gv_switch[t])
155 |          *mean += pst->par[t][m];
156 |    *mean /= pst->gv_length;
157 |    *vari = 0.0;
158 |    for (t = 0; t < pst->length; t++)
159 |       if (pst->gv_switch[t])
160 |          *vari += (pst->par[t][m] - *mean) * (pst->par[t][m] - *mean);
161 |    *vari /= pst->gv_length;
162 | }
163 | 
164 | /* HTS_PStream_conv_gv: subfunction for mlpg using GV */
165 | static void HTS_PStream_conv_gv(HTS_PStream * pst, size_t m)
166 | {
167 |    size_t t;
168 |    double ratio;
169 |    double mean;
170 |    double vari;
171 | 
172 |    HTS_PStream_calc_gv(pst, m, &mean, &vari);
173 |    ratio = sqrt(pst->gv_mean[m] / vari);
174 |    for (t = 0; t < pst->length; t++)
175 |       if (pst->gv_switch[t])
176 |          pst->par[t][m] = ratio * (pst->par[t][m] - mean) + mean;
177 | }
178 | 
179 | /* HTS_PStream_calc_derivative: subfunction for mlpg using GV */
180 | static double HTS_PStream_calc_derivative(HTS_PStream * pst, size_t m)
181 | {
182 |    size_t t, i;
183 |    double mean;
184 |    double vari;
185 |    double dv;
186 |    double h;
187 |    double gvobj;
188 |    double hmmobj;
189 |    double w = 1.0 / (pst->win_size * pst->length);
190 | 
191 |    HTS_PStream_calc_gv(pst, m, &mean, &vari);
192 |    gvobj = -0.5 * W2 * vari * pst->gv_vari[m] * (vari - 2.0 * pst->gv_mean[m]);
193 |    dv = -2.0 * pst->gv_vari[m] * (vari - pst->gv_mean[m]) / pst->length;
194 | 
195 |    for (t = 0; t < pst->length; t++) {
196 |       pst->sm.g[t] = pst->sm.wuw[t][0] * pst->par[t][m];
197 |       for (i = 1; i < pst->width; i++) {
198 |          if (t + i < pst->length)
199 |             pst->sm.g[t] += pst->sm.wuw[t][i] * pst->par[t + i][m];
200 |          if (t + 1 > i)
201 |             pst->sm.g[t] += pst->sm.wuw[t - i][i] * pst->par[t - i][m];
202 |       }
203 |    }
204 | 
205 |    for (t = 0, hmmobj = 0.0; t < pst->length; t++) {
206 |       hmmobj += W1 * w * pst->par[t][m] * (pst->sm.wum[t] - 0.5 * pst->sm.g[t]);
207 |       h = -W1 * w * pst->sm.wuw[t][1 - 1] - W2 * 2.0 / (pst->length * pst->length) * ((pst->length - 1) * pst->gv_vari[m] * (vari - pst->gv_mean[m]) + 2.0 * pst->gv_vari[m] * (pst->par[t][m] - mean) * (pst->par[t][m] - mean));
208 |       if (pst->gv_switch[t])
209 |          pst->sm.g[t] = 1.0 / h * (W1 * w * (-pst->sm.g[t] + pst->sm.wum[t]) + W2 * dv * (pst->par[t][m] - mean));
210 |       else
211 |          pst->sm.g[t] = 1.0 / h * (W1 * w * (-pst->sm.g[t] + pst->sm.wum[t]));
212 |    }
213 | 
214 |    return (-(hmmobj + gvobj));
215 | }
216 | 
217 | /* HTS_PStream_gv_parmgen: function for mlpg using GV */
218 | static void HTS_PStream_gv_parmgen(HTS_PStream * pst, size_t m)
219 | {
220 |    size_t t, i;
221 |    double step = STEPINIT;
222 |    double prev = 0.0;
223 |    double obj;
224 | 
225 |    if (pst->gv_length == 0)
226 |       return;
227 | 
228 |    HTS_PStream_conv_gv(pst, m);
229 |    if (GV_MAX_ITERATION > 0) {
230 |       HTS_PStream_calc_wuw_and_wum(pst, m);
231 |       for (i = 1; i <= GV_MAX_ITERATION; i++) {
232 |          obj = HTS_PStream_calc_derivative(pst, m);
233 |          if (i > 1) {
234 |             if (obj > prev)
235 |                step *= STEPDEC;
236 |             if (obj < prev)
237 |                step *= STEPINC;
238 |          }
239 |          for (t = 0; t < pst->length; t++)
240 |             pst->par[t][m] += step * pst->sm.g[t];
241 |          prev = obj;
242 |       }
243 |    }
244 | }
245 | 
246 | /* HTS_PStream_mlpg: generate sequence of speech parameter vector maximizing its output probability for given pdf sequence */
247 | static void HTS_PStream_mlpg(HTS_PStream * pst)
248 | {
249 |    size_t m;
250 | 
251 |    if (pst->length == 0)
252 |       return;
253 | 
254 |    for (m = 0; m < pst->vector_length; m++) {
255 |       HTS_PStream_calc_wuw_and_wum(pst, m);
256 |       HTS_PStream_ldl_factorization(pst);       /* LDL factorization */
257 |       HTS_PStream_forward_substitution(pst);    /* forward substitution   */
258 |       HTS_PStream_backward_substitution(pst, m);        /* backward substitution  */
259 |       if (pst->gv_length > 0)
260 |          HTS_PStream_gv_parmgen(pst, m);
261 |    }
262 | }
263 | 
264 | /* HTS_PStreamSet_initialize: initialize parameter stream set */
265 | void HTS_PStreamSet_initialize(HTS_PStreamSet * pss)
266 | {
267 |    pss->pstream = NULL;
268 |    pss->nstream = 0;
269 |    pss->total_frame = 0;
270 | }
271 | 
272 | /* HTS_PStreamSet_create: parameter generation using GV weight */
273 | HTS_Boolean HTS_PStreamSet_create(HTS_PStreamSet * pss, HTS_SStreamSet * sss, double *msd_threshold, double *gv_weight)
274 | {
275 |    size_t i, j, k, l, m;
276 |    int shift;
277 |    size_t frame, msd_frame, state;
278 | 
279 |    HTS_PStream *pst;
280 |    HTS_Boolean not_bound;
281 | 
282 |    if (pss->nstream != 0) {
283 |       HTS_error(1, "HTS_PstreamSet_create: HTS_PStreamSet should be clear.\n");
284 |       return FALSE;
285 |    }
286 | 
287 |    /* initialize */
288 |    pss->nstream = HTS_SStreamSet_get_nstream(sss);
289 |    pss->pstream = (HTS_PStream *) HTS_calloc(pss->nstream, sizeof(HTS_PStream));
290 |    pss->total_frame = HTS_SStreamSet_get_total_frame(sss);
291 | 
292 |    /* create */
293 |    for (i = 0; i < pss->nstream; i++) {
294 |       pst = &pss->pstream[i];
295 |       if (HTS_SStreamSet_is_msd(sss, i)) {      /* for MSD */
296 |          pst->length = 0;
297 |          for (state = 0; state < HTS_SStreamSet_get_total_state(sss); state++)
298 |             if (HTS_SStreamSet_get_msd(sss, i, state) > msd_threshold[i])
299 |                pst->length += HTS_SStreamSet_get_duration(sss, state);
300 |          pst->msd_flag = (HTS_Boolean *) HTS_calloc(pss->total_frame, sizeof(HTS_Boolean));
301 |          for (state = 0, frame = 0; state < HTS_SStreamSet_get_total_state(sss); state++)
302 |             if (HTS_SStreamSet_get_msd(sss, i, state) > msd_threshold[i])
303 |                for (j = 0; j < HTS_SStreamSet_get_duration(sss, state); j++) {
304 |                   pst->msd_flag[frame] = TRUE;
305 |                   frame++;
306 |             } else
307 |                for (j = 0; j < HTS_SStreamSet_get_duration(sss, state); j++) {
308 |                   pst->msd_flag[frame] = FALSE;
309 |                   frame++;
310 |                }
311 |       } else {                  /* for non MSD */
312 |          pst->length = pss->total_frame;
313 |          pst->msd_flag = NULL;
314 |       }
315 |       pst->vector_length = HTS_SStreamSet_get_vector_length(sss, i);
316 |       pst->width = HTS_SStreamSet_get_window_max_width(sss, i) * 2 + 1; /* band width of R */
317 |       pst->win_size = HTS_SStreamSet_get_window_size(sss, i);
318 |       if (pst->length > 0) {
319 |          pst->sm.mean = HTS_alloc_matrix(pst->length, pst->vector_length * pst->win_size);
320 |          pst->sm.ivar = HTS_alloc_matrix(pst->length, pst->vector_length * pst->win_size);
321 |          pst->sm.wum = (double *) HTS_calloc(pst->length, sizeof(double));
322 |          pst->sm.wuw = HTS_alloc_matrix(pst->length, pst->width);
323 |          pst->sm.g = (double *) HTS_calloc(pst->length, sizeof(double));
324 |          pst->par = HTS_alloc_matrix(pst->length, pst->vector_length);
325 |       }
326 |       /* copy dynamic window */
327 |       pst->win_l_width = (int *) HTS_calloc(pst->win_size, sizeof(int));
328 |       pst->win_r_width = (int *) HTS_calloc(pst->win_size, sizeof(int));
329 |       pst->win_coefficient = (double **) HTS_calloc(pst->win_size, sizeof(double));
330 |       for (j = 0; j < pst->win_size; j++) {
331 |          pst->win_l_width[j] = HTS_SStreamSet_get_window_left_width(sss, i, j);
332 |          pst->win_r_width[j] = HTS_SStreamSet_get_window_right_width(sss, i, j);
333 |          if (pst->win_l_width[j] + pst->win_r_width[j] == 0)
334 |             pst->win_coefficient[j] = (double *)
335 |                 HTS_calloc(-2 * pst->win_l_width[j] + 1, sizeof(double));
336 |          else
337 |             pst->win_coefficient[j] = (double *)
338 |                 HTS_calloc(-2 * pst->win_l_width[j], sizeof(double));
339 |          pst->win_coefficient[j] -= pst->win_l_width[j];
340 |          for (shift = pst->win_l_width[j]; shift <= pst->win_r_width[j]; shift++)
341 |             pst->win_coefficient[j][shift] = HTS_SStreamSet_get_window_coefficient(sss, i, j, shift);
342 |       }
343 |       /* copy GV */
344 |       if (HTS_SStreamSet_use_gv(sss, i)) {
345 |          pst->gv_mean = (double *) HTS_calloc(pst->vector_length, sizeof(double));
346 |          pst->gv_vari = (double *) HTS_calloc(pst->vector_length, sizeof(double));
347 |          for (j = 0; j < pst->vector_length; j++) {
348 |             pst->gv_mean[j] = HTS_SStreamSet_get_gv_mean(sss, i, j) * gv_weight[i];
349 |             pst->gv_vari[j] = HTS_SStreamSet_get_gv_vari(sss, i, j);
350 |          }
351 |          pst->gv_switch = (HTS_Boolean *) HTS_calloc(pst->length, sizeof(HTS_Boolean));
352 |          if (HTS_SStreamSet_is_msd(sss, i)) {   /* for MSD */
353 |             for (state = 0, frame = 0, msd_frame = 0; state < HTS_SStreamSet_get_total_state(sss); state++)
354 |                for (j = 0; j < HTS_SStreamSet_get_duration(sss, state); j++, frame++)
355 |                   if (pst->msd_flag[frame])
356 |                      pst->gv_switch[msd_frame++] = HTS_SStreamSet_get_gv_switch(sss, i, state);
357 |          } else {               /* for non MSD */
358 |             for (state = 0, frame = 0; state < HTS_SStreamSet_get_total_state(sss); state++)
359 |                for (j = 0; j < HTS_SStreamSet_get_duration(sss, state); j++)
360 |                   pst->gv_switch[frame++] = HTS_SStreamSet_get_gv_switch(sss, i, state);
361 |          }
362 |          for (j = 0, pst->gv_length = 0; j < pst->length; j++)
363 |             if (pst->gv_switch[j])
364 |                pst->gv_length++;
365 |       } else {
366 |          pst->gv_switch = NULL;
367 |          pst->gv_length = 0;
368 |          pst->gv_mean = NULL;
369 |          pst->gv_vari = NULL;
370 |       }
371 |       /* copy pdfs */
372 |       if (HTS_SStreamSet_is_msd(sss, i)) {      /* for MSD */
373 |          for (state = 0, frame = 0, msd_frame = 0; state < HTS_SStreamSet_get_total_state(sss); state++) {
374 |             for (j = 0; j < HTS_SStreamSet_get_duration(sss, state); j++) {
375 |                if (pst->msd_flag[frame]) {
376 |                   /* check current frame is MSD boundary or not */
377 |                   for (k = 0; k < pst->win_size; k++) {
378 |                      not_bound = TRUE;
379 |                      for (shift = pst->win_l_width[k]; shift <= pst->win_r_width[k]; shift++)
380 |                         if ((int) frame + shift < 0 || (int) pss->total_frame <= (int) frame + shift || !pst->msd_flag[frame + shift]) {
381 |                            not_bound = FALSE;
382 |                            break;
383 |                         }
384 |                      for (l = 0; l < pst->vector_length; l++) {
385 |                         m = pst->vector_length * k + l;
386 |                         pst->sm.mean[msd_frame][m] = HTS_SStreamSet_get_mean(sss, i, state, m);
387 |                         if (not_bound || k == 0)
388 |                            pst->sm.ivar[msd_frame][m] = HTS_finv(HTS_SStreamSet_get_vari(sss, i, state, m));
389 |                         else
390 |                            pst->sm.ivar[msd_frame][m] = 0.0;
391 |                      }
392 |                   }
393 |                   msd_frame++;
394 |                }
395 |                frame++;
396 |             }
397 |          }
398 |       } else {                  /* for non MSD */
399 |          for (state = 0, frame = 0; state < HTS_SStreamSet_get_total_state(sss); state++) {
400 |             for (j = 0; j < HTS_SStreamSet_get_duration(sss, state); j++) {
401 |                for (k = 0; k < pst->win_size; k++) {
402 |                   not_bound = TRUE;
403 |                   for (shift = pst->win_l_width[k]; shift <= pst->win_r_width[k]; shift++)
404 |                      if ((int) frame + shift < 0 || (int) pss->total_frame <= (int) frame + shift) {
405 |                         not_bound = FALSE;
406 |                         break;
407 |                      }
408 |                   for (l = 0; l < pst->vector_length; l++) {
409 |                      m = pst->vector_length * k + l;
410 |                      pst->sm.mean[frame][m] = HTS_SStreamSet_get_mean(sss, i, state, m);
411 |                      if (not_bound || k == 0)
412 |                         pst->sm.ivar[frame][m] = HTS_finv(HTS_SStreamSet_get_vari(sss, i, state, m));
413 |                      else
414 |                         pst->sm.ivar[frame][m] = 0.0;
415 |                   }
416 |                }
417 |                frame++;
418 |             }
419 |          }
420 |       }
421 |       /* parameter generation */
422 |       HTS_PStream_mlpg(pst);
423 |    }
424 | 
425 |    return TRUE;
426 | }
427 | 
428 | /* HTS_PStreamSet_get_nstream: get number of stream */
429 | size_t HTS_PStreamSet_get_nstream(HTS_PStreamSet * pss)
430 | {
431 |    return pss->nstream;
432 | }
433 | 
434 | /* HTS_PStreamSet_get_vector_length: get feature length */
435 | size_t HTS_PStreamSet_get_vector_length(HTS_PStreamSet * pss, size_t stream_index)
436 | {
437 |    return pss->pstream[stream_index].vector_length;
438 | }
439 | 
440 | /* HTS_PStreamSet_get_total_frame: get total number of frame */
441 | size_t HTS_PStreamSet_get_total_frame(HTS_PStreamSet * pss)
442 | {
443 |    return pss->total_frame;
444 | }
445 | 
446 | /* HTS_PStreamSet_get_parameter: get parameter */
447 | double HTS_PStreamSet_get_parameter(HTS_PStreamSet * pss, size_t stream_index, size_t frame_index, size_t vector_index)
448 | {
449 |    return pss->pstream[stream_index].par[frame_index][vector_index];
450 | }
451 | 
452 | /* HTS_PStreamSet_get_parameter_vector: get parameter vector*/
453 | double *HTS_PStreamSet_get_parameter_vector(HTS_PStreamSet * pss, size_t stream_index, size_t frame_index)
454 | {
455 |    return pss->pstream[stream_index].par[frame_index];
456 | }
457 | 
458 | /* HTS_PStreamSet_get_msd_flag: get generated MSD flag per frame */
459 | HTS_Boolean HTS_PStreamSet_get_msd_flag(HTS_PStreamSet * pss, size_t stream_index, size_t frame_index)
460 | {
461 |    return pss->pstream[stream_index].msd_flag[frame_index];
462 | }
463 | 
464 | /* HTS_PStreamSet_is_msd: get MSD flag */
465 | HTS_Boolean HTS_PStreamSet_is_msd(HTS_PStreamSet * pss, size_t stream_index)
466 | {
467 |    return pss->pstream[stream_index].msd_flag ? TRUE : FALSE;
468 | }
469 | 
470 | /* HTS_PStreamSet_clear: free parameter stream set */
471 | void HTS_PStreamSet_clear(HTS_PStreamSet * pss)
472 | {
473 |    size_t i, j;
474 |    HTS_PStream *pstream;
475 | 
476 |    if (pss->pstream) {
477 |       for (i = 0; i < pss->nstream; i++) {
478 |          pstream = &pss->pstream[i];
479 |          if (pstream->sm.wum)
480 |             HTS_free(pstream->sm.wum);
481 |          if (pstream->sm.g)
482 |             HTS_free(pstream->sm.g);
483 |          if (pstream->sm.wuw)
484 |             HTS_free_matrix(pstream->sm.wuw, pstream->length);
485 |          if (pstream->sm.ivar)
486 |             HTS_free_matrix(pstream->sm.ivar, pstream->length);
487 |          if (pstream->sm.mean)
488 |             HTS_free_matrix(pstream->sm.mean, pstream->length);
489 |          if (pstream->par)
490 |             HTS_free_matrix(pstream->par, pstream->length);
491 |          if (pstream->msd_flag)
492 |             HTS_free(pstream->msd_flag);
493 |          if (pstream->win_coefficient) {
494 |             for (j = 0; j < pstream->win_size; j++) {
495 |                pstream->win_coefficient[j] += pstream->win_l_width[j];
496 |                HTS_free(pstream->win_coefficient[j]);
497 |             }
498 |          }
499 |          if (pstream->gv_mean)
500 |             HTS_free(pstream->gv_mean);
501 |          if (pstream->gv_vari)
502 |             HTS_free(pstream->gv_vari);
503 |          if (pstream->win_coefficient)
504 |             HTS_free(pstream->win_coefficient);
505 |          if (pstream->win_l_width)
506 |             HTS_free(pstream->win_l_width);
507 |          if (pstream->win_r_width)
508 |             HTS_free(pstream->win_r_width);
509 |          if (pstream->gv_switch)
510 |             HTS_free(pstream->gv_switch);
511 |       }
512 |       HTS_free(pss->pstream);
513 |    }
514 |    HTS_PStreamSet_initialize(pss);
515 | }
516 | 
517 | HTS_PSTREAM_C_END;
518 | 
519 | #endif                          /* !HTS_PSTREAM_C */
520 | 


--------------------------------------------------------------------------------
/src/lib/HTS_sstream.c:
--------------------------------------------------------------------------------
  1 | /* ----------------------------------------------------------------- */
  2 | /*           The HMM-Based Speech Synthesis Engine "hts_engine API"  */
  3 | /*           developed by HTS Working Group                          */
  4 | /*           http://hts-engine.sourceforge.net/                      */
  5 | /* ----------------------------------------------------------------- */
  6 | /*                                                                   */
  7 | /*  Copyright (c) 2001-2014  Nagoya Institute of Technology          */
  8 | /*                           Department of Computer Science          */
  9 | /*                                                                   */
 10 | /*                2001-2008  Tokyo Institute of Technology           */
 11 | /*                           Interdisciplinary Graduate School of    */
 12 | /*                           Science and Engineering                 */
 13 | /*                                                                   */
 14 | /* All rights reserved.                                              */
 15 | /*                                                                   */
 16 | /* Redistribution and use in source and binary forms, with or        */
 17 | /* without modification, are permitted provided that the following   */
 18 | /* conditions are met:                                               */
 19 | /*                                                                   */
 20 | /* - Redistributions of source code must retain the above copyright  */
 21 | /*   notice, this list of conditions and the following disclaimer.   */
 22 | /* - Redistributions in binary form must reproduce the above         */
 23 | /*   copyright notice, this list of conditions and the following     */
 24 | /*   disclaimer in the documentation and/or other materials provided */
 25 | /*   with the distribution.                                          */
 26 | /* - Neither the name of the HTS working group nor the names of its  */
 27 | /*   contributors may be used to endorse or promote products derived */
 28 | /*   from this software without specific prior written permission.   */
 29 | /*                                                                   */
 30 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND            */
 31 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,       */
 32 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF          */
 33 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE          */
 34 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
 35 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,          */
 36 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED   */
 37 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,     */
 38 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
 39 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,   */
 40 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY    */
 41 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE           */
 42 | /* POSSIBILITY OF SUCH DAMAGE.                                       */
 43 | /* ----------------------------------------------------------------- */
 44 | 
 45 | #ifndef HTS_SSTREAM_C           /* guard: the body of this file is processed only once */
 46 | #define HTS_SSTREAM_C
 47 | 
 48 | #ifdef __cplusplus              /* C++ build: wrap the definitions in an extern "C" block */
 49 | #define HTS_SSTREAM_C_START extern "C" {
 50 | #define HTS_SSTREAM_C_END   }
 51 | #else                           /* C build: both markers expand to nothing */
 52 | #define HTS_SSTREAM_C_START
 53 | #define HTS_SSTREAM_C_END
 54 | #endif                          /* __cplusplus */
 55 | 
 56 | HTS_SSTREAM_C_START;
 57 | 
 58 | #include <stdlib.h>
 59 | #include <math.h>
 60 | 
 61 | /* hts_engine libraries */
 62 | #include "HTS_hidden.h"
 63 | 
 64 | /* HTS_set_default_duration: set each of the `size` state durations to its mean rounded to the nearest integer (at least 1) and return the total as a double; `vari` is accepted but not used */
 65 | static double HTS_set_default_duration(size_t * duration, double *mean, double *vari, size_t size)
 66 | {
 67 |    size_t i;
 68 |    double temp;
 69 |    size_t sum = 0;              /* running total of the assigned durations */
 70 | 
 71 |    for (i = 0; i < size; i++) {
 72 |       temp = mean[i] + 0.5;     /* adding 0.5 before the truncating cast rounds to nearest */
 73 |       if (temp < 1.0)
 74 |          duration[i] = 1;       /* every state gets at least one frame */
 75 |       else
 76 |          duration[i] = (size_t) temp;
 77 |       sum += duration[i];
 78 |    }
 79 | 
 80 |    return (double) sum;
 81 | }
 82 | 
 83 | /* HTS_set_specified_duration: distribute a requested frame length over `size` states using their duration means and variances; writes `duration` and returns the number of frames actually assigned */
 84 | static double HTS_set_specified_duration(size_t * duration, double *mean, double *vari, size_t size, double frame_length)
 85 | {
 86 |    size_t i;
 87 |    int j;                       /* index of the state chosen for adjustment; -1 means none chosen yet */
 88 |    double temp1, temp2;
 89 |    double rho = 0.0;
 90 |    size_t sum = 0;              /* total of the durations assigned so far */
 91 |    size_t target_length;
 92 | 
 93 |    /* get the target frame length (rounded to nearest, never less than 1) */
 94 |    if (frame_length + 0.5 < 1.0)
 95 |       target_length = 1;
 96 |    else
 97 |       target_length = (size_t) (frame_length + 0.5);
 98 | 
 99 |    /* check the specified duration: with at most one frame per state available, every state gets exactly 1 */
100 |    if (target_length <= size) {
101 |       if (target_length < size)
102 |          HTS_error(-1, "HTS_set_specified_duration: Specified frame length is too short.\n");
103 |       for (i = 0; i < size; i++)
104 |          duration[i] = 1;
105 |       return (double) size;     /* note: this is `size`, which may exceed the requested length */
106 |    }
107 | 
108 |    /* RHO calculation: rho = (target - sum of means) / sum of variances */
109 |    temp1 = 0.0;
110 |    temp2 = 0.0;
111 |    for (i = 0; i < size; i++) {
112 |       temp1 += mean[i];
113 |       temp2 += vari[i];
114 |    }
115 |    rho = ((double) target_length - temp1) / temp2;      /* NOTE(review): a zero variance sum is not guarded against here */
116 | 
117 |    /* first estimation: duration = round(mean + rho * vari), at least 1 */
118 |    for (i = 0; i < size; i++) {
119 |       temp1 = mean[i] + rho * vari[i] + 0.5;
120 |       if (temp1 < 1.0)
121 |          duration[i] = 1;
122 |       else
123 |          duration[i] = (size_t) temp1;
124 |       sum += duration[i];
125 |    }
126 | 
127 |    /* loop estimation: move the total one frame at a time until it equals the target */
128 |    while (target_length != sum) {
129 |       /* search flexible state and modify its duration */
130 |       if (target_length > sum) {
131 |          j = -1;
132 |          for (i = 0; i < size; i++) {
133 |             temp2 = fabs(rho - ((double) duration[i] + 1 - mean[i]) / vari[i]);  /* deviation from rho if this state grew by one frame */
134 |             if (j < 0 || temp1 > temp2) {       /* j < 0 accepts the first candidate before temp1 holds a candidate value */
135 |                j = i;
136 |                temp1 = temp2;   /* smallest deviation found so far */
137 |             }
138 |          }
139 |          sum++;
140 |          duration[j]++;
141 |       } else {
142 |          j = -1;
143 |          for (i = 0; i < size; i++) {
144 |             if (duration[i] > 1) {      /* only states that can lose a frame and still keep at least 1 */
145 |                temp2 = fabs(rho - ((double) duration[i] - 1 - mean[i]) / vari[i]);       /* deviation from rho if this state shrank by one frame */
146 |                if (j < 0 || temp1 > temp2) {
147 |                   j = i;
148 |                   temp1 = temp2;
149 |                }
150 |             }
151 |          }
152 |          sum--;
153 |          duration[j]--;
154 |       }
155 |    }
156 | 
157 |    return (double) target_length;
158 | }
159 | 
160 | /* HTS_SStreamSet_initialize: reset a state stream set to the empty state (zero counts, NULL pointers); nothing is freed here */
161 | void HTS_SStreamSet_initialize(HTS_SStreamSet * sss)
162 | {
163 |    sss->nstream = 0;
164 |    sss->nstate = 0;
165 |    sss->sstream = NULL;
166 |    sss->duration = NULL;
167 |    sss->total_state = 0;
168 |    sss->total_frame = 0;
169 | }
170 | 
171 | /* HTS_SStreamSet_create: parse label and determine state duration. Normalizes duration_iw, parameter_iw and gv_iw in place so each weight set sums to 1, allocates the state stream set, assigns state durations (from label end frames when phoneme_alignment_flag is TRUE, otherwise from the duration models scaled by speed), then fetches per-state parameters, dynamic windows and GV statistics. Returns FALSE if any weight set sums to zero, TRUE otherwise. */
172 | HTS_Boolean HTS_SStreamSet_create(HTS_SStreamSet * sss, HTS_ModelSet * ms, HTS_Label * label, HTS_Boolean phoneme_alignment_flag, double speed, double *duration_iw, double **parameter_iw, double **gv_iw)
173 | {
174 |    size_t i, j, k;
175 |    double temp;
176 |    int shift;
177 |    size_t state;
178 |    HTS_SStream *sst;
179 |    double *duration_mean, *duration_vari;
180 |    double frame_length;
181 |    size_t next_time;
182 |    size_t next_state;
183 | 
184 |    /* check interpolation weights (nothing has been allocated yet, so the early returns below leak nothing) */
185 |    for (i = 0, temp = 0.0; i < HTS_ModelSet_get_nvoices(ms); i++)
186 |       temp += duration_iw[i];
187 |    if (temp == 0.0) {
188 |       return FALSE;
189 |    } else if (temp != 1.0) {
190 |       for (i = 0; i < HTS_ModelSet_get_nvoices(ms); i++)
191 |          if (duration_iw[i] != 0.0)
192 |             duration_iw[i] /= temp;
193 |    }
194 | 
195 |    for (i = 0; i < HTS_ModelSet_get_nstream(ms); i++) {
196 |       for (j = 0, temp = 0.0; j < HTS_ModelSet_get_nvoices(ms); j++)
197 |          temp += parameter_iw[j][i];    /* weights are indexed [voice][stream] */
198 |       if (temp == 0.0) {
199 |          return FALSE;
200 |       } else if (temp != 1.0) {
201 |          for (j = 0; j < HTS_ModelSet_get_nvoices(ms); j++)
202 |             if (parameter_iw[j][i] != 0.0)
203 |                parameter_iw[j][i] /= temp;
204 |       }
205 |       if (HTS_ModelSet_use_gv(ms, i)) {      /* GV weights are only checked for streams that use GV */
206 |          for (j = 0, temp = 0.0; j < HTS_ModelSet_get_nvoices(ms); j++)
207 |             temp += gv_iw[j][i];
208 |          if (temp == 0.0)
209 |             return FALSE;
210 |          else if (temp != 1.0)
211 |             for (j = 0; j < HTS_ModelSet_get_nvoices(ms); j++)
212 |                if (gv_iw[j][i] != 0.0)
213 |                   gv_iw[j][i] /= temp;
214 |       }
215 |    }
216 | 
217 |    /* initialize state sequence */
218 |    sss->nstate = HTS_ModelSet_get_nstate(ms);
219 |    sss->nstream = HTS_ModelSet_get_nstream(ms);
220 |    sss->total_frame = 0;
221 |    sss->total_state = HTS_Label_get_size(label) * sss->nstate;       /* nstate states per label entry */
222 |    sss->duration = (size_t *) HTS_calloc(sss->total_state, sizeof(size_t));
223 |    sss->sstream = (HTS_SStream *) HTS_calloc(sss->nstream, sizeof(HTS_SStream));
224 |    for (i = 0; i < sss->nstream; i++) {
225 |       sst = &sss->sstream[i];
226 |       sst->vector_length = HTS_ModelSet_get_vector_length(ms, i);
227 |       sst->mean = (double **) HTS_calloc(sss->total_state, sizeof(double *));
228 |       sst->vari = (double **) HTS_calloc(sss->total_state, sizeof(double *));
229 |       if (HTS_ModelSet_is_msd(ms, i))
230 |          sst->msd = (double *) HTS_calloc(sss->total_state, sizeof(double));
231 |       else
232 |          sst->msd = NULL;       /* a NULL msd pointer marks a non-MSD stream */
233 |       for (j = 0; j < sss->total_state; j++) {
234 |          sst->mean[j] = (double *) HTS_calloc(sst->vector_length * HTS_ModelSet_get_window_size(ms, i), sizeof(double));
235 |          sst->vari[j] = (double *) HTS_calloc(sst->vector_length * HTS_ModelSet_get_window_size(ms, i), sizeof(double));
236 |       }
237 |       if (HTS_ModelSet_use_gv(ms, i)) {
238 |          sst->gv_switch = (HTS_Boolean *) HTS_calloc(sss->total_state, sizeof(HTS_Boolean));
239 |          for (j = 0; j < sss->total_state; j++)
240 |             sst->gv_switch[j] = TRUE;   /* GV is on for every state until switched off below */
241 |       } else {
242 |          sst->gv_switch = NULL;
243 |       }
244 |    }
245 | 
246 |    /* determine state duration */
247 |    duration_mean = (double *) HTS_calloc(sss->total_state, sizeof(double));
248 |    duration_vari = (double *) HTS_calloc(sss->total_state, sizeof(double));
249 |    for (i = 0; i < HTS_Label_get_size(label); i++)
250 |       HTS_ModelSet_get_duration(ms, HTS_Label_get_string(label, i), duration_iw, &duration_mean[i * sss->nstate], &duration_vari[i * sss->nstate]);
251 |    if (phoneme_alignment_flag == TRUE) {
252 |       /* use duration set by user */
253 |       next_time = 0;            /* frames assigned so far */
254 |       next_state = 0;           /* first state that has not received a duration yet */
255 |       state = 0;                /* first state of the current label entry */
256 |       for (i = 0; i < HTS_Label_get_size(label); i++) {
257 |          temp = HTS_Label_get_end_frame(label, i);
258 |          if (temp >= 0) {       /* an end frame is given: fit all pending states into the span up to it */
259 |             next_time += (size_t) HTS_set_specified_duration(&sss->duration[next_state], &duration_mean[next_state], &duration_vari[next_state], state + sss->nstate - next_state, temp - next_time);
260 |             next_state = state + sss->nstate;
261 |          } else if (i + 1 == HTS_Label_get_size(label)) {
262 |             HTS_error(-1, "HTS_SStreamSet_create: The time of final label is not specified.\n");
263 |             HTS_set_default_duration(&sss->duration[next_state], &duration_mean[next_state], &duration_vari[next_state], state + sss->nstate - next_state);
264 |          }
265 |          state += sss->nstate;
266 |       }
267 |    } else {
268 |       /* determine frame length */
269 |       if (speed != 1.0) {
270 |          temp = 0.0;
271 |          for (i = 0; i < sss->total_state; i++) {
272 |             temp += duration_mean[i];
273 |          }
274 |          frame_length = temp / speed;   /* total mean duration scaled by the speaking rate */
275 |          HTS_set_specified_duration(sss->duration, duration_mean, duration_vari, sss->total_state, frame_length);
276 |       } else {
277 |          HTS_set_default_duration(sss->duration, duration_mean, duration_vari, sss->total_state);
278 |       }
279 |    }
280 |    HTS_free(duration_mean);
281 |    HTS_free(duration_vari);
282 | 
283 |    /* get parameter */
284 |    for (i = 0, state = 0; i < HTS_Label_get_size(label); i++) {
285 |       for (j = 2; j <= sss->nstate + 1; j++) { /* state indices passed to HTS_ModelSet_get_parameter run from 2 to nstate + 1 */
286 |          sss->total_frame += sss->duration[state];
287 |          for (k = 0; k < sss->nstream; k++) {
288 |             sst = &sss->sstream[k];
289 |             if (sst->msd)
290 |                HTS_ModelSet_get_parameter(ms, k, j, HTS_Label_get_string(label, i), (const double *const *) parameter_iw, sst->mean[state], sst->vari[state], &sst->msd[state]);
291 |             else
292 |                HTS_ModelSet_get_parameter(ms, k, j, HTS_Label_get_string(label, i), (const double *const *) parameter_iw, sst->mean[state], sst->vari[state], NULL);
293 |          }
294 |          state++;
295 |       }
296 |    }
297 | 
298 |    /* copy dynamic window */
299 |    for (i = 0; i < sss->nstream; i++) {
300 |       sst = &sss->sstream[i];
301 |       sst->win_size = HTS_ModelSet_get_window_size(ms, i);
302 |       sst->win_max_width = HTS_ModelSet_get_window_max_width(ms, i);
303 |       sst->win_l_width = (int *) HTS_calloc(sst->win_size, sizeof(int));
304 |       sst->win_r_width = (int *) HTS_calloc(sst->win_size, sizeof(int));
305 |       sst->win_coefficient = (double **) HTS_calloc(sst->win_size, sizeof(double *));   /* array of row pointers, so the element size is sizeof(double *) */
306 |       for (j = 0; j < sst->win_size; j++) {
307 |          sst->win_l_width[j] = HTS_ModelSet_get_window_left_width(ms, i, j);
308 |          sst->win_r_width[j] = HTS_ModelSet_get_window_right_width(ms, i, j);
309 |          if (sst->win_l_width[j] + sst->win_r_width[j] == 0)
310 |             sst->win_coefficient[j] = (double *) HTS_calloc(-2 * sst->win_l_width[j] + 1, sizeof(double));
311 |          else
312 |             sst->win_coefficient[j] = (double *) HTS_calloc(-2 * sst->win_l_width[j], sizeof(double));
313 |          sst->win_coefficient[j] -= sst->win_l_width[j];        /* offset the row so it is indexed by shift values win_l_width[j]..win_r_width[j]; HTS_SStreamSet_clear undoes this before freeing */
314 |          for (shift = sst->win_l_width[j]; shift <= sst->win_r_width[j]; shift++)
315 |             sst->win_coefficient[j][shift] = HTS_ModelSet_get_window_coefficient(ms, i, j, shift);
316 |       }
317 |    }
318 | 
319 |    /* determine GV */
320 |    for (i = 0; i < sss->nstream; i++) {
321 |       sst = &sss->sstream[i];
322 |       if (HTS_ModelSet_use_gv(ms, i)) {
323 |          sst->gv_mean = (double *) HTS_calloc(sst->vector_length, sizeof(double));
324 |          sst->gv_vari = (double *) HTS_calloc(sst->vector_length, sizeof(double));
325 |          HTS_ModelSet_get_gv(ms, i, HTS_Label_get_string(label, 0), (const double *const *) gv_iw, sst->gv_mean, sst->gv_vari);        /* GV statistics are looked up with the first label entry only */
326 |       } else {
327 |          sst->gv_mean = NULL;   /* a NULL gv_mean marks a stream without GV */
328 |          sst->gv_vari = NULL;
329 |       }
330 |    }
331 | 
332 |    for (i = 0; i < HTS_Label_get_size(label); i++)      /* switch GV off for all states of label entries whose GV flag is FALSE */
333 |       if (HTS_ModelSet_get_gv_flag(ms, HTS_Label_get_string(label, i)) == FALSE)
334 |          for (j = 0; j < sss->nstream; j++)
335 |             if (HTS_ModelSet_use_gv(ms, j) == TRUE)
336 |                for (k = 0; k < sss->nstate; k++)
337 |                   sss->sstream[j].gv_switch[i * sss->nstate + k] = FALSE;
338 | 
339 |    return TRUE;
340 | }
341 | 
342 | /* HTS_SStreamSet_get_nstream: return the number of streams stored in the state stream set */
343 | size_t HTS_SStreamSet_get_nstream(HTS_SStreamSet * sss)
344 | {
345 |    return sss->nstream;
346 | }
347 | 
348 | /* HTS_SStreamSet_get_vector_length: return the vector length of the given stream (stream_index is not range-checked) */
349 | size_t HTS_SStreamSet_get_vector_length(HTS_SStreamSet * sss, size_t stream_index)
350 | {
351 |    return sss->sstream[stream_index].vector_length;
352 | }
353 | 
354 | /* HTS_SStreamSet_is_msd: get MSD flag; TRUE exactly when the stream has a non-NULL msd array */
355 | HTS_Boolean HTS_SStreamSet_is_msd(HTS_SStreamSet * sss, size_t stream_index)
356 | {
357 |    return sss->sstream[stream_index].msd ? TRUE : FALSE;
358 | }
359 | 
360 | /* HTS_SStreamSet_get_total_state: return the total number of states stored in the set */
361 | size_t HTS_SStreamSet_get_total_state(HTS_SStreamSet * sss)
362 | {
363 |    return sss->total_state;
364 | }
365 | 
366 | /* HTS_SStreamSet_get_total_frame: return the total number of frames stored in the set */
367 | size_t HTS_SStreamSet_get_total_frame(HTS_SStreamSet * sss)
368 | {
369 |    return sss->total_frame;
370 | }
371 | 
372 | /* HTS_SStreamSet_get_msd: return the MSD parameter of one state; dereferences msd without a NULL check, so the stream must have an msd array (see HTS_SStreamSet_is_msd) */
373 | double HTS_SStreamSet_get_msd(HTS_SStreamSet * sss, size_t stream_index, size_t state_index)
374 | {
375 |    return sss->sstream[stream_index].msd[state_index];
376 | }
377 | 
378 | /* HTS_SStreamSet_get_window_size: return the dynamic window size (win_size) of the given stream */
379 | size_t HTS_SStreamSet_get_window_size(HTS_SStreamSet * sss, size_t stream_index)
380 | {
381 |    return sss->sstream[stream_index].win_size;
382 | }
383 | 
384 | /* HTS_SStreamSet_get_window_left_width: return the left width of one dynamic window of the given stream */
385 | int HTS_SStreamSet_get_window_left_width(HTS_SStreamSet * sss, size_t stream_index, size_t window_index)
386 | {
387 |    return sss->sstream[stream_index].win_l_width[window_index];
388 | }
389 | 
390 | /* HTS_SStreamSet_get_window_right_width: return the right width of one dynamic window of the given stream */
391 | int HTS_SStreamSet_get_window_right_width(HTS_SStreamSet * sss, size_t stream_index, size_t window_index)
392 | {
393 |    return sss->sstream[stream_index].win_r_width[window_index];
394 | }
395 | 
396 | /* HTS_SStreamSet_get_window_coefficient: return one coefficient of a dynamic window; coefficient_index is signed because HTS_SStreamSet_create stores each row offset so it is indexed from the window's left width to its right width */
397 | double HTS_SStreamSet_get_window_coefficient(HTS_SStreamSet * sss, size_t stream_index, size_t window_index, int coefficient_index)
398 | {
399 |    return sss->sstream[stream_index].win_coefficient[window_index][coefficient_index];
400 | }
401 | 
402 | /* HTS_SStreamSet_get_window_max_width: return the maximum dynamic window width (win_max_width) of the given stream */
403 | size_t HTS_SStreamSet_get_window_max_width(HTS_SStreamSet * sss, size_t stream_index)
404 | {
405 |    return sss->sstream[stream_index].win_max_width;
406 | }
407 | 
408 | /* HTS_SStreamSet_use_gv: get GV flag; TRUE exactly when the stream has a non-NULL gv_mean array */
409 | HTS_Boolean HTS_SStreamSet_use_gv(HTS_SStreamSet * sss, size_t stream_index)
410 | {
411 |    return sss->sstream[stream_index].gv_mean ? TRUE : FALSE;
412 | }
413 | 
414 | /* HTS_SStreamSet_get_duration: return the duration stored for one state (state_index is not range-checked) */
415 | size_t HTS_SStreamSet_get_duration(HTS_SStreamSet * sss, size_t state_index)
416 | {
417 |    return sss->duration[state_index];
418 | }
419 | 
420 | /* HTS_SStreamSet_get_mean: return one element of the mean vector of a state in the given stream */
421 | double HTS_SStreamSet_get_mean(HTS_SStreamSet * sss, size_t stream_index, size_t state_index, size_t vector_index)
422 | {
423 |    return sss->sstream[stream_index].mean[state_index][vector_index];
424 | }
425 | 
426 | /* HTS_SStreamSet_set_mean: overwrite one element of the mean vector of a state in the given stream with f */
427 | void HTS_SStreamSet_set_mean(HTS_SStreamSet * sss, size_t stream_index, size_t state_index, size_t vector_index, double f)
428 | {
429 |    sss->sstream[stream_index].mean[state_index][vector_index] = f;
430 | }
431 | 
432 | /* HTS_SStreamSet_get_vari: return one element of the variance vector of a state in the given stream */
433 | double HTS_SStreamSet_get_vari(HTS_SStreamSet * sss, size_t stream_index, size_t state_index, size_t vector_index)
434 | {
435 |    return sss->sstream[stream_index].vari[state_index][vector_index];
436 | }
437 | 
438 | /* HTS_SStreamSet_set_vari: overwrite one element of the variance vector of a state in the given stream with f */
439 | void HTS_SStreamSet_set_vari(HTS_SStreamSet * sss, size_t stream_index, size_t state_index, size_t vector_index, double f)
440 | {
441 |    sss->sstream[stream_index].vari[state_index][vector_index] = f;
442 | }
443 | 
444 | /* HTS_SStreamSet_get_gv_mean: return one element of the GV mean vector; dereferences gv_mean without a NULL check, so the stream must use GV (see HTS_SStreamSet_use_gv) */
445 | double HTS_SStreamSet_get_gv_mean(HTS_SStreamSet * sss, size_t stream_index, size_t vector_index)
446 | {
447 |    return sss->sstream[stream_index].gv_mean[vector_index];
448 | }
449 | 
450 | /* HTS_SStreamSet_get_gv_vari: return one element of the GV variance vector; dereferences gv_vari without a NULL check, so the stream must use GV */
451 | double HTS_SStreamSet_get_gv_vari(HTS_SStreamSet * sss, size_t stream_index, size_t vector_index)
452 | {
453 |    return sss->sstream[stream_index].gv_vari[vector_index];
454 | }
455 | 
456 | /* HTS_SStreamSet_set_gv_switch: set the GV switch of one state to i; gv_switch is NULL for streams without GV (see HTS_SStreamSet_create), and that is not checked here */
457 | void HTS_SStreamSet_set_gv_switch(HTS_SStreamSet * sss, size_t stream_index, size_t state_index, HTS_Boolean i)
458 | {
459 |    sss->sstream[stream_index].gv_switch[state_index] = i;
460 | }
461 | 
462 | /* HTS_SStreamSet_get_gv_switch: return the GV switch of one state; gv_switch is NULL for streams without GV (see HTS_SStreamSet_create), and that is not checked here */
463 | HTS_Boolean HTS_SStreamSet_get_gv_switch(HTS_SStreamSet * sss, size_t stream_index, size_t state_index)
464 | {
465 |    return sss->sstream[stream_index].gv_switch[state_index];
466 | }
467 | 
468 | /* HTS_SStreamSet_clear: free every buffer owned by the state stream set and reset it with HTS_SStreamSet_initialize */
469 | void HTS_SStreamSet_clear(HTS_SStreamSet * sss)
470 | {
471 |    size_t i, j;
472 |    HTS_SStream *sst;
473 | 
474 |    if (sss->sstream) {          /* nothing to release when the set was never created */
475 |       for (i = 0; i < sss->nstream; i++) {
476 |          sst = &sss->sstream[i];
477 |          for (j = 0; j < sss->total_state; j++) {       /* per-state vectors first, then the pointer arrays below */
478 |             HTS_free(sst->mean[j]);
479 |             HTS_free(sst->vari[j]);
480 |          }
481 |          if (sst->msd)
482 |             HTS_free(sst->msd);
483 |          HTS_free(sst->mean);
484 |          HTS_free(sst->vari);
485 |          for (j = 0; j < sst->win_size; j++) {
486 |             sst->win_coefficient[j] += sst->win_l_width[j];     /* undo the offset applied in HTS_SStreamSet_create to recover the allocated address */
487 |             HTS_free(sst->win_coefficient[j]);
488 |          }
489 |          HTS_free(sst->win_coefficient);
490 |          HTS_free(sst->win_l_width);
491 |          HTS_free(sst->win_r_width);
492 |          if (sst->gv_mean)      /* the GV buffers exist only for streams that use GV */
493 |             HTS_free(sst->gv_mean);
494 |          if (sst->gv_vari)
495 |             HTS_free(sst->gv_vari);
496 |          if (sst->gv_switch)
497 |             HTS_free(sst->gv_switch);
498 |       }
499 |       HTS_free(sss->sstream);
500 |    }
501 |    if (sss->duration)
502 |       HTS_free(sss->duration);
503 | 
504 |    HTS_SStreamSet_initialize(sss);      /* leave the set in the empty state so it can be reused or cleared again */
505 | }
506 | 
507 | HTS_SSTREAM_C_END;              /* expands to the closing brace of the extern "C" block in a C++ build, to nothing in C */
508 | 
509 | #endif                          /* !HTS_SSTREAM_C */
510 | 


--------------------------------------------------------------------------------
/src/lib/HTS_vocoder.c:
--------------------------------------------------------------------------------
  1 | /* ----------------------------------------------------------------- */
  2 | /*           The HMM-Based Speech Synthesis Engine "hts_engine API"  */
  3 | /*           developed by HTS Working Group                          */
  4 | /*           http://hts-engine.sourceforge.net/                      */
  5 | /* ----------------------------------------------------------------- */
  6 | /*                                                                   */
  7 | /*  Copyright (c) 2001-2014  Nagoya Institute of Technology          */
  8 | /*                           Department of Computer Science          */
  9 | /*                                                                   */
 10 | /*                2001-2008  Tokyo Institute of Technology           */
 11 | /*                           Interdisciplinary Graduate School of    */
 12 | /*                           Science and Engineering                 */
 13 | /*                                                                   */
 14 | /* All rights reserved.                                              */
 15 | /*                                                                   */
 16 | /* Redistribution and use in source and binary forms, with or        */
 17 | /* without modification, are permitted provided that the following   */
 18 | /* conditions are met:                                               */
 19 | /*                                                                   */
 20 | /* - Redistributions of source code must retain the above copyright  */
 21 | /*   notice, this list of conditions and the following disclaimer.   */
 22 | /* - Redistributions in binary form must reproduce the above         */
 23 | /*   copyright notice, this list of conditions and the following     */
 24 | /*   disclaimer in the documentation and/or other materials provided */
 25 | /*   with the distribution.                                          */
 26 | /* - Neither the name of the HTS working group nor the names of its  */
 27 | /*   contributors may be used to endorse or promote products derived */
 28 | /*   from this software without specific prior written permission.   */
 29 | /*                                                                   */
 30 | /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND            */
 31 | /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,       */
 32 | /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF          */
 33 | /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE          */
 34 | /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
 35 | /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,          */
 36 | /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED   */
 37 | /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,     */
 38 | /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
 39 | /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,   */
 40 | /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY    */
 41 | /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE           */
 42 | /* POSSIBILITY OF SUCH DAMAGE.                                       */
 43 | /* ----------------------------------------------------------------- */
 44 | 
 45 | #ifndef HTS_VOCODER_C           /* guard: the body of this file is processed only once */
 46 | #define HTS_VOCODER_C
 47 | 
 48 | #ifdef __cplusplus              /* C++ build: wrap the definitions in an extern "C" block */
 49 | #define HTS_VOCODER_C_START extern "C" {
 50 | #define HTS_VOCODER_C_END   }
 51 | #else                           /* C build: both markers expand to nothing */
 52 | #define HTS_VOCODER_C_START
 53 | #define HTS_VOCODER_C_END
 54 | #endif                          /* __cplusplus */
 55 | 
 56 | HTS_VOCODER_C_START;
 57 | 
 58 | #include <math.h>               /* for sqrt(),log(),exp(),pow(),cos() */
 59 | 
 60 | /* hts_engine libraries */
 61 | #include "HTS_hidden.h"
 62 | 
 63 | static const double HTS_pade[21] = {    /* coefficient rows packed back to back: the row for order pd starts at index pd * (pd + 1) / 2 (see HTS_mlsadf) and holds pd + 1 values */
 64 |    1.00000000000,               /* order 0 */
 65 |    1.00000000000,               /* order 1 */
 66 |    0.00000000000,
 67 |    1.00000000000,               /* order 2 */
 68 |    0.00000000000,
 69 |    0.00000000000,
 70 |    1.00000000000,               /* order 3 */
 71 |    0.00000000000,
 72 |    0.00000000000,
 73 |    0.00000000000,
 74 |    1.00000000000,               /* order 4 */
 75 |    0.49992730000,
 76 |    0.10670050000,
 77 |    0.01170221000,
 78 |    0.00056562790,
 79 |    1.00000000000,               /* order 5 */
 80 |    0.49993910000,
 81 |    0.11070980000,
 82 |    0.01369984000,
 83 |    0.00095648530,
 84 |    0.00003041721
 85 | };
 86 | 
 87 | /* HTS_movem: move memory; copies nitem doubles from a (source) to b (destination), choosing the copy direction from the pointer order */
 88 | static void HTS_movem(double *a, double *b, const int nitem)
 89 | {
 90 |    long i = (long) nitem;
 91 | 
 92 |    if (a > b)                   /* source above destination: copy front to back */
 93 |       while (i--)
 94 |          *b++ = *a++;
 95 |    else {                       /* otherwise start past the end of both ranges and copy back to front */
 96 |       a += i;
 97 |       b += i;
 98 |       while (i--)
 99 |          *--b = *--a;
100 |    }
101 | }
102 | 
103 | /* HTS_mlsafir: sub functions for MLSA filter; feeds x into the delay buffer d, updates it with a and aa, and returns the sum of d[i] * b[i] for i = 2..m. d is read and written up to index m + 1 */
104 | static double HTS_mlsafir(const double x, const double *b, const int m, const double a, const double aa, double *d)
105 | {
106 |    double y = 0.0;
107 |    int i;
108 | 
109 |    d[0] = x;
110 |    d[1] = aa * d[0] + a * d[1];
111 | 
112 |    for (i = 2; i <= m; i++)
113 |       d[i] += a * (d[i + 1] - d[i - 1]);        /* reads d[m + 1] on the last iteration */
114 | 
115 |    for (i = 2; i <= m; i++)
116 |       y += d[i] * b[i];         /* b[0] and b[1] are not used here */
117 | 
118 |    for (i = m + 1; i > 1; i--)  /* shift d[1..m] up by one slot for the next call */
119 |       d[i] = d[i - 1];
120 | 
121 |    return (y);
122 | }
123 | 
124 | /* HTS_mlsadf1: sub functions for MLSA filter; runs pd cascaded stages that use only b[1], weighting each stage output by ppade[i]. d[1..pd] and d[pd + 1..2 * pd + 1] are used as state; the parameter m is not used */
125 | static double HTS_mlsadf1(double x, const double *b, const int m, const double a, const double aa, const int pd, double *d, const double *ppade)
126 | {
127 |    double v, out = 0.0, *pt;
128 |    int i;
129 | 
130 |    pt = &d[pd + 1];             /* second part of the state buffer: pt[0..pd] */
131 | 
132 |    for (i = pd; i >= 1; i--) {
133 |       d[i] = aa * pt[i - 1] + a * d[i];
134 |       pt[i] = d[i] * b[1];
135 |       v = pt[i] * ppade[i];
136 |       x += (1 & i) ? v : -v;    /* added to x for odd i, subtracted for even i */
137 |       out += v;
138 |    }
139 | 
140 |    pt[0] = x;                   /* stored as the input of stage 1 for the next call */
141 |    out += x;
142 | 
143 |    return (out);
144 | }
145 | 
146 | /* HTS_mlsadf2: sub functions for MLSA filter; runs pd cascaded HTS_mlsafir stages, weighting each stage output by ppade[i]. Stage i uses the m + 2 doubles starting at d[(i - 1) * (m + 2)]; the stage outputs are kept in d[pd * (m + 2)..] */
147 | static double HTS_mlsadf2(double x, const double *b, const int m, const double a, const double aa, const int pd, double *d, const double *ppade)
148 | {
149 |    double v, out = 0.0, *pt;
150 |    int i;
151 | 
152 |    pt = &d[pd * (m + 2)];       /* pt[0..pd] follows the pd per-stage delay buffers */
153 | 
154 |    for (i = pd; i >= 1; i--) {
155 |       pt[i] = HTS_mlsafir(pt[i - 1], b, m, a, aa, &d[(i - 1) * (m + 2)]);
156 |       v = pt[i] * ppade[i];
157 | 
158 |       x += (1 & i) ? v : -v;    /* added to x for odd i, subtracted for even i */
159 |       out += v;
160 |    }
161 | 
162 |    pt[0] = x;                   /* stored as the input of stage 1 for the next call */
163 |    out += x;
164 | 
165 |    return (out);
166 | }
167 | 
168 | /* HTS_mlsadf: functions for MLSA filter; passes sample x through HTS_mlsadf1 and then HTS_mlsadf2 with coefficients b (order m), parameter a and the HTS_pade row for order pd, using d as filter state, and returns the result */
169 | static double HTS_mlsadf(double x, const double *b, const int m, const double a, const int pd, double *d)
170 | {
171 |    const double aa = 1 - a * a;
172 |    const double *ppade = &(HTS_pade[pd * (pd + 1) / 2]);        /* start of the coefficient row for order pd */
173 | 
174 |    x = HTS_mlsadf1(x, b, m, a, aa, pd, d, ppade);
175 |    x = HTS_mlsadf2(x, b, m, a, aa, pd, &d[2 * (pd + 1)], ppade);        /* the first 2 * (pd + 1) doubles of d belong to HTS_mlsadf1 */
176 | 
177 |    return (x);
178 | }
179 | 
180 | /* HTS_rnd: functions for random noise generation; advances the generator state *next and returns a value derived from it, divided by RANDMAX */
181 | static double HTS_rnd(unsigned long *next)
182 | {
183 |    double r;
184 | 
185 |    *next = *next * 1103515245L + 12345; /* multiply-and-add state update; unsigned arithmetic wraps on overflow */
186 |    r = (*next / 65536L) % 32768L;       /* drop the low 16 bits, keep a value in 0..32767 */
187 | 
188 |    return (r / RANDMAX);        /* RANDMAX is defined outside this chunk; presumably 32767 so the result lies in [0, 1] - TODO confirm */
189 | }
190 | 
191 | /* HTS_nrandom: functions for gaussian random noise generation; produces values in pairs: the first call draws r1, r2 and the scale s and returns r1 * s, the next call returns r2 * s from the saved state */
192 | static double HTS_nrandom(HTS_Vocoder * v)
193 | {
194 |    if (v->sw == 0) {            /* no saved value: draw a new pair */
195 |       v->sw = 1;
196 |       do {
197 |          v->r1 = 2 * HTS_rnd(&v->next) - 1;
198 |          v->r2 = 2 * HTS_rnd(&v->next) - 1;
199 |          v->s = v->r1 * v->r1 + v->r2 * v->r2;
200 |       } while (v->s > 1 || v->s == 0);  /* redraw until 0 < s <= 1, which keeps log(s) and the division below well defined */
201 |       v->s = sqrt(-2 * log(v->s) / v->s);
202 |       return (v->r1 * v->s);
203 |    } else {                     /* return the second value of the pair drawn by the previous call */
204 |       v->sw = 0;
205 |       return (v->r2 * v->s);
206 |    }
207 | }
208 | 
209 | /* HTS_mseq: function for M-sequence random noise generation; shifts the register v->x right by one, feeds back a bit derived from the B0 and B28 bits, and returns +1 or -1 according to the B0 bit (B0, B28, B31 and B31_ are masks defined outside this chunk) */
210 | static int HTS_mseq(HTS_Vocoder * v)
211 | {
212 |    int x0, x28;
213 | 
214 |    v->x >>= 1;
215 |    if (v->x & B0)
216 |       x0 = 1;
217 |    else
218 |       x0 = -1;
219 |    if (v->x & B28)
220 |       x28 = 1;
221 |    else
222 |       x28 = -1;
223 |    if (x0 + x28)                /* nonzero when both tested bits are equal */
224 |       v->x &= B31_;
225 |    else                         /* the two bits differ */
226 |       v->x |= B31;
227 | 
228 |    return (x0);
229 | }
230 | 
231 | /* HTS_mc2b: transform mel-cepstrum to MLSA digital filter coefficients; computes b[m] = mc[m] and b[i] = mc[i] - a * b[i + 1] for i = m - 1..0. mc and b may be the same array */
232 | static void HTS_mc2b(double *mc, double *b, int m, const double a)
233 | {
234 |    if (mc != b) {
235 |       if (a != 0.0) {
236 |          b[m] = mc[m];
237 |          for (m--; m >= 0; m--)
238 |             b[m] = mc[m] - a * b[m + 1];
239 |       } else                    /* a == 0: the transform reduces to a copy of m + 1 values */
240 |          HTS_movem(mc, b, m + 1);
241 |    } else if (a != 0.0)         /* in place: b[m] already holds mc[m]; with a == 0 nothing changes */
242 |       for (m--; m >= 0; m--)
243 |          b[m] -= a * b[m + 1];
244 | }
245 | 
246 | /* HTS_b2mc: transform MLSA digital filter coefficients to mel-cepstrum; computes mc[m] = b[m] and mc[i] = b[i] + a * b[i + 1] for i = m - 1..0 */
247 | static void HTS_b2mc(const double *b, double *mc, int m, const double a)
248 | {
249 |    double d, o;
250 | 
251 |    d = mc[m] = b[m];
252 |    for (m--; m >= 0; m--) {
253 |       o = b[m] + a * d;         /* d holds the input value b[m + 1] saved on the previous step */
254 |       d = b[m];                 /* saved before mc[m] is written, so the result is the same when mc and b are one array */
255 |       mc[m] = o;
256 |    }
257 | }
258 | 
259 | /* HTS_freqt: frequency transformation; transforms the order-m1 sequence c1 into the order-m2 sequence c2 with parameter a, using (and growing when needed) the scratch buffer v->freqt_buff */
260 | static void HTS_freqt(HTS_Vocoder * v, const double *c1, const int m1, double *c2, const int m2, const double a)
261 | {
262 |    int i, j;
263 |    const double b = 1 - a * a;
264 |    double *g;
265 | 
266 |    if (m2 > v->freqt_size) {    /* scratch buffer too small for this order: replace it */
267 |       if (v->freqt_buff != NULL)
268 |          HTS_free(v->freqt_buff);
269 |       v->freqt_buff = (double *) HTS_calloc(m2 + m2 + 2, sizeof(double));       /* two halves of m2 + 1 doubles each */
270 |       v->freqt_size = m2;
271 |    }
272 |    g = v->freqt_buff + v->freqt_size + 1;       /* g is the second half; the first half keeps the previous g values */
273 | 
274 |    for (i = 0; i < m2 + 1; i++)
275 |       g[i] = 0.0;
276 | 
277 |    for (i = -m1; i <= 0; i++) { /* consumes c1[m1] down to c1[0] */
278 |       if (0 <= m2)
279 |          g[0] = c1[-i] + a * (v->freqt_buff[0] = g[0]);
280 |       if (1 <= m2)
281 |          g[1] = b * v->freqt_buff[0] + a * (v->freqt_buff[1] = g[1]);
282 |       for (j = 2; j <= m2; j++)
283 |          g[j] = v->freqt_buff[j - 1] + a * ((v->freqt_buff[j] = g[j]) - g[j - 1]);      /* each embedded assignment saves the old g[j] before it is overwritten */
284 |    }
285 | 
286 |    HTS_movem(g, c2, m2 + 1);
287 | }
288 | 
289 | /* HTS_c2ir: The minimum phase impulse response is evaluated from the minimum phase cepstrum; fills h[0..leng - 1] from c[0..nc - 1] with h[0] = exp(c[0]) and h[n] = (sum over k = 1..min(n, nc - 1) of k * c[k] * h[n - k]) / n */
290 | static void HTS_c2ir(const double *c, const int nc, double *h, const int leng)
291 | {
292 |    int n, k, upl;
293 |    double d;
294 | 
295 |    h[0] = exp(c[0]);
296 |    for (n = 1; n < leng; n++) {
297 |       d = 0;
298 |       upl = (n >= nc) ? nc - 1 : n;     /* upper limit of k: never read past c[nc - 1] or before h[0] */
299 |       for (k = 1; k <= upl; k++)
300 |          d += k * c[k] * h[n - k];
301 |       h[n] = d / n;
302 |    }
303 | }
304 | 
305 | /* HTS_b2en: calculate frame energy; converts b (order m) with HTS_b2mc, HTS_freqt and HTS_c2ir into an impulse response of IRLENG samples and returns the sum of its squares. Uses (and grows when needed) the scratch buffer v->spectrum2en_buff */
306 | static double HTS_b2en(HTS_Vocoder * v, const double *b, const int m, const double a)
307 | {
308 |    int i;
309 |    double en = 0.0;
310 |    double *cep;
311 |    double *ir;
312 | 
313 |    if (v->spectrum2en_size < m) {       /* scratch buffer too small for this order: replace it */
314 |       if (v->spectrum2en_buff != NULL)
315 |          HTS_free(v->spectrum2en_buff);
316 |       v->spectrum2en_buff = (double *) HTS_calloc((m + 1) + 2 * IRLENG, sizeof(double));
317 |       v->spectrum2en_size = m;
318 |    }
319 |    cep = v->spectrum2en_buff + m + 1;   /* buffer layout: m + 1 doubles for the HTS_b2mc output, then IRLENG for cep, then IRLENG for ir */
320 |    ir = cep + IRLENG;
321 | 
322 |    HTS_b2mc(b, v->spectrum2en_buff, m, a);
323 |    HTS_freqt(v, v->spectrum2en_buff, m, cep, IRLENG - 1, -a);   /* note the negated a */
324 |    HTS_c2ir(cep, IRLENG, ir, IRLENG);
325 | 
326 |    for (i = 0; i < IRLENG; i++)
327 |       en += ir[i] * ir[i];
328 | 
329 |    return (en);
330 | }
331 | 
332 | /* HTS_ignorm: inverse gain normalization of c1[0..m] into c2[0..m] for gamma g */
333 | static void HTS_ignorm(double *c1, double *c2, int m, const double g)
334 | {
335 |    double scale;
336 |    if (g == 0.0) {              /* gamma of zero: c2[0] is log(c1[0]); c1[1..m] go through HTS_movem */
337 |       HTS_movem(&c1[1], &c2[1], m);
338 |       c2[0] = log(c1[0]);
339 |       return;
340 |    }
341 |    scale = pow(c1[0], g);
342 |    for (; m > 0; m--)
343 |       c2[m] = scale * c1[m];
344 |    c2[0] = (scale - 1.0) / g;
345 | }
346 | 
347 | /* HTS_gnorm: gain normalization of c1[0..m] into c2[0..m] for gamma g */
348 | static void HTS_gnorm(double *c1, double *c2, int m, const double g)
349 | {
350 |    double denom;
351 |    if (g == 0.0) {              /* gamma of zero: c2[0] is exp(c1[0]); c1[1..m] go through HTS_movem */
352 |       HTS_movem(&c1[1], &c2[1], m);
353 |       c2[0] = exp(c1[0]);
354 |       return;
355 |    }
356 |    denom = 1.0 + g * c1[0];
357 |    for (; m > 0; m--)
358 |       c2[m] = c1[m] / denom;
359 |    c2[0] = pow(denom, 1.0 / g);
360 | }
361 | 
362 | /* HTS_lsp2lpc: transform LSP lsp[0..m-1] to LPC a[0..m] (a[0] is set to 1.0); scratch lives in v->lsp2lpc_buff */
363 | static void HTS_lsp2lpc(HTS_Vocoder * v, double *lsp, double *a, const int m)
364 | {
365 |    int i, k, mh1, mh2, flag_odd;
366 |    double xx, xf, xff;
367 |    double *p, *q;
368 |    double *a0, *a1, *a2, *b0, *b1, *b2;
369 | 
370 |    flag_odd = 0;
371 |    if (m % 2 == 0)              /* split the order: mh1 + mh2 == m */
372 |       mh1 = mh2 = m / 2;
373 |    else {
374 |       mh1 = (m + 1) / 2;
375 |       mh2 = (m - 1) / 2;
376 |       flag_odd = 1;
377 |    }
378 | 
379 |    if (m > v->lsp2lpc_size) {   /* grow the scratch area when m exceeds the recorded size */
380 |       if (v->lsp2lpc_buff != NULL)
381 |          HTS_free(v->lsp2lpc_buff);
382 |       v->lsp2lpc_buff = (double *) HTS_calloc(5 * m + 6, sizeof(double));       /* m + mh1 + mh2 + 3 * (mh1 + 1) + 3 * (mh2 + 1) */
383 |       v->lsp2lpc_size = m;
384 |    }
385 |    p = v->lsp2lpc_buff + m;     /* layout: LSP copy, p, q, a0, a1, a2, b0, b1, b2 */
386 |    q = p + mh1;
387 |    a0 = q + mh2;
388 |    a1 = a0 + (mh1 + 1);
389 |    a2 = a1 + (mh1 + 1);
390 |    b0 = a2 + (mh1 + 1);
391 |    b1 = b0 + (mh2 + 1);
392 |    b2 = b1 + (mh2 + 1);
393 | 
394 |    HTS_movem(lsp, v->lsp2lpc_buff, m);  /* all later reads of the LSPs use this copy */
395 | 
396 |    for (i = 0; i < mh1 + 1; i++)
397 |       a0[i] = 0.0;
398 |    for (i = 0; i < mh1 + 1; i++)
399 |       a1[i] = 0.0;
400 |    for (i = 0; i < mh1 + 1; i++)
401 |       a2[i] = 0.0;
402 |    for (i = 0; i < mh2 + 1; i++)
403 |       b0[i] = 0.0;
404 |    for (i = 0; i < mh2 + 1; i++)
405 |       b1[i] = 0.0;
406 |    for (i = 0; i < mh2 + 1; i++)
407 |       b2[i] = 0.0;
408 | 
409 |    /* lsp filter parameters */
410 |    for (i = k = 0; i < mh1; i++, k += 2)        /* p takes the copied LSPs at even positions */
411 |       p[i] = -2.0 * cos(v->lsp2lpc_buff[k]);
412 |    for (i = k = 0; i < mh2; i++, k += 2)        /* q takes the copied LSPs at odd positions */
413 |       q[i] = -2.0 * cos(v->lsp2lpc_buff[k + 1]);
414 | 
415 |    /* impulse response of analysis filter */
416 |    xx = 1.0;
417 |    xf = xff = 0.0;
418 | 
419 |    for (k = 0; k <= m; k++) {   /* xx is 1.0 for k == 0 and 0.0 afterwards: a unit impulse */
420 |       if (flag_odd) {
421 |          a0[0] = xx;
422 |          b0[0] = xx - xff;
423 |          xff = xf;
424 |          xf = xx;
425 |       } else {
426 |          a0[0] = xx + xf;
427 |          b0[0] = xx - xf;
428 |          xf = xx;
429 |       }
430 | 
431 |       for (i = 0; i < mh1; i++) {       /* cascade of mh1 second-order sections with state a1, a2 */
432 |          a0[i + 1] = a0[i] + p[i] * a1[i] + a2[i];
433 |          a2[i] = a1[i];
434 |          a1[i] = a0[i];
435 |       }
436 | 
437 |       for (i = 0; i < mh2; i++) {       /* cascade of mh2 second-order sections with state b1, b2 */
438 |          b0[i + 1] = b0[i] + q[i] * b1[i] + b2[i];
439 |          b2[i] = b1[i];
440 |          b1[i] = b0[i];
441 |       }
442 | 
443 |       if (k != 0)               /* the output for k == 0 is not stored */
444 |          a[k - 1] = -0.5 * (a0[mh1] + b0[mh2]);
445 |       xx = 0.0;
446 |    }
447 | 
448 |    for (i = m - 1; i >= 0; i--) /* shift up by one position and negate */
449 |       a[i + 1] = -a[i];
450 |    a[0] = 1.0;
451 | }
452 | 
453 | /* HTS_gc2gc: generalized cepstral transformation of c1[0..m1] (gamma g1) into c2[0..m2] (gamma g2) */
454 | static void HTS_gc2gc(HTS_Vocoder * v, double *c1, const int m1, const double g1, double *c2, const int m2, const double g2)
455 | {
456 |    int i, min, k, mk;
457 |    double ss1, ss2, cc;
458 | 
459 |    if (m1 > v->gc2gc_size) {    /* grow the scratch copy when m1 exceeds the recorded size */
460 |       if (v->gc2gc_buff != NULL)
461 |          HTS_free(v->gc2gc_buff);
462 |       v->gc2gc_buff = (double *) HTS_calloc(m1 + 1, sizeof(double));
463 |       v->gc2gc_size = m1;
464 |    }
465 | 
466 |    HTS_movem(c1, v->gc2gc_buff, m1 + 1);        /* the recursion below reads v->gc2gc_buff instead of c1 */
467 | 
468 |    c2[0] = v->gc2gc_buff[0];
469 |    for (i = 1; i <= m2; i++) {
470 |       ss1 = ss2 = 0.0;
471 |       min = m1 < i ? m1 : i - 1;        /* last input index that takes part in the sums for c2[i] */
472 |       for (k = 1; k <= min; k++) {
473 |          mk = i - k;
474 |          cc = v->gc2gc_buff[k] * c2[mk];        /* c2[mk] was produced earlier in this loop (mk < i) */
475 |          ss2 += k * cc;
476 |          ss1 += mk * cc;
477 |       }
478 | 
479 |       if (i <= m1)              /* beyond m1 there is no direct input term */
480 |          c2[i] = v->gc2gc_buff[i] + (g2 * ss2 - g1 * ss1) / i;
481 |       else
482 |          c2[i] = (g2 * ss2 - g1 * ss1) / i;
483 |    }
484 | }
485 | 
486 | /* HTS_mgc2mgc: frequency and generalized cepstral transformation of c1[0..m1] */
487 | /* (alpha a1, gamma g1) into c2[0..m2] (alpha a2, gamma g2) */
488 | static void HTS_mgc2mgc(HTS_Vocoder * v, double *c1, const int m1, const double a1, const double g1, double *c2, const int m2, const double a2, const double g2)
489 | {
490 |    double *src = c1;
491 |    int order = m1;
492 | 
493 |    if (a1 != a2) {
494 |       /* different warping: frequency-transform into c2 and continue from there */
495 |       HTS_freqt(v, c1, m1, c2, m2, (a2 - a1) / (1 - a1 * a2));
496 |       src = c2;
497 |       order = m2;
498 |    }
499 |    HTS_gnorm(src, src, order, g1);      /* normalizes c1 itself in place when a1 == a2 */
500 |    HTS_gc2gc(v, src, order, g1, c2, m2, g2);
501 |    HTS_ignorm(c2, c2, m2, g2);
502 | }
503 | 
504 | /* HTS_lsp2mgc: transform LSP (gain in lsp[0], LSP in lsp[1..m]) to MGC in mgc[0..m]. */
505 | /* mgc may be the same array as lsp, as in the first-frame path of HTS_Vocoder_synthesize. */
506 | static void HTS_lsp2mgc(HTS_Vocoder * v, double *lsp, double *mgc, const int m, const double alpha)
507 | {
508 |    int i;
509 |    /* HTS_lsp2lpc() leaves 1.0 in mgc[0]; fetch the gain first so it survives when mgc aliases lsp */
510 |    const double gain = v->use_log_gain ? exp(lsp[0]) : lsp[0];
511 | 
512 |    /* lsp2lpc */
513 |    HTS_lsp2lpc(v, lsp + 1, mgc, m);
514 |    mgc[0] = gain;
515 |    /* mgc2mgc */
516 |    if (NORMFLG1)
517 |       HTS_ignorm(mgc, mgc, m, v->gamma);
518 |    else if (MULGFLG1)
519 |       mgc[0] = (1.0 - mgc[0]) * ((double) v->stage);
520 |    if (MULGFLG1)
521 |       for (i = m; i >= 1; i--)
522 |          mgc[i] *= -((double) v->stage);
523 |    HTS_mgc2mgc(v, mgc, m, alpha, v->gamma, mgc, m, alpha, v->gamma);
524 |    if (NORMFLG2)
525 |       HTS_gnorm(mgc, mgc, m, v->gamma);
526 |    else if (MULGFLG2)
527 |       mgc[0] = mgc[0] * v->gamma + 1.0;
528 |    if (MULGFLG2)
529 |       for (i = m; i >= 1; i--)
530 |          mgc[i] *= v->gamma;
531 | }
532 | 
533 | /* HTS_mglsadff: one stage of the MGLSA filter; filters sample x with coefficients b[1..m] and updates the state d[0..m] */
534 | static double HTS_mglsadff(double x, const double *b, const int m, const double a, double *d)
535 | {
536 |    int i;
537 | 
538 |    double y;
539 |    y = d[0] * b[1];
540 |    for (i = 1; i < m; i++) {
541 |       d[i] += a * (d[i + 1] - d[i - 1]);        /* for i > 1, d[i - 1] is the value updated one iteration earlier */
542 |       y += d[i] * b[i + 1];
543 |    }
544 |    x -= y;                      /* the returned sample is the input minus the weighted state */
545 | 
546 |    for (i = m; i > 0; i--)      /* shift the state up by one position */
547 |       d[i] = d[i - 1];
548 |    d[0] = a * d[0] + (1 - a * a) * x;
549 |    return x;
550 | }
551 | 
552 | /* HTS_mglsadf: pass x through n cascaded HTS_mglsadff stages; stage i uses the m + 1 state values at d[i * (m + 1)] */
553 | static double HTS_mglsadf(double x, const double *b, const int m, const double a, const int n, double *d)
554 | {
555 |    double *state = d;
556 |    int left;
557 | 
558 |    for (left = n; left > 0; left--, state += m + 1)
559 |       x = HTS_mglsadff(x, b, m, a, state);
560 |    return x;
561 | }
562 | 
563 | /* HTS_check_lsp_stability: check LSP stability; keeps neighbours in lsp[1..m] at least a */
564 | /* minimum distance apart and the ends away from 0 and PI, for at most CHECK_LSP_STABILITY_NUM passes */
565 | static void HTS_check_lsp_stability(double *lsp, size_t m)
566 | {
567 |    size_t pass, k;
568 |    double gap, push;
569 |    const double limit = (CHECK_LSP_STABILITY_MIN * PI) / (m + 1);
570 |    HTS_Boolean changed;
571 | 
572 |    for (pass = 0; pass < CHECK_LSP_STABILITY_NUM; pass++) {
573 |       changed = FALSE;
574 |       /* widen every neighbouring pair that is closer than the limit */
575 |       for (k = 1; k < m; k++) {
576 |          gap = lsp[k + 1] - lsp[k];
577 |          if (gap < limit) {
578 |             push = 0.5 * (limit - gap);
579 |             lsp[k] -= push;
580 |             lsp[k + 1] += push;
581 |             changed = TRUE;
582 |          }
583 |       }
584 |       if (lsp[1] < limit) {
585 |          lsp[1] = limit;
586 |          changed = TRUE;
587 |       }
588 |       if (lsp[m] > PI - limit) {
589 |          lsp[m] = PI - limit;
590 |          changed = TRUE;
591 |       }
592 |       if (changed == FALSE)
593 |          break;
594 |    }
595 | }
596 | 
597 | /* HTS_lsp2en: calculate frame energy of the LSP frame lsp[0..m] as the sum of squares of IRLENG transformed coefficients */
598 | static double HTS_lsp2en(HTS_Vocoder * v, double *lsp, size_t m, double alpha)
599 | {
600 |    size_t i;
601 |    double en = 0.0;
602 |    double *buff;
603 | 
604 |    if (v->spectrum2en_size < m) {       /* grow the work area when m exceeds the recorded size */
605 |       if (v->spectrum2en_buff != NULL)
606 |          HTS_free(v->spectrum2en_buff);
607 |       v->spectrum2en_buff = (double *) HTS_calloc(m + 1 + IRLENG, sizeof(double));
608 |       v->spectrum2en_size = m;
609 |    }
610 |    buff = v->spectrum2en_buff + m + 1;  /* IRLENG outputs follow the m + 1 input coefficients */
611 | 
612 |    /* lsp2lpc */
613 |    HTS_lsp2lpc(v, lsp + 1, v->spectrum2en_buff, m);     /* fills spectrum2en_buff[0..m]; slot 0 is replaced by the gain below */
614 |    if (v->use_log_gain)
615 |       v->spectrum2en_buff[0] = exp(lsp[0]);
616 |    else
617 |       v->spectrum2en_buff[0] = lsp[0];
618 | 
619 |    /* mgc2mgc */
620 |    if (NORMFLG1)
621 |       HTS_ignorm(v->spectrum2en_buff, v->spectrum2en_buff, m, v->gamma);
622 |    else if (MULGFLG1)
623 |       v->spectrum2en_buff[0] = (1.0 - v->spectrum2en_buff[0]) * ((double) v->stage);
624 |    if (MULGFLG1)
625 |       for (i = 1; i <= m; i++)
626 |          v->spectrum2en_buff[i] *= -((double) v->stage);
627 |    HTS_mgc2mgc(v, v->spectrum2en_buff, m, alpha, v->gamma, buff, IRLENG - 1, 0.0, 1);   /* target: order IRLENG - 1, alpha 0.0, gamma 1 */
628 | 
629 |    for (i = 0; i < IRLENG; i++) /* energy = sum of squared outputs */
630 |       en += buff[i] * buff[i];
631 |    return en;
632 | }
633 | 
634 | /* HTS_white_noise: return one white noise sample, taken from HTS_nrandom when */
635 | /* v->gauss is set and from HTS_mseq otherwise */
636 | static double HTS_white_noise(HTS_Vocoder * v)
637 | {
638 |    const double sample = v->gauss ? (double) HTS_nrandom(v) : (double) HTS_mseq(v);
639 | 
640 |    return sample;
641 | }
642 | 
643 | /* HTS_Vocoder_initialize_excitation: initialize the pitch state and, when nlpf > 0, */
644 | /* a zeroed ring buffer of nlpf samples; without a filter no buffer is kept */
645 | static void HTS_Vocoder_initialize_excitation(HTS_Vocoder * v, double pitch, size_t nlpf)
646 | {
647 |    size_t k;
648 | 
649 |    /* start on the given pitch with no per-sample increment */
650 |    v->pitch_of_curr_point = pitch;
651 |    v->pitch_counter = pitch;
652 |    v->pitch_inc_per_point = 0.0;
653 |    v->excite_buff_index = 0;
654 |    v->excite_buff_size = nlpf;
655 |    if (nlpf == 0) {
656 |       v->excite_ring_buff = NULL;
657 |       return;
658 |    }
659 |    v->excite_ring_buff = (double *) HTS_calloc(v->excite_buff_size, sizeof(double));
660 |    for (k = 0; k < v->excite_buff_size; k++)
661 |       v->excite_ring_buff[k] = 0.0;
662 | }
663 | 
664 | /* HTS_Vocoder_start_excitation: start excitation of each frame by setting the per-sample pitch increment */
665 | static void HTS_Vocoder_start_excitation(HTS_Vocoder * v, double pitch)
666 | {
667 |    if (v->pitch_of_curr_point == 0.0 || pitch == 0.0) { /* a zero pitch on either side: jump, do not interpolate */
668 |       v->pitch_of_curr_point = pitch;
669 |       v->pitch_counter = pitch;
670 |       v->pitch_inc_per_point = 0.0;
671 |       return;
672 |    }
673 |    v->pitch_inc_per_point = (pitch - v->pitch_of_curr_point) / v->fprd;
674 | }
675 | 
676 | /* HTS_Vocoder_excite_unvoiced_frame: add noise to the centre slot of the ring buffer */
677 | static void HTS_Vocoder_excite_unvoiced_frame(HTS_Vocoder * v, double noise)
678 | {
679 |    double *ring = v->excite_ring_buff;
680 |    ring[(v->excite_buff_index + (size_t) ((v->excite_buff_size - 1) / 2)) % v->excite_buff_size] += noise;
681 | }
682 | 
683 | /* HTS_Vocoder_excite_voiced_frame: add noise and pulse to the ring buffer. */
684 | /* The pulse is weighted by lpf[]; the noise is weighted by a unit impulse at */
685 | /* the centre tap minus lpf[]. */
686 | static void HTS_Vocoder_excite_voiced_frame(HTS_Vocoder * v, double noise, double pulse, const double *lpf)
687 | {
688 |    const size_t taps = v->excite_buff_size;
689 |    const size_t center = (taps - 1) / 2;
690 |    size_t k;
691 |    double *slot;
692 | 
693 |    for (k = 0; k < taps; k++) {
694 |       slot = &v->excite_ring_buff[(v->excite_buff_index + k) % taps];
695 |       /* each slot receives its noise share first, then its pulse share */
696 |       if (noise != 0.0)
697 |          *slot += noise * (((k == center) ? 1.0 : 0.0) - lpf[k]);
698 |       if (pulse != 0.0)
699 |          *slot += pulse * lpf[k];
700 |    }
701 | }
702 | 
703 | /* HTS_Vocoder_get_excitation: get excitation of each sample and advance the pitch state by one sample */
704 | static double HTS_Vocoder_get_excitation(HTS_Vocoder * v, const double *lpf)
705 | {
706 |    double x;
707 |    double noise, pulse = 0.0;
708 | 
709 |    if (v->excite_buff_size > 0) {       /* a ring buffer exists: excitation goes through it */
710 |       noise = HTS_white_noise(v);       /* drawn for every sample, voiced or not */
711 |       pulse = 0.0;
712 |       if (v->pitch_of_curr_point == 0.0) {      /* zero pitch: unvoiced */
713 |          HTS_Vocoder_excite_unvoiced_frame(v, noise);
714 |       } else {
715 |          v->pitch_counter += 1.0;
716 |          if (v->pitch_counter >= v->pitch_of_curr_point) {      /* one pitch period has elapsed: emit a pulse */
717 |             pulse = sqrt(v->pitch_of_curr_point);
718 |             v->pitch_counter -= v->pitch_of_curr_point;
719 |          }
720 |          HTS_Vocoder_excite_voiced_frame(v, noise, pulse, lpf);
721 |          v->pitch_of_curr_point += v->pitch_inc_per_point;      /* move the pitch one sample along its slope */
722 |       }
723 |       x = v->excite_ring_buff[v->excite_buff_index];    /* take the current slot ... */
724 |       v->excite_ring_buff[v->excite_buff_index] = 0.0;  /* ... and clear it for reuse */
725 |       v->excite_buff_index++;
726 |       if (v->excite_buff_index >= v->excite_buff_size)  /* wrap around */
727 |          v->excite_buff_index = 0;
728 |    } else {                     /* no ring buffer: plain noise or pulse train */
729 |       if (v->pitch_of_curr_point == 0.0) {
730 |          x = HTS_white_noise(v);
731 |       } else {
732 |          v->pitch_counter += 1.0;
733 |          if (v->pitch_counter >= v->pitch_of_curr_point) {
734 |             x = sqrt(v->pitch_of_curr_point);
735 |             v->pitch_counter -= v->pitch_of_curr_point;
736 |          } else {
737 |             x = 0.0;
738 |          }
739 |          v->pitch_of_curr_point += v->pitch_inc_per_point;      /* move the pitch one sample along its slope */
740 |       }
741 |    }
742 | 
743 |    return x;
744 | }
745 | 
746 | /* HTS_Vocoder_end_excitation: end excitation of each frame by storing the frame's pitch as the current pitch */
747 | static void HTS_Vocoder_end_excitation(HTS_Vocoder * v, double pitch)
748 | {
749 |    v->pitch_of_curr_point = pitch;
750 | }
751 | 
752 | /* HTS_Vocoder_postfilter_mcp: postfilter for MCP; scales b[2..m] by (1 + beta) and adds */
753 | /* log(e1 / e2) / 2 to b[0] using the energies before and after; does nothing unless beta > 0 and m > 1 */
754 | static void HTS_Vocoder_postfilter_mcp(HTS_Vocoder * v, double *mcp, const int m, double alpha, double beta)
755 | {
756 |    double before, after, *b;
757 |    int i;
758 | 
759 |    if (!(beta > 0.0 && m > 1))
760 |       return;
761 |    if (v->postfilter_size < m) {
762 |       if (v->postfilter_buff != NULL)
763 |          HTS_free(v->postfilter_buff);
764 |       v->postfilter_buff = (double *) HTS_calloc(m + 1, sizeof(double));
765 |       v->postfilter_size = m;
766 |    }
767 |    b = v->postfilter_buff;
768 |    HTS_mc2b(mcp, b, m, alpha);
769 |    before = HTS_b2en(v, b, m, alpha);
770 |    b[1] -= beta * alpha * b[2];
771 |    for (i = 2; i <= m; i++)
772 |       b[i] *= (1.0 + beta);
773 |    after = HTS_b2en(v, b, m, alpha);
774 |    b[0] += log(before / after) / 2;
775 |    HTS_b2mc(b, mcp, m, alpha);
776 | }
777 | 
778 | /* HTS_Vocoder_postfilter_lsp: postfilter for LSP; moves lsp[2..m-1] according to the spacing of */
779 | /* their neighbours, then rescales the gain lsp[0] by the energy ratio; no-op unless beta > 0 and m > 1 */
780 | static void HTS_Vocoder_postfilter_lsp(HTS_Vocoder * v, double *lsp, size_t m, double alpha, double beta)
781 | {
782 |    double e1, e2;
783 |    size_t i;
784 |    double d1, d2, den;
785 | 
786 |    if (beta > 0.0 && m > 1) {
787 |       if (v->postfilter_size < m) {
788 |          if (v->postfilter_buff != NULL)
789 |             HTS_free(v->postfilter_buff);
790 |          v->postfilter_buff = (double *) HTS_calloc(m + 1, sizeof(double));
791 |          v->postfilter_size = m;
792 |       }
793 |       e1 = HTS_lsp2en(v, lsp, m, alpha);
794 | 
795 |       /* postfiltering */
796 |       for (i = 0; i <= m; i++) {
797 |          v->postfilter_buff[i] = lsp[i];        /* default: leave the coefficient as it is */
798 |          if (i > 1 && i < m) {
799 |             d1 = beta * (lsp[i + 1] - lsp[i]);
800 |             d2 = beta * (lsp[i] - lsp[i - 1]);
801 |             den = (d2 * d2) + (d1 * d1);
802 |             /* den is zero when lsp[i - 1], lsp[i] and lsp[i + 1] coincide; dividing would give NaN */
803 |             if (den > 0.0)
804 |                v->postfilter_buff[i] = lsp[i - 1] + d2 + (d2 * d2 * ((lsp[i + 1] - lsp[i - 1]) - (d1 + d2))) / den;
805 |          }
806 |       }
807 |       HTS_movem(v->postfilter_buff, lsp, m + 1);
808 |       e2 = HTS_lsp2en(v, lsp, m, alpha);
809 |       if (e1 != e2) {
810 |          if (v->use_log_gain)
811 |             lsp[0] += 0.5 * log(e1 / e2);
812 |          else
813 |             lsp[0] *= sqrt(e1 / e2);
814 |       }
815 |    }
816 | }
817 | 
818 | /* HTS_Vocoder_initialize: initialize vocoder for order m. stage == 0 selects the MCP */
819 | /* layout; any other stage selects the LSP layout with gamma = -1 / stage. */
820 | void HTS_Vocoder_initialize(HTS_Vocoder * v, size_t m, size_t stage, HTS_Boolean use_log_gain, size_t rate, size_t fperiod)
821 | {
822 |    size_t total;
823 | 
824 |    /* set parameter */
825 |    v->is_first = TRUE;
826 |    v->stage = stage;
827 |    v->gamma = (stage != 0) ? -1.0 / v->stage : 0.0;
828 |    v->use_log_gain = use_log_gain;
829 |    v->fprd = fperiod;
830 |    v->next = SEED;
831 |    v->gauss = GAUSS;
832 |    v->rate = rate;
833 |    /* excitation state */
834 |    v->pitch_of_curr_point = 0.0;
835 |    v->pitch_counter = 0.0;
836 |    v->pitch_inc_per_point = 0.0;
837 |    v->excite_ring_buff = NULL;
838 |    v->excite_buff_size = 0;
839 |    v->excite_buff_index = 0;
840 |    v->sw = 0;
841 |    v->x = 0x55555555;
842 |    /* init buffer */
843 |    v->freqt_buff = NULL;
844 |    v->freqt_size = 0;
845 |    v->gc2gc_buff = NULL;
846 |    v->gc2gc_size = 0;
847 |    v->lsp2lpc_buff = NULL;
848 |    v->lsp2lpc_size = 0;
849 |    v->postfilter_buff = NULL;
850 |    v->postfilter_size = 0;
851 |    v->spectrum2en_buff = NULL;
852 |    v->spectrum2en_size = 0;
853 | 
854 |    /* one block: c, cc and cinc take m + 1 values each, d1 is what remains */
855 |    if (v->stage == 0)           /* for MCP */
856 |       total = m * (3 + PADEORDER) + 5 * PADEORDER + 6;
857 |    else                         /* for LSP */
858 |       total = (m + 1) * (v->stage + 3);
859 |    v->c = (double *) HTS_calloc(total, sizeof(double));
860 |    v->cc = v->c + m + 1;
861 |    v->cinc = v->cc + m + 1;
862 |    v->d1 = v->cinc + m + 1;
863 | }
864 | 
865 | /* HTS_Vocoder_synthesize: pulse/noise excitation and MLSA/MGLSA filter based waveform synthesis of one frame (v->fprd samples) */
866 | void HTS_Vocoder_synthesize(HTS_Vocoder * v, size_t m, double lf0, double *spectrum, size_t nlpf, double *lpf, double alpha, double beta, double volume, double *rawdata, HTS_Audio * audio)
867 | {
868 |    double x;
869 |    int i, j;
870 |    short xs;
871 |    int rawidx = 0;
872 |    double p;
873 | 
874 |    /* lf0 -> pitch */
875 |    if (lf0 == LZERO)            /* LZERO gives pitch 0, which the excitation code treats as unvoiced */
876 |       p = 0.0;
877 |    else if (lf0 <= MIN_LF0)     /* clamp to the lowest supported F0 */
878 |       p = v->rate / MIN_F0;
879 |    else if (lf0 >= MAX_LF0)     /* clamp to the highest supported F0 */
880 |       p = v->rate / MAX_F0;
881 |    else
882 |       p = v->rate / exp(lf0);   /* pitch period in samples */
883 | 
884 |    /* first time */
885 |    if (v->is_first == TRUE) {   /* seed v->c from this frame's spectrum */
886 |       HTS_Vocoder_initialize_excitation(v, p, nlpf);
887 |       if (v->stage == 0) {      /* for MCP */
888 |          HTS_mc2b(spectrum, v->c, m, alpha);
889 |       } else {                  /* for LSP */
890 |          HTS_movem(spectrum, v->c, m + 1);
891 |          HTS_lsp2mgc(v, v->c, v->c, m, alpha);  /* in place: input and output are both v->c */
892 |          HTS_mc2b(v->c, v->c, m, alpha);
893 |          HTS_gnorm(v->c, v->c, m, v->gamma);
894 |          for (i = 1; i <= m; i++)
895 |             v->c[i] *= v->gamma;
896 |       }
897 |       v->is_first = FALSE;
898 |    }
899 | 
900 |    HTS_Vocoder_start_excitation(v, p);
901 |    if (v->stage == 0) {         /* for MCP */
902 |       HTS_Vocoder_postfilter_mcp(v, spectrum, m, alpha, beta);
903 |       HTS_mc2b(spectrum, v->cc, m, alpha);
904 |       for (i = 0; i <= m; i++)
905 |          v->cinc[i] = (v->cc[i] - v->c[i]) / v->fprd;   /* per-sample step from v->c towards v->cc */
906 |    } else {                     /* for LSP */
907 |       HTS_Vocoder_postfilter_lsp(v, spectrum, m, alpha, beta);
908 |       HTS_check_lsp_stability(spectrum, m);
909 |       HTS_lsp2mgc(v, spectrum, v->cc, m, alpha);
910 |       HTS_mc2b(v->cc, v->cc, m, alpha);
911 |       HTS_gnorm(v->cc, v->cc, m, v->gamma);
912 |       for (i = 1; i <= m; i++)
913 |          v->cc[i] *= v->gamma;
914 |       for (i = 0; i <= m; i++)
915 |          v->cinc[i] = (v->cc[i] - v->c[i]) / v->fprd;   /* per-sample step from v->c towards v->cc */
916 |    }
917 | 
918 |    for (j = 0; j < v->fprd; j++) {      /* one output sample per iteration */
919 |       x = HTS_Vocoder_get_excitation(v, lpf);
920 |       if (v->stage == 0) {      /* for MCP */
921 |          if (x != 0.0)
922 |             x *= exp(v->c[0]);
923 |          x = HTS_mlsadf(x, v->c, m, alpha, PADEORDER, v->d1);
924 |       } else {                  /* for LSP */
925 |          if (!NGAIN)
926 |             x *= v->c[0];
927 |          x = HTS_mglsadf(x, v->c, m, alpha, v->stage, v->d1);
928 |       }
929 |       x *= volume;
930 | 
931 |       /* output */
932 |       if (rawdata)
933 |          rawdata[rawidx++] = x; /* unclipped sample */
934 |       if (audio) {              /* clip to the range of short before writing */
935 |          if (x > 32767.0)
936 |             xs = 32767;
937 |          else if (x < -32768.0)
938 |             xs = -32768;
939 |          else
940 |             xs = (short) x;     /* the conversion drops the fractional part */
941 |          HTS_Audio_write(audio, xs);
942 |       }
943 | 
944 |       for (i = 0; i <= m; i++)  /* advance the filter coefficients by one step */
945 |          v->c[i] += v->cinc[i];
946 |    }
947 | 
948 |    HTS_Vocoder_end_excitation(v, p);
949 |    HTS_movem(v->cc, v->c, m + 1);       /* the frame's target coefficients become the next starting point */
950 | }
951 | 
952 | /* HTS_Vocoder_clear: clear vocoder; frees every buffer it owns and resets the matching pointers and sizes (v may be NULL) */
953 | void HTS_Vocoder_clear(HTS_Vocoder * v)
954 | {
955 |    if (v != NULL) {
956 |       if (v->freqt_buff != NULL) {
957 |          HTS_free(v->freqt_buff);
958 |          v->freqt_buff = NULL;
959 |       }
960 |       v->freqt_size = 0;
961 |       if (v->gc2gc_buff != NULL) {
962 |          HTS_free(v->gc2gc_buff);
963 |          v->gc2gc_buff = NULL;
964 |       }
965 |       v->gc2gc_size = 0;
966 |       if (v->lsp2lpc_buff != NULL) {
967 |          HTS_free(v->lsp2lpc_buff);
968 |          v->lsp2lpc_buff = NULL;
969 |       }
970 |       v->lsp2lpc_size = 0;
971 |       if (v->postfilter_buff != NULL) {
972 |          HTS_free(v->postfilter_buff);
973 |          v->postfilter_buff = NULL;
974 |       }
975 |       v->postfilter_size = 0;
976 |       if (v->spectrum2en_buff != NULL) {
977 |          HTS_free(v->spectrum2en_buff);
978 |          v->spectrum2en_buff = NULL;
979 |       }
980 |       v->spectrum2en_size = 0;
981 |       if (v->c != NULL) {
982 |          HTS_free(v->c);
983 |          v->c = NULL;
984 |       }
985 |       v->cc = v->cinc = v->d1 = NULL;   /* these point into the block owned by v->c; do not leave them dangling */
986 |       v->excite_buff_size = 0;
987 |       v->excite_buff_index = 0;
988 |       if (v->excite_ring_buff != NULL) {
989 |          HTS_free(v->excite_ring_buff);
990 |          v->excite_ring_buff = NULL;
991 |       }
992 |    }
993 | }
994 | 
995 | HTS_VOCODER_C_END;
996 | 
997 | #endif                          /* !HTS_VOCODER_C */
998 | 


--------------------------------------------------------------------------------
/src/lib/Makefile.am:
--------------------------------------------------------------------------------
 1 | # Automake input for the static library libHTSEngine.a; Makefile.mak is additionally shipped in the distribution
 2 | EXTRA_DIST = Makefile.mak
 3 | # Headers are looked up in include/ at the top of the source tree
 4 | AM_CPPFLAGS = -I @top_srcdir@/include
 5 | # Library to build and install
 6 | lib_LIBRARIES = libHTSEngine.a
 7 | # Sources (and the private header) that make up libHTSEngine.a
 8 | libHTSEngine_a_SOURCES = HTS_audio.c  HTS_engine.c  HTS_hidden.h  HTS_misc.c  \
 9 |                          HTS_pstream.c  HTS_sstream.c  HTS_model.c  HTS_vocoder.c \
10 |                          HTS_gstream.c HTS_label.c
11 | # Extra files removed by "make distclean"
12 | DISTCLEANFILES = *.log *.out *~
13 | # Extra files removed by "make maintainer-clean"
14 | MAINTAINERCLEANFILES = Makefile.in
15 | 


--------------------------------------------------------------------------------
/src/lib/Makefile.mak:
--------------------------------------------------------------------------------
 1 | # NMAKE makefile: builds hts_engine_API.lib with the Microsoft C compiler (cl)
 2 | CC = cl
 3 | # /GL (whole program optimization) in CFLAGS pairs with /LTCG in LFLAGS; /TC compiles every source as C
 4 | CFLAGS = /O2 /Ob2 /Oi /Ot /Oy /GT /GL /TC /I ..\include
 5 | LFLAGS = /LTCG
 6 | # Object files that make up the library
 7 | CORES = HTS_audio.obj HTS_engine.obj HTS_gstream.obj HTS_label.obj HTS_misc.obj HTS_model.obj HTS_pstream.obj HTS_sstream.obj HTS_vocoder.obj
 8 | # Default target
 9 | all: hts_engine_API.lib
10 | # Archive the objects into the static library
11 | hts_engine_API.lib: $(CORES)
12 | 	lib $(LFLAGS) /OUT:$@ $(CORES)
13 | # Inference rule: compile each .c file into a .obj file
14 | .c.obj:
15 | 	$(CC) $(CFLAGS) /c $<
16 | # Remove the build products
17 | clean:
18 | 	del *.lib
19 | 	del *.obj
20 | 


--------------------------------------------------------------------------------